begin parallel downloads
This commit is contained in:
@@ -8,6 +8,7 @@ from pyfzf.pyfzf import FzfPrompt
|
|||||||
import click
|
import click
|
||||||
|
|
||||||
import os, subprocess, shlex, logging, re
|
import os, subprocess, shlex, logging, re
|
||||||
|
import concurrent.futures
|
||||||
import importlib.resources as pkg_resources
|
import importlib.resources as pkg_resources
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List
|
from typing import List
|
||||||
@@ -44,6 +45,7 @@ class GBConvert():
|
|||||||
title:str = None,
|
title:str = None,
|
||||||
showprogress: bool = False,
|
showprogress: bool = False,
|
||||||
cleanpages: bool = True,
|
cleanpages: bool = True,
|
||||||
|
max_workers: int = 10,
|
||||||
):
|
):
|
||||||
tocpage = os.path.dirname(url) # ToC website url
|
tocpage = os.path.dirname(url) # ToC website url
|
||||||
dir_output = self.getDir(url)
|
dir_output = self.getDir(url)
|
||||||
@@ -72,13 +74,22 @@ class GBConvert():
|
|||||||
#run
|
#run
|
||||||
#TODO include images flag
|
#TODO include images flag
|
||||||
# download all files in toc (chapters)
|
# download all files in toc (chapters)
|
||||||
chapter_files = []
|
def process_item(item):
|
||||||
for item in (tqdm(chapter_urls) if showprogress else chapter_urls):
|
|
||||||
item_url = self.parse_toc_entry(tocpage, item)
|
item_url = self.parse_toc_entry(tocpage, item)
|
||||||
parsed_url = urlparse(item_url)
|
parsed_url = urlparse(item_url)
|
||||||
filepath = os.path.join(self.dir_download, parsed_url.netloc + parsed_url.path)
|
filepath = os.path.join(self.dir_download, parsed_url.netloc + parsed_url.path)
|
||||||
if cleanpages: self.parse_page(filepath)
|
if cleanpages:
|
||||||
chapter_files.append(os.path.basename(item_url))
|
self.parse_page(filepath)
|
||||||
|
return os.path.basename(item_url)
|
||||||
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
|
futures = []
|
||||||
|
items = tqdm(chapter_urls) if showprogress else chapter_urls
|
||||||
|
for item in items:
|
||||||
|
futures.append(executor.submit(process_item, item))
|
||||||
|
|
||||||
|
# Wait for completion and preserve order
|
||||||
|
chapter_files = [future.result() for future in futures]
|
||||||
|
|
||||||
return self.create_epub(author,title,chapter_files,dir_output)
|
return self.create_epub(author,title,chapter_files,dir_output)
|
||||||
|
|
||||||
@@ -174,8 +185,9 @@ def slugify(value, replacement='_'):
|
|||||||
@click.option('--silent', '-s', is_flag=True, help='Disable the progress bar')
|
@click.option('--silent', '-s', is_flag=True, help='Disable the progress bar')
|
||||||
@click.option('--path','-p',type=str,default='./', help='The path to which files are saved' )
|
@click.option('--path','-p',type=str,default='./', help='The path to which files are saved' )
|
||||||
@click.option('--no-clean',is_flag=True,help='Do not parse html files with blocklist')
|
@click.option('--no-clean',is_flag=True,help='Do not parse html files with blocklist')
|
||||||
|
@click.option('--max-workers', '-w', type=int, default=10, help='Number of concurrent download workers')
|
||||||
@click.argument('args', nargs=-1)
|
@click.argument('args', nargs=-1)
|
||||||
def main(args, debug, silent, path, no_clean):
|
def main(args, debug, silent, path, no_clean, max_workers):
|
||||||
'''
|
'''
|
||||||
Download ePUBs from https://www.projekt-gutenberg.org/ \n
|
Download ePUBs from https://www.projekt-gutenberg.org/ \n
|
||||||
Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
|
Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
|
||||||
@@ -198,10 +210,10 @@ def main(args, debug, silent, path, no_clean):
|
|||||||
logger.debug('Attempting to download from %d URL(s)', len(books))
|
logger.debug('Attempting to download from %d URL(s)', len(books))
|
||||||
converter = GBConvert(path)
|
converter = GBConvert(path)
|
||||||
if len(books)==1:
|
if len(books)==1:
|
||||||
converter.download(books[0], showprogress=not silent, cleanpages= not no_clean)
|
converter.download(books[0], showprogress=not silent, cleanpages= not no_clean, max_workers=max_workers)
|
||||||
else:
|
else:
|
||||||
for book in (tqdm(books) if not silent else books):
|
for book in (tqdm(books) if not silent else books):
|
||||||
converter.download(book, cleanpages= not no_clean)
|
converter.download(book, cleanpages= not no_clean, max_workers=max_workers)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user