refactor: crawl unified from list
This commit is contained in:
@@ -6,23 +6,14 @@ from bs4 import ResultSet
|
||||
import os
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from convert import GBConvert, allbooks_url
|
||||
|
||||
def parse_book_tags() -> list[tuple[str, str | None]]:
    """Fetch the all-books index page and extract (title, href) pairs.

    Returns:
        A list of ``(book_title, relative_href)`` tuples, one per anchor
        found inside the page's first ``<dl>`` element. ``relative_href``
        is ``None`` when an anchor carries no ``href`` attribute
        (``Tag.get`` returns ``None`` for missing attributes).

    Raises:
        requests.HTTPError: if the index page request returns an error status.
    """
    response = requests.get(allbooks_url)
    response.raise_for_status()
    # from_encoding pins the decoding; the site is assumed UTF-8 — TODO confirm.
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    # Book links live inside the first <dl> on the page.
    anchors = soup.find('dl').find_all('a')
    return [(a.get_text(), a.get('href')) for a in anchors]
|
||||
from convert import GBConvert, allbooks_url, get_all_books
|
||||
|
||||
def main():
    """Crawl every book from the unified book list and convert each one.

    Uses ``get_all_books()`` (the unified list source from ``convert``)
    rather than scraping the index page directly; the old
    ``parse_book_tags`` + ``urljoin`` path is superseded by this.
    Entries whose ``'url'`` value is ``None`` are skipped.
    """
    books = get_all_books()
    # NOTE consider making this a map()
    for book in tqdm(books):
        # Each book is a mapping with a 'url' key — presumably the
        # absolute book URL; verify against get_all_books' contract.
        book_url = book['url']
        if book_url is not None:
            GBConvert(book_url).run()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user