feat: better crawling

2025-02-24 21:17:32 +01:00
parent 8c37822a02
commit daddb58c3c
4 changed files with 41 additions and 28 deletions
--- a/src/epub2go/crawl.py
+++ b/src/epub2go/crawl.py
@@ -0,0 +1,21 @@
+import requests
+from tqdm import tqdm
+
+import os
+
+from convert import GBConvert
+import utils
+
+def main():  # Run GBConvert on every book link returned by utils.get_all_book_urls().
+    books = utils.get_all_book_urls()  # presumably parsed <a> elements (get_text()/get('href') are used below) - confirm in utils
+    # NOTE: consider making this a map()
+    for book in tqdm(books):  # tqdm wraps the iterable to show a progress bar
+        book_title = book.get_text()  # NOTE(review): assigned but never read in this function
+        book_url_relative = book.get('href')
+        if book_url_relative is not None:  # links without an href are skipped
+            book_url = utils.root_url + os.path.dirname(book_url_relative)[5:]  # dirname of the href minus its first 5 chars (presumably a fixed prefix - confirm)
+            GBConvert(book_url).run()
+
+
+if __name__ == "__main__":  # run the crawl only when executed as a script, not on import
+    main()