feat: better crawling

This commit is contained in:
eneller
2025-02-24 21:17:32 +01:00
parent 8c37822a02
commit daddb58c3c
4 changed files with 41 additions and 28 deletions

21
src/epub2go/crawl.py Normal file
View File

@@ -0,0 +1,21 @@
import requests
from tqdm import tqdm
import os
from convert import GBConvert
import utils
def main() -> None:
    """Crawl every listed book and run the converter on each one.

    Retrieves the book links from ``utils.get_all_book_urls()``, builds a
    book URL from each link's ``href`` attribute and passes it to
    ``GBConvert(...).run()``. Links that carry no ``href`` are skipped.
    The loop is wrapped in ``tqdm`` so progress is reported while crawling.
    """
    books = utils.get_all_book_urls()
    # NOTE: consider making this a map()
    for book in tqdm(books):
        book_url_relative = book.get('href')
        if book_url_relative is None:
            # No link target to build a URL from; move on to the next entry.
            continue
        # Keep only the directory part of the href and drop its first five
        # characters before appending it to the site root.
        # NOTE(review): the slice presumably strips a fixed path prefix --
        # confirm against the hrefs returned by utils.get_all_book_urls().
        book_dir = os.path.dirname(book_url_relative)[5:]
        GBConvert(utils.root_url + book_dir).run()
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()