feat: better crawling
This commit is contained in:
21
src/epub2go/crawl.py
Normal file
21
src/epub2go/crawl.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
import os
|
||||
|
||||
from convert import GBConvert
|
||||
import utils
|
||||
|
||||
def main():
    """Convert every book listed by ``utils.get_all_book_urls()``.

    For each entry that carries an ``href``, the book URL is built from
    ``utils.root_url`` plus the directory part of that href, and the result
    is handed to ``GBConvert(...).run()``. Entries without an ``href`` are
    skipped. Progress over the list is displayed with ``tqdm``.
    """
    books = utils.get_all_book_urls()
    # NOTE consider making this a map()
    for book in tqdm(books):
        book_url_relative = book.get('href')
        if book_url_relative is None:
            # Nothing to resolve for this entry.
            continue
        # Keep only the directory part of the href and drop its first five
        # characters before appending it to the root URL.
        # NOTE(review): presumably the slice strips a fixed path prefix that
        # utils.root_url already contains -- confirm against utils.
        book_url = utils.root_url + os.path.dirname(book_url_relative)[5:]
        GBConvert(book_url).run()
|
||||
|
||||
|
||||
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user