refactor: better typing

Replace the per-book dict with a Book dataclass.
2025-03-03 23:09:19 +01:00
parent 7be0fbc126
commit 9ae25e40ad
2 changed files with 16 additions and 10 deletions
--- a/src/epub2go/convert.py
+++ b/src/epub2go/convert.py
@@ -8,14 +8,19 @@ from pyfzf.pyfzf import FzfPrompt

 import os, sys, subprocess, shlex
 import importlib.resources as pkg_resources
+from dataclasses import dataclass
+from typing import List


 allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
 root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))

+@dataclass
+class Book():
+    author: str
+    title: str
+    url: str
 class GBConvert():
-    #TODO fix toc / headings
-    
    def __init__(self,
        url:str,
        standalone = False,
@@ -52,6 +57,8 @@ class GBConvert():


    def create_epub(self,  filename='out.epub'):
+        #TODO --epub-cover-image
+        #TODO toc if it isn't described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
        command = f'''pandoc -f html -t epub \
                    -o "{filename}" \
                    --reference-location=section \
@@ -59,7 +66,7 @@ class GBConvert():
                    --metadata title="{self.title}" \
                    --metadata author="{self.author}" \
                    --epub-title-page=false \
-                    {" ".join(self.chapters)} '''#TODO --epub-cover-image
+                    {" ".join(self.chapters)} '''
        return subprocess.Popen(shlex.split(command), cwd=self.output)

    def save_page(self, url):
@@ -87,7 +94,7 @@ class GBConvert():
        return self.create_epub(f'{self.title} - {self.author}.epub')

 # get a list of all books for interactive selection or scraping
-def get_all_books() -> list:
+def get_all_books() -> List[Book]:
    response = requests.get(allbooks_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
@@ -112,7 +119,7 @@ def get_all_books() -> list:
                book_href = book_tag.get('href')
                book_url = urljoin(allbooks_url, book_href)
                book_title = ' '.join(book_tag.getText().split())
-                book = {'author': book_author, 'title': book_title, 'url': book_url}
+                book = Book(book_author, book_title, book_url)
                books.append(book)
    return books

@@ -126,7 +133,7 @@ def main():
    else:
        delimiter = ';'
        # create lines for fzf
-        books = [f"{item['author']} - {item['title']} {delimiter} {item['url']}" for item in get_all_books()]
+        books = [f"{ item.author } - { item.title } {delimiter} { item.url }" for item in get_all_books()]
        fzf = FzfPrompt()
        selection = fzf.prompt(choices=books,  fzf_options=r'--exact --with-nth 1 -m -d\;')
        books = [item.split(';')[1].strip() for item in selection]
--- a/src/epub2go/crawl.py
+++ b/src/epub2go/crawl.py
@@ -6,15 +6,14 @@ from bs4 import ResultSet
 import os
 from urllib.parse import urljoin

-from convert import GBConvert, allbooks_url, get_all_books
+from convert import GBConvert, allbooks_url, get_all_books, Book

 def main():
    books = get_all_books()
    # NOTE consider making this a map()
    for book in tqdm(books):
-        book_url = book['url']
-        if book_url is not None:
-            GBConvert(book_url).run()
+        if book.url is not None:
+            GBConvert(book.url).run()


 if __name__ == "__main__":