refactor: better typing

Replace the per-book dict returned by get_all_books() with a Book dataclass (author, title, url).
This commit is contained in:
eneller
2025-03-03 23:09:19 +01:00
parent 7be0fbc126
commit 9ae25e40ad
2 changed files with 16 additions and 10 deletions

View File

@@ -8,14 +8,19 @@ from pyfzf.pyfzf import FzfPrompt
import os, sys, subprocess, shlex import os, sys, subprocess, shlex
import importlib.resources as pkg_resources import importlib.resources as pkg_resources
from dataclasses import dataclass
from typing import List
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html' allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url)) root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
@dataclass
class Book:
    """One entry of the Projekt Gutenberg book index.

    Replaces the former ``{'author': ..., 'title': ..., 'url': ...}`` dict
    built by ``get_all_books()``; the fields keep the names and order of
    the old keys, so positional construction ``Book(author, title, url)``
    works.
    """

    # Author the book is listed under in the index.
    author: str
    # Link text of the book entry, with whitespace runs collapsed to one space.
    title: str
    # Book link, resolved against the index page URL via urljoin.
    url: str
class GBConvert(): class GBConvert():
#TODO fix toc / headings
def __init__(self, def __init__(self,
url:str, url:str,
standalone = False, standalone = False,
@@ -52,6 +57,8 @@ class GBConvert():
def create_epub(self, filename='out.epub'): def create_epub(self, filename='out.epub'):
#TODO --epub-cover-image
#TODO toc if it isn't described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
command = f'''pandoc -f html -t epub \ command = f'''pandoc -f html -t epub \
-o "{filename}" \ -o "{filename}" \
--reference-location=section \ --reference-location=section \
@@ -59,7 +66,7 @@ class GBConvert():
--metadata title="{self.title}" \ --metadata title="{self.title}" \
--metadata author="{self.author}" \ --metadata author="{self.author}" \
--epub-title-page=false \ --epub-title-page=false \
{" ".join(self.chapters)} '''#TODO --epub-cover-image {" ".join(self.chapters)} '''
return subprocess.Popen(shlex.split(command), cwd=self.output) return subprocess.Popen(shlex.split(command), cwd=self.output)
def save_page(self, url): def save_page(self, url):
@@ -87,7 +94,7 @@ class GBConvert():
return self.create_epub(f'{self.title} - {self.author}.epub') return self.create_epub(f'{self.title} - {self.author}.epub')
# get a list of all books for interactive selection or scraping # get a list of all books for interactive selection or scraping
def get_all_books() -> list: def get_all_books() -> List[Book]:
response = requests.get(allbooks_url) response = requests.get(allbooks_url)
response.raise_for_status() response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8') soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
@@ -112,7 +119,7 @@ def get_all_books() -> list:
book_href = book_tag.get('href') book_href = book_tag.get('href')
book_url = urljoin(allbooks_url, book_href) book_url = urljoin(allbooks_url, book_href)
book_title = ' '.join(book_tag.getText().split()) book_title = ' '.join(book_tag.getText().split())
book = {'author': book_author, 'title': book_title, 'url': book_url} book = Book(book_author, book_title, book_url)
books.append(book) books.append(book)
return books return books
@@ -126,7 +133,7 @@ def main():
else: else:
delimiter = ';' delimiter = ';'
# create lines for fzf # create lines for fzf
books = [f"{item['author']} - {item['title']} {delimiter} {item['url']}" for item in get_all_books()] books = [f"{ item.author } - { item.title } {delimiter} { item.url }" for item in get_all_books()]
fzf = FzfPrompt() fzf = FzfPrompt()
selection = fzf.prompt(choices=books, fzf_options=r'--exact --with-nth 1 -m -d\;') selection = fzf.prompt(choices=books, fzf_options=r'--exact --with-nth 1 -m -d\;')
books = [item.split(';')[1].strip() for item in selection] books = [item.split(';')[1].strip() for item in selection]

View File

@@ -6,15 +6,14 @@ from bs4 import ResultSet
import os import os
from urllib.parse import urljoin from urllib.parse import urljoin
from convert import GBConvert, allbooks_url, get_all_books from convert import GBConvert, allbooks_url, get_all_books, Book
def main(): def main():
books = get_all_books() books = get_all_books()
# NOTE consider making this a map() # NOTE consider making this a map()
for book in tqdm(books): for book in tqdm(books):
book_url = book['url'] if book.url is not None:
if book_url is not None: GBConvert(book.url).run()
GBConvert(book_url).run()
if __name__ == "__main__": if __name__ == "__main__":