refactor: better typing
dataclass replacing dict
This commit is contained in:
@@ -8,14 +8,19 @@ from pyfzf.pyfzf import FzfPrompt
|
|||||||
|
|
||||||
import os, sys, subprocess, shlex
|
import os, sys, subprocess, shlex
|
||||||
import importlib.resources as pkg_resources
|
import importlib.resources as pkg_resources
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
||||||
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
||||||
|
|
||||||
|
@dataclass
class Book:
    """A single book entry scraped from the index page at allbooks_url.

    Replaces the plain dict with 'author', 'title' and 'url' keys that
    get_all_books() used to build. The field order is significant: callers
    construct instances positionally as Book(author, title, url).
    """

    # Author the book is listed under.
    author: str
    # Link text of the book entry, with whitespace runs collapsed to one space.
    title: str
    # Book link resolved against allbooks_url via urljoin.
    url: str
|
||||||
class GBConvert():
|
class GBConvert():
|
||||||
#TODO fix toc / headings
|
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
url:str,
|
url:str,
|
||||||
standalone = False,
|
standalone = False,
|
||||||
@@ -52,6 +57,8 @@ class GBConvert():
|
|||||||
|
|
||||||
|
|
||||||
def create_epub(self, filename='out.epub'):
|
def create_epub(self, filename='out.epub'):
|
||||||
|
#TODO --epub-cover-image
|
||||||
|
#TODO toc if it isn't described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
|
||||||
command = f'''pandoc -f html -t epub \
|
command = f'''pandoc -f html -t epub \
|
||||||
-o "{filename}" \
|
-o "{filename}" \
|
||||||
--reference-location=section \
|
--reference-location=section \
|
||||||
@@ -59,7 +66,7 @@ class GBConvert():
|
|||||||
--metadata title="{self.title}" \
|
--metadata title="{self.title}" \
|
||||||
--metadata author="{self.author}" \
|
--metadata author="{self.author}" \
|
||||||
--epub-title-page=false \
|
--epub-title-page=false \
|
||||||
{" ".join(self.chapters)} '''#TODO --epub-cover-image
|
{" ".join(self.chapters)} '''
|
||||||
return subprocess.Popen(shlex.split(command), cwd=self.output)
|
return subprocess.Popen(shlex.split(command), cwd=self.output)
|
||||||
|
|
||||||
def save_page(self, url):
|
def save_page(self, url):
|
||||||
@@ -87,7 +94,7 @@ class GBConvert():
|
|||||||
return self.create_epub(f'{self.title} - {self.author}.epub')
|
return self.create_epub(f'{self.title} - {self.author}.epub')
|
||||||
|
|
||||||
# get a list of all books for interactive selection or scraping
|
# get a list of all books for interactive selection or scraping
|
||||||
def get_all_books() -> list:
|
def get_all_books() -> List[Book]:
|
||||||
response = requests.get(allbooks_url)
|
response = requests.get(allbooks_url)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
|
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
|
||||||
@@ -112,7 +119,7 @@ def get_all_books() -> list:
|
|||||||
book_href = book_tag.get('href')
|
book_href = book_tag.get('href')
|
||||||
book_url = urljoin(allbooks_url, book_href)
|
book_url = urljoin(allbooks_url, book_href)
|
||||||
book_title = ' '.join(book_tag.getText().split())
|
book_title = ' '.join(book_tag.getText().split())
|
||||||
book = {'author': book_author, 'title': book_title, 'url': book_url}
|
book = Book(book_author, book_title, book_url)
|
||||||
books.append(book)
|
books.append(book)
|
||||||
return books
|
return books
|
||||||
|
|
||||||
@@ -126,7 +133,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
delimiter = ';'
|
delimiter = ';'
|
||||||
# create lines for fzf
|
# create lines for fzf
|
||||||
books = [f"{item['author']} - {item['title']} {delimiter} {item['url']}" for item in get_all_books()]
|
books = [f"{ item.author } - { item.title } {delimiter} { item.url }" for item in get_all_books()]
|
||||||
fzf = FzfPrompt()
|
fzf = FzfPrompt()
|
||||||
selection = fzf.prompt(choices=books, fzf_options=r'--exact --with-nth 1 -m -d\;')
|
selection = fzf.prompt(choices=books, fzf_options=r'--exact --with-nth 1 -m -d\;')
|
||||||
books = [item.split(';')[1].strip() for item in selection]
|
books = [item.split(';')[1].strip() for item in selection]
|
||||||
|
|||||||
@@ -6,15 +6,14 @@ from bs4 import ResultSet
|
|||||||
import os
|
import os
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from convert import GBConvert, allbooks_url, get_all_books
|
from convert import GBConvert, allbooks_url, get_all_books, Book
|
||||||
|
|
||||||
def main():
    """Convert every book returned by get_all_books().

    Iterates over the book list (wrapped in tqdm) and runs a GBConvert
    for each book that has a URL.
    """
    # NOTE consider making this a map()
    for book in tqdm(get_all_books()):
        # get_all_books() yields Book dataclass instances, so the URL is
        # read as an attribute rather than via a dict key.
        if book.url is None:
            continue
        GBConvert(book.url).run()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user