feat: interactive cli

using fzf wrapped by pyfzf
This commit is contained in:
eneller
2025-02-25 12:18:13 +01:00
parent 7f488c638c
commit 8e0d92d796
6 changed files with 61 additions and 45 deletions

View File

@@ -2,8 +2,9 @@
Web-to-EPUB converter for https://projekt-gutenberg.org.
Requires:
- [pandoc](https://pandoc.org/)
- wget
- python
- [wget](https://www.gnu.org/software/wget/)
- [fzf](https://github.com/junegunn/fzf) (only for interactive mode)
- [python](https://www.python.org/) 3.12 or newer
## Usage
Invoke the script using the url of any page of the book you would like to download:
```
@@ -12,6 +13,6 @@ epub2go https://www.projekt-gutenberg.org/ibsen/solness/
## Installation
Assuming you have a recent version of python installed, run
```
pip install git+https://github.com/eneller/epub2go.py
pip install git+https://github.com/eneller/epub2go.py@latest
```
This will provide the 'epub2go' command.

View File

@@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"beautifulsoup4==4.12.3",
"pyfzf>=0.3.1", # hasnt been updated for some time
"requests==2.32.3",
"tqdm>=4.67.1",
"urllib3==2.2.2",

View File

@@ -1,12 +1,18 @@
import requests
from bs4 import BeautifulSoup
from bs4 import ResultSet
from urllib.parse import urljoin
from urllib.request import urlopen, urlparse
from urllib.request import urlparse
from tqdm import tqdm
from pyfzf.pyfzf import FzfPrompt
import os, sys
import importlib.resources as pkg_resources
from pathlib import Path
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
class GBConvert():
#TODO fix toc / headings
@@ -17,7 +23,7 @@ class GBConvert():
# NOTE move non-code files to data folder
self.style_path_drama = pkg_resources.files('epub2go').joinpath("drama.css")
self.blocklist = open(pkg_resources.files('epub2go').joinpath('blocklist.txt')).read().splitlines()
self.root = os.path.dirname(url) if url.endswith('html') else url
self.root = os.path.dirname(url)
self.url = urlparse(self.root)
self.output = self.url.netloc + self.url.path
self.standalone = standalone
@@ -81,12 +87,43 @@ class GBConvert():
self.create_epub(f'{self.title} - {self.author}.epub')
def get_all_books() -> list:
    """Return title and URL of every linked book on the index page.

    Each ``<a>`` tag returned by ``get_all_book_tags()`` that carries an
    ``href`` becomes one ``{'title': ..., 'url': ...}`` dict. The href is
    resolved against ``allbooks_url``; newlines and tabs are removed from the
    link text to form the title. Tags without an ``href`` are skipped.
    """
    # Translation table that deletes line breaks and tabs from the link text.
    drop_breaks = str.maketrans('', '', '\n\t')
    entries = []
    for tag in get_all_book_tags():
        href = tag.get('href')
        if href is None:
            continue
        absolute_url = urljoin(allbooks_url, href)
        clean_title = tag.getText().translate(drop_breaks)
        entries.append({'title': clean_title, 'url': absolute_url})
    return entries
def get_all_book_tags(timeout: float = 30) -> ResultSet:
    """Download the index of all works and return its link tags.

    Fetches ``allbooks_url``, parses the response body with BeautifulSoup's
    ``html.parser`` and returns every ``<a>`` element found inside the first
    ``<dl>`` element of the page.

    Args:
        timeout: Seconds to wait for the server. requests waits indefinitely
            when no timeout is given, so one is always passed here.

    Returns:
        The ``<a>`` tags inside the first ``<dl>`` of the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(allbooks_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    # NOTE(review): find() returns None when the page has no <dl>, in which
    # case the next line raises AttributeError.
    return soup.find('dl').find_all('a')
def main():
    """Command-line entry point: convert the requested books.

    Every command-line argument is treated as the URL of a book. When no
    argument is given, the list from ``get_all_books()`` is offered in an fzf
    prompt and the selected entries are converted instead.

    A single book is converted with ``standalone=True``; several books are
    converted one after another behind a tqdm progress bar.
    """
    # Copy the arguments instead of popping from sys.argv so the global
    # argument list is left untouched.
    args = sys.argv[1:]
    if args:
        # non-interactive mode
        books = args
    else:
        # interactive mode using fzf
        delimiter = ';'
        # create lines for fzf
        # TODO display author
        choices = [f"{item['title']} {delimiter} {item['url']}" for item in get_all_books()]
        fzf = FzfPrompt()
        # The character after -d must stay in sync with `delimiter`; it is
        # backslash-escaped in the option string.
        selection = fzf.prompt(choices=choices, fzf_options=r'--exact --with-nth 1 -m -d\;')
        # The URL is always the last field, so split from the right: a title
        # that itself contains the delimiter no longer yields a wrong field.
        books = [item.rsplit(delimiter, 1)[1].strip() for item in selection]

    if len(books) == 1:
        GBConvert(books[0], standalone=True).run()
    else:
        for book in tqdm(books):
            GBConvert(book).run()


if __name__ == "__main__":
    main()

View File

@@ -8,7 +8,7 @@ from convert import GBConvert
import utils
def main():
books = utils.get_all_book_urls()
books = utils.get_all_book_tags()
# NOTE consider making this a map()
for book in tqdm(books):
book_title = book.get_text()

View File

@@ -1,34 +0,0 @@
from bs4 import BeautifulSoup, ResultSet
import requests
from urllib.parse import urlparse, urljoin
import os
import json
import unicodedata
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
def get_all_book_urls(timeout: float = 30) -> ResultSet:
    """Download the index of all works and return its link tags.

    Despite the name, the result is the set of ``<a>`` tags found inside the
    first ``<dl>`` element of ``allbooks_url``, not a list of URL strings.

    Args:
        timeout: Seconds to wait for the server. requests waits indefinitely
            when no timeout is given, so one is always passed here.

    Returns:
        The ``<a>`` tags inside the first ``<dl>`` of the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(allbooks_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    # NOTE(review): find() returns None when the page has no <dl>, in which
    # case the next line raises AttributeError.
    return soup.find('dl').find_all('a')
def main():
    """Dump title and URL of every listed book to ``dict.json``.

    Collects one ``{'title': ..., 'url': ...}`` dict per link tag that has an
    ``href``, writes the list as JSON to ``dict.json`` in the current working
    directory and prints the number of entries.
    """
    # Translation table that deletes line breaks and tabs from the link text.
    drop_breaks = str.maketrans('', '', '\n\t')
    d = []
    for book in get_all_book_urls():
        book_href = book.get('href')
        if book_href is None:
            continue
        book_url = urljoin(allbooks_url, book_href)
        book_title = book.getText().translate(drop_breaks)
        d.append({'title': book_title, 'url': book_url})
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened explicitly as UTF-8 rather than with the platform default
    # encoding; the with-block also makes sure the handle is closed.
    with open('dict.json', 'w', encoding='utf-8') as out_file:
        json.dump(d, out_file, ensure_ascii=False)
    print(len(d))


if __name__ == '__main__':
    main()

11
uv.lock generated
View File

@@ -72,6 +72,7 @@ version = "1.0"
source = { editable = "." }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "pyfzf" },
{ name = "requests" },
{ name = "tqdm" },
{ name = "urllib3" },
@@ -80,6 +81,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "beautifulsoup4", specifier = "==4.12.3" },
{ name = "pyfzf", specifier = ">=0.3.1" },
{ name = "requests", specifier = "==2.32.3" },
{ name = "tqdm", specifier = ">=4.67.1" },
{ name = "urllib3", specifier = "==2.2.2" },
@@ -94,6 +96,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]
[[package]]
name = "pyfzf"
version = "0.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d4/4c/c0c658a1e1e9f0e01932990d7947579515fe048d0a515f07458ecd992b8f/pyfzf-0.3.1.tar.gz", hash = "sha256:dd902e34cffeca9c3082f96131593dd20b4b3a9bba5b9dde1b0688e424b46bd2", size = 3652 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/99/35/6a6c7b95390ec58904646a04f54e1b56fd57d7a247588b791c6331697797/pyfzf-0.3.1-py3-none-any.whl", hash = "sha256:736f71563461b75f6f85b55345bdc638fa0dc14c32c857c59e8b1ca1cfa3cf4a", size = 4315 },
]
[[package]]
name = "requests"
version = "2.32.3"