feat: interactive cli

using fzf wrapped by pyfzf
2025-02-25 12:18:13 +01:00
parent 7f488c638c
commit 8e0d92d796
6 changed files with 61 additions and 45 deletions
--- a/src/epub2go/convert.py
+++ b/src/epub2go/convert.py
@@ -1,12 +1,18 @@
 import requests
 from bs4 import BeautifulSoup
+from bs4 import ResultSet
 from urllib.parse import urljoin
-from urllib.request import urlopen, urlparse
+from urllib.request import  urlparse
 from tqdm import tqdm
+from pyfzf.pyfzf import FzfPrompt

 import os, sys
 import importlib.resources as pkg_resources
-from pathlib import Path
+
+# page that get_all_book_tags() scrapes for book links; root_url is its scheme://netloc prefix
+allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
+root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
+
 class GBConvert():
    #TODO fix toc / headings
    
@@ -17,7 +23,7 @@ class GBConvert():
        # NOTE move non-code files to data folder
        self.style_path_drama = pkg_resources.files('epub2go').joinpath("drama.css")
        self.blocklist = open(pkg_resources.files('epub2go').joinpath('blocklist.txt')).read().splitlines()
-        self.root = os.path.dirname(url) if url.endswith('html') else url
+        self.root = os.path.dirname(url)
        self.url = urlparse(self.root)
        self.output = self.url.netloc + self.url.path
        self.standalone = standalone
@@ -81,12 +87,43 @@ class GBConvert():
        
        self.create_epub(f'{self.title} - {self.author}.epub')
        
-    
+def get_all_books() -> list:
+    """Return a ``{'title', 'url'}`` dict for every index link that has an href."""
+    # translation table that deletes the newlines and tabs embedded in link text
+    drop_whitespace = str.maketrans('','', '\n\t')
+    return [
+        {'title': tag.getText().translate(drop_whitespace),
+         'url': urljoin(allbooks_url, tag.get('href'))}
+        for tag in get_all_book_tags()
+        if tag.get('href') is not None
+    ]
+
+def get_all_book_tags() -> ResultSet:
+    """Download the index page ``allbooks_url`` and return the <a> tags inside its first <dl>."""
+    # timeout: requests waits indefinitely by default, which would freeze the CLI on a stalled server
+    response = requests.get(allbooks_url, timeout=30)
+    response.raise_for_status()
+    return BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8').find('dl').find_all('a')
    
 def main():
-    g = GBConvert(sys.argv[1], standalone=True)
-    g.run()
-
+    """Convert the book URLs given as CLI arguments, or those picked interactively in fzf if none are given."""
+    # non-interactive mode: slicing (rather than pop) leaves the global sys.argv intact
+    books = sys.argv[1:]
+    # interactive mode using fzf
+    if not books:
+        delimiter = ';'
+        # create lines for fzf, shaped "<title> ; <url>"
+        # TODO display author
+        lines = [f"{item['title']} {delimiter} {item['url']}" for item in get_all_books()]
+        # --with-nth 1 shows only the title field, -m allows picking several books
+        selection = FzfPrompt().prompt(choices=lines, fzf_options=r'--exact --with-nth 1 -m -d\;')
+        # the URL is the last field; rsplit keeps a ';' inside a title from being mistaken for it
+        books = [line.rsplit(delimiter, 1)[-1].strip() for line in selection]

+    if len(books) == 1:
+        GBConvert(books[0], standalone=True).run()
+    else:
+        for book in tqdm(books):
+            GBConvert(book).run()
 if __name__ == "__main__":
    main()
--- a/src/epub2go/crawl.py
+++ b/src/epub2go/crawl.py
@@ -8,7 +8,7 @@ from convert import GBConvert
 import utils

 def main():
-    books = utils.get_all_book_urls()
+    books = utils.get_all_book_tags()
    # NOTE consider making this a map()
    for book in tqdm(books):
        book_title = book.get_text()
--- a/src/epub2go/utils.py
+++ b/src/epub2go/utils.py
@@ -1,34 +0,0 @@
-from bs4 import BeautifulSoup, ResultSet
-import requests
-
-from urllib.parse import urlparse, urljoin
-import os
-import json
-import unicodedata
-
-allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
-root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
-
-
-def get_all_book_urls ()-> ResultSet:
-    response = requests.get(allbooks_url)
-    response.raise_for_status()
-    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
-    books = soup.find('dl').find_all('a')
-    return books
-
-def main():
-    books = get_all_book_urls()
-    d = []
-    for book in books:
-        book_href = book.get('href')
-        if book_href is not None:
-            book_url = urljoin(allbooks_url, book_href)
-            book_title = book.getText().translate(str.maketrans('','', '\n\t'))
-            d.append({'title': book_title, 'url': book_url})
-
-    json.dump(d, open('dict.json', 'w' ), ensure_ascii=False)
-    print(len(d))
-    
-if __name__ == '__main__':
-    main()