feat: interactive cli
using fzf wrapped by pyfzf
This commit is contained in:
@@ -1,12 +1,18 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import ResultSet
|
||||
from urllib.parse import urljoin
|
||||
from urllib.request import urlopen, urlparse
|
||||
from urllib.request import urlparse
|
||||
from tqdm import tqdm
|
||||
from pyfzf.pyfzf import FzfPrompt
|
||||
|
||||
import os, sys
|
||||
import importlib.resources as pkg_resources
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
||||
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
||||
|
||||
class GBConvert():
|
||||
#TODO fix toc / headings
|
||||
|
||||
@@ -17,7 +23,7 @@ class GBConvert():
|
||||
# NOTE move non-code files to data folder
|
||||
self.style_path_drama = pkg_resources.files('epub2go').joinpath("drama.css")
|
||||
self.blocklist = open(pkg_resources.files('epub2go').joinpath('blocklist.txt')).read().splitlines()
|
||||
self.root = os.path.dirname(url) if url.endswith('html') else url
|
||||
self.root = os.path.dirname(url)
|
||||
self.url = urlparse(self.root)
|
||||
self.output = self.url.netloc + self.url.path
|
||||
self.standalone = standalone
|
||||
@@ -81,12 +87,43 @@ class GBConvert():
|
||||
|
||||
self.create_epub(f'{self.title} - {self.author}.epub')
|
||||
|
||||
|
||||
def get_all_books() -> list:
    """Collect title and URL for every linked book in the index page.

    The anchors come from ``get_all_book_tags()``. Anchors that carry no
    ``href`` attribute are skipped.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, one per anchor with
        an ``href``, in the order the anchors were returned.
    """
    # Translation table that deletes newlines and tabs from the link text.
    drop_breaks = str.maketrans('', '', '\n\t')
    entries = []
    for anchor in get_all_book_tags():
        href = anchor.get('href')
        if href is None:
            continue
        # Resolve the (possibly relative) href against the index page URL.
        resolved = urljoin(allbooks_url, href)
        label = anchor.getText().translate(drop_breaks)
        entries.append({'title': label, 'url': resolved})
    return entries
|
||||
|
||||
def get_all_book_tags() -> ResultSet:
    """Fetch the index page at ``allbooks_url`` and return its book anchors.

    Returns:
        Every ``<a>`` tag found inside the first ``<dl>`` element of the page.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within the timeout.
        AttributeError: if the page contains no ``<dl>`` element.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the program forever.
    response = requests.get(allbooks_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    books = soup.find('dl').find_all('a')
    return books
|
||||
|
||||
def main():
    """Convert the books given on the command line, or pick them with fzf.

    Non-interactive mode: every command-line argument is treated as a book
    URL. Interactive mode (no arguments): the entries from ``get_all_books()``
    are offered through fzf and the selected ones are converted.

    A single book is converted with ``standalone=True``; several books are
    converted one after another behind a tqdm progress bar.
    """
    delimiter = ';'

    # Copy instead of popping so the process-wide sys.argv stays untouched.
    books = sys.argv[1:]

    # interactive mode using fzf
    if not books:
        # create lines for fzf
        # TODO display author
        choices = [f"{item['title']} {delimiter} {item['url']}" for item in get_all_books()]
        fzf = FzfPrompt()
        # NOTE: the field separator passed to fzf (-d) must match `delimiter`.
        selection = fzf.prompt(choices=choices, fzf_options=r'--exact --with-nth 1 -m -d\;')
        # The URL is the last field; splitting from the right keeps this
        # correct even when a title itself contains the delimiter.
        books = [line.rsplit(delimiter, 1)[-1].strip() for line in selection]

    if len(books) == 1:
        GBConvert(books[0], standalone=True).run()
    else:
        for book in tqdm(books):
            GBConvert(book).run()
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -8,7 +8,7 @@ from convert import GBConvert
|
||||
import utils
|
||||
|
||||
def main():
|
||||
books = utils.get_all_book_urls()
|
||||
books = utils.get_all_book_tags()
|
||||
# NOTE consider making this a map()
|
||||
for book in tqdm(books):
|
||||
book_title = book.get_text()
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
from bs4 import BeautifulSoup, ResultSet
|
||||
import requests
|
||||
|
||||
from urllib.parse import urlparse, urljoin
|
||||
import os
|
||||
import json
|
||||
import unicodedata
|
||||
|
||||
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
||||
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
||||
|
||||
|
||||
def get_all_book_urls() -> ResultSet:
    """Download the index page at ``allbooks_url`` and return its anchors.

    Returns:
        All ``<a>`` tags located inside the first ``<dl>`` element.

    Raises:
        requests.HTTPError: if the response carries an error status.
        requests.Timeout: if no response arrives within the timeout.
        AttributeError: if the page has no ``<dl>`` element.
    """
    # An explicit timeout is required: requests waits indefinitely otherwise.
    response = requests.get(allbooks_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    books = soup.find('dl').find_all('a')
    return books
|
||||
|
||||
def main():
    """Write title and URL of every book in the index to ``dict.json``.

    Anchors without an ``href`` attribute are skipped. The number of entries
    written is printed afterwards.
    """
    # Translation table that deletes newlines and tabs from the link text.
    drop_breaks = str.maketrans('', '', '\n\t')
    d = []
    for book in get_all_book_urls():
        book_href = book.get('href')
        if book_href is None:
            continue
        book_url = urljoin(allbooks_url, book_href)
        book_title = book.getText().translate(drop_breaks)
        d.append({'title': book_title, 'url': book_url})

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of the platform default; the
    # context manager guarantees the file is flushed and closed.
    with open('dict.json', 'w', encoding='utf-8') as out:
        json.dump(d, out, ensure_ascii=False)
    print(len(d))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user