4 Commits
v2.0 ... v2.2.1

Author SHA1 Message Date
eneller
401d02e0ca fix: parameter getdir 2025-04-02 11:26:24 +02:00
eneller
660af7fab0 feat: allow getting directory without download 2025-03-23 23:55:05 +01:00
eneller
c49a1be369 docs: readme 2025-03-20 22:11:12 +01:00
eneller
4267700763 feat: return epub path
errors from wget and pandoc are thrown up
2025-03-16 20:30:42 +01:00
4 changed files with 50 additions and 22 deletions

View File

@@ -1,18 +1,39 @@
# epub2go.py # epub2go.py
web to epub converter for https://projekt-gutenberg.org. Web to ePUB Converter for [projekt-gutenberg.org](https://projekt-gutenberg.org) developed in conjunction with a [web interface](https://github.com/eneller/epub2go-web).
## Installation
Requires: Requires:
- [pandoc](https://pandoc.org/) - [pandoc](https://pandoc.org/)
- [wget](https://www.gnu.org/software/wget/) - [wget](https://www.gnu.org/software/wget/)
- [fzf](https://github.com/junegunn/fzf) (only for interactive mode) - [fzf](https://github.com/junegunn/fzf) (optional, only for interactive mode)
- python (duh) - [python](https://www.python.org/) (duh)
## Usage
Invoke the script using the url of any page of the book you would like to download:
```
epub2go https://www.projekt-gutenberg.org/ibsen/solness/
```
## Installation
Assuming you have a recent version of python installed, run Assuming you have a recent version of python installed, run
``` ```
pip install git+https://github.com/eneller/epub2go.py pip install git+https://github.com/eneller/epub2go.py
``` ```
This will provide the 'epub2go' command. This will provide the `epub2go` command.
## Usage
```
Usage: epub2go [OPTIONS] [ARGS]...
Download ePUBs from https://www.projekt-gutenberg.org/
Provide either 0 arguments to enter interactive mode or an arbitrary number
of URLs to download from
Options:
-d, --debug Set the log level to DEBUG
-s, --silent Disable the progress bar
-p, --path TEXT The path to which files are saved
--no-clean Do not parse html files with blocklist
--help Show this message and exit.
```
Examples:
```bash
epub2go https://www.projekt-gutenberg.org/ibsen/solness/
epub2go # will enter interactive mode
```

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "epub2go" name = "epub2go"
version = "2.0" version = "2.2.1"
description = "EPUB converter using wget, pandoc and python glue" description = "EPUB converter using wget, pandoc and python glue"
readme = "README.md" readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.12"

View File

@@ -31,6 +31,13 @@ class GBConvert():
self.blocklist = blocklist.read().splitlines() self.blocklist = blocklist.read().splitlines()
self.dir_download = downloaddir self.dir_download = downloaddir
# Return the directory below self.dir_download that corresponds to `url`.
# Only computes the path; nothing is downloaded or created here.
def getDir(self, url):
# drop the last path component of the given page url to get the ToC page
tocpage = os.path.dirname(url) # ToC website url
parsed_url = urlparse(tocpage)
# directories created by wget recreating the URL
# (host name followed by the url path, joined onto the download directory)
dir_output = os.path.join(self.dir_download, parsed_url.netloc + parsed_url.path )
return dir_output
def download(self, def download(self,
url:str, url:str,
author:str = None, author:str = None,
@@ -39,8 +46,7 @@ class GBConvert():
cleanpages: bool = True, cleanpages: bool = True,
): ):
tocpage = os.path.dirname(url) # ToC website url tocpage = os.path.dirname(url) # ToC website url
url = urlparse(tocpage) dir_output = self.getDir(url)
dir_output = os.path.join(self.dir_download, url.netloc + url.path )# directories created by wget recreating the URL
logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output) logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output)
author = author author = author
title = title title = title
@@ -96,7 +102,7 @@ class GBConvert():
f.write(str(soup)) f.write(str(soup))
logger.debug('Removed %d tags from page %s during parsing', count, file_path) logger.debug('Removed %d tags from page %s during parsing', count, file_path)
def create_epub(self, author, title, chapters, dir_output)-> int: def create_epub(self, author, title, chapters, dir_output):
#TODO --epub-cover-image #TODO --epub-cover-image
#TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/ #TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
filename = f'{title} - {author}.epub' filename = f'{title} - {author}.epub'
@@ -109,7 +115,8 @@ class GBConvert():
--metadata author="{author}" \ --metadata author="{author}" \
--epub-title-page=false \ --epub-title-page=false \
{" ".join(chapters)} ''' {" ".join(chapters)} '''
return subprocess.run(shlex.split(command), cwd=dir_output).returncode subprocess.run(shlex.split(command), cwd=dir_output, check=True)
return os.path.abspath(os.path.join(dir_output,filename))
def save_page(self, url): def save_page(self, url):
logger.debug('Saving page at %s', url) logger.debug('Saving page at %s', url)
@@ -121,7 +128,7 @@ class GBConvert():
--tries=5 \ --tries=5 \
--quiet \ --quiet \
{url}''' {url}'''
return subprocess.run(shlex.split(command), cwd=self.dir_download).returncode subprocess.run(shlex.split(command), cwd=self.dir_download, check=True)
# get a list of all books for interactive selection or scraping # get a list of all books for interactive selection or scraping
def get_all_books() -> List[Book]: def get_all_books() -> List[Book]:
@@ -163,7 +170,7 @@ def get_all_books() -> List[Book]:
@click.argument('args', nargs=-1) @click.argument('args', nargs=-1)
def main(args, debug, silent, path, no_clean): def main(args, debug, silent, path, no_clean):
''' '''
Download ePUBs from https://www.projekt-gutenberg.org/ Download ePUBs from https://www.projekt-gutenberg.org/ \n
Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
''' '''
logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s')

2
uv.lock generated
View File

@@ -81,7 +81,7 @@ wheels = [
[[package]] [[package]]
name = "epub2go" name = "epub2go"
version = "1.2" version = "2.2"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "beautifulsoup4" }, { name = "beautifulsoup4" },