5 Commits
v2.1 ... v2.2.3

Author SHA1 Message Date
eneller
75974ae119 fix: slugify filenames 2025-04-06 10:29:19 +02:00
eneller
b3cd49326f feat: prettier logging 2025-04-05 01:42:20 +02:00
eneller
401d02e0ca fix: parameter getdir 2025-04-02 11:26:24 +02:00
eneller
660af7fab0 feat: allow getting directory without download 2025-03-23 23:55:05 +01:00
eneller
c49a1be369 docs: readme 2025-03-20 22:11:12 +01:00
4 changed files with 55 additions and 21 deletions

View File

@@ -1,18 +1,39 @@
# epub2go.py # epub2go.py
web to epub converter for https://projekt-gutenberg.org. Web to ePUB Converter for [projekt-gutenberg.org](https://projekt-gutenberg.org) developed in conjunction with a [web interface](https://github.com/eneller/epub2go-web).
## Installation
Requires: Requires:
- [pandoc](https://pandoc.org/) - [pandoc](https://pandoc.org/)
- [wget](https://www.gnu.org/software/wget/) - [wget](https://www.gnu.org/software/wget/)
- [fzf](https://github.com/junegunn/fzf) (only for interactive mode) - [fzf](https://github.com/junegunn/fzf) (optional, only for interactive mode)
- python (duh) - [python](https://www.python.org/) (duh)
## Usage
Invoke the script using the url of any page of the book you would like to download: Assuming you have a recent version of python installed, run
```
epub2go https://www.projekt-gutenberg.org/ibsen/solness/ ```
pip install git+https://github.com/eneller/epub2go.py
```
This will provide the `epub2go` command.
## Usage
```
Usage: epub2go [OPTIONS] [ARGS]...
Download ePUBs from https://www.projekt-gutenberg.org/
Provide either 0 arguments to enter interactive mode or an arbitrary number
of URLs to download from
Options:
-d, --debug Set the log level to DEBUG
-s, --silent Disable the progress bar
-p, --path TEXT The path to which files are saved
--no-clean Do not parse html files with blocklist
--help Show this message and exit.
```
Examples:
```bash
epub2go https://www.projekt-gutenberg.org/ibsen/solness/
epub2go # will enter interactive mode
``` ```
## Installation
Assuming you have a recent version of python installed, run
```
pip install git+https://github.com/eneller/epub2go.py
```
This will provide the 'epub2go' command.

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "epub2go" name = "epub2go"
version = "2.1" version = "2.2.3"
description = "EPUB converter using wget, pandoc and python glue" description = "EPUB converter using wget, pandoc and python glue"
readme = "README.md" readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.12"

View File

@@ -7,7 +7,7 @@ from tqdm import tqdm
from pyfzf.pyfzf import FzfPrompt from pyfzf.pyfzf import FzfPrompt
import click import click
import os, subprocess, shlex, logging import os, subprocess, shlex, logging, re
import importlib.resources as pkg_resources import importlib.resources as pkg_resources
from dataclasses import dataclass from dataclasses import dataclass
from typing import List from typing import List
@@ -31,6 +31,13 @@ class GBConvert():
self.blocklist = blocklist.read().splitlines() self.blocklist = blocklist.read().splitlines()
self.dir_download = downloaddir self.dir_download = downloaddir
def getDir(self, url):
    """Return the local directory where the pages of a book end up.

    The last component of ``url`` is dropped to obtain the table-of-contents
    URL; its host and path are then appended to ``self.dir_download``.
    Nothing is downloaded or created on disk by this call.

    Args:
        url: URL of any page of the book.

    Returns:
        ``self.dir_download`` joined with ``<host><path>`` of the ToC URL.
    """
    # Dropping the final path component yields the ToC website URL.
    toc_url = os.path.dirname(url)
    parts = urlparse(toc_url)
    # wget recreates the URL as a directory tree below the download directory.
    mirrored_path = parts.netloc + parts.path
    return os.path.join(self.dir_download, mirrored_path)
def download(self, def download(self,
url:str, url:str,
author:str = None, author:str = None,
@@ -39,8 +46,7 @@ class GBConvert():
cleanpages: bool = True, cleanpages: bool = True,
): ):
tocpage = os.path.dirname(url) # ToC website url tocpage = os.path.dirname(url) # ToC website url
url = urlparse(tocpage) dir_output = self.getDir(url)
dir_output = os.path.join(self.dir_download, url.netloc + url.path )# directories created by wget recreating the URL
logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output) logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output)
author = author author = author
title = title title = title
@@ -99,8 +105,7 @@ class GBConvert():
def create_epub(self, author, title, chapters, dir_output): def create_epub(self, author, title, chapters, dir_output):
#TODO --epub-cover-image #TODO --epub-cover-image
#TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/ #TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
filename = f'{title} - {author}.epub' filename = slugify(f'{title} - {author}.epub')
logger.debug('Creating epub as "%s"',filename)
command = f'''pandoc -f html -t epub \ command = f'''pandoc -f html -t epub \
-o "{filename}" \ -o "{filename}" \
--reference-location=section \ --reference-location=section \
@@ -109,6 +114,7 @@ class GBConvert():
--metadata author="{author}" \ --metadata author="{author}" \
--epub-title-page=false \ --epub-title-page=false \
{" ".join(chapters)} ''' {" ".join(chapters)} '''
logger.debug('Calling "%s"', command)
subprocess.run(shlex.split(command), cwd=dir_output, check=True) subprocess.run(shlex.split(command), cwd=dir_output, check=True)
return os.path.abspath(os.path.join(dir_output,filename)) return os.path.abspath(os.path.join(dir_output,filename))
@@ -154,6 +160,13 @@ def get_all_books() -> List[Book]:
books.append(book) books.append(book)
return books return books
def slugify(value, replacement='_', max_length=255):
    """Turn arbitrary text into a string that is safe to use as a file name.

    Args:
        value: The proposed file name (may include an extension).
        replacement: Text substituted for every forbidden character.
        max_length: Maximum number of characters in the result.

    Returns:
        The sanitized name, or ``"untitled"`` if nothing usable remains.
    """
    # Reserved file-name characters and ASCII control characters.
    value = re.sub(r'[<>:"/\\|?*\x00-\x1F]', replacement, value)
    # Trim whitespace and dots in one pass; chaining strip() and strip(".")
    # would leave the space of "name ." (or ". name") behind.
    value = re.sub(r'^[\s.]+|[\s.]+$', '', value)
    if len(value) > max_length:
        stem, ext = os.path.splitext(value)
        # Only preserve short, plausible extensions (e.g. ".epub"); anything
        # else is treated as part of the name and cut like the rest.
        if not (1 < len(ext) <= 10) or len(ext) >= max_length:
            stem, ext = value, ''
        # Cutting may expose a new trailing dot or space, so trim the stem again.
        value = re.sub(r'[\s.]+$', '', stem[:max_length - len(ext)]) + ext
    return value or "untitled"
# run main cli # run main cli
@click.command() @click.command()
#TODO include images flag #TODO include images flag
@@ -164,7 +177,7 @@ def get_all_books() -> List[Book]:
@click.argument('args', nargs=-1) @click.argument('args', nargs=-1)
def main(args, debug, silent, path, no_clean): def main(args, debug, silent, path, no_clean):
''' '''
Download ePUBs from https://www.projekt-gutenberg.org/ Download ePUBs from https://www.projekt-gutenberg.org/ \n
Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
''' '''
logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s')

2
uv.lock generated
View File

@@ -81,7 +81,7 @@ wheels = [
[[package]] [[package]]
name = "epub2go" name = "epub2go"
version = "1.2" version = "2.2"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "beautifulsoup4" }, { name = "beautifulsoup4" },