Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75974ae119 | ||
|
|
b3cd49326f | ||
|
|
401d02e0ca | ||
|
|
660af7fab0 | ||
|
|
c49a1be369 |
41
README.md
41
README.md
@@ -1,18 +1,39 @@
|
||||
# epub2go.py
|
||||
web to epub converter for https://projekt-gutenberg.org.
|
||||
Web to ePUB Converter for [projekt-gutenberg.org](https://projekt-gutenberg.org) developed in conjunction with a [web interface](https://github.com/eneller/epub2go-web).
|
||||
|
||||
## Installation
|
||||
Requires:
|
||||
- [pandoc](https://pandoc.org/)
|
||||
- [wget](https://www.gnu.org/software/wget/)
|
||||
- [fzf](https://github.com/junegunn/fzf) (only for interactive mode)
|
||||
- python (duh)
|
||||
## Usage
|
||||
Invoke the script using the url of any page of the book you would like to download:
|
||||
```
|
||||
epub2go https://www.projekt-gutenberg.org/ibsen/solness/
|
||||
```
|
||||
## Installation
|
||||
- [fzf](https://github.com/junegunn/fzf) (optional, only for interactive mode)
|
||||
- [python](https://www.python.org/) (duh)
|
||||
|
||||
Assuming you have a recent version of Python (3.12 or newer) installed, run:
|
||||
|
||||
```
|
||||
pip install git+https://github.com/eneller/epub2go.py
|
||||
```
|
||||
This will provide the 'epub2go' command.
|
||||
This will provide the `epub2go` command.
|
||||
|
||||
## Usage
|
||||
```
|
||||
Usage: epub2go [OPTIONS] [ARGS]...
|
||||
|
||||
Download ePUBs from https://www.projekt-gutenberg.org/
|
||||
|
||||
Provide either 0 arguments to enter interactive mode or an arbitrary number
|
||||
of URLs to download from
|
||||
|
||||
Options:
|
||||
-d, --debug Set the log level to DEBUG
|
||||
-s, --silent Disable the progress bar
|
||||
-p, --path TEXT The path to which files are saved
|
||||
--no-clean Do not parse html files with blocklist
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
Examples:
|
||||
```bash
|
||||
epub2go https://www.projekt-gutenberg.org/ibsen/solness/
|
||||
epub2go # will enter interactive mode
|
||||
```
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "epub2go"
|
||||
version = "2.1"
|
||||
version = "2.2.3"
|
||||
description = "EPUB converter using wget, pandoc and python glue"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
|
||||
@@ -7,7 +7,7 @@ from tqdm import tqdm
|
||||
from pyfzf.pyfzf import FzfPrompt
|
||||
import click
|
||||
|
||||
import os, subprocess, shlex, logging
|
||||
import os, subprocess, shlex, logging, re
|
||||
import importlib.resources as pkg_resources
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
@@ -31,6 +31,13 @@ class GBConvert():
|
||||
self.blocklist = blocklist.read().splitlines()
|
||||
self.dir_download = downloaddir
|
||||
|
||||
def getDir(self, url):
    """Return the local directory where the downloaded book files are expected.

    wget recreates the URL as a directory tree (``<host>/<path>``) below
    ``self.dir_download``; this method rebuilds that location from *url*.

    Args:
        url: URL of any page of the book (ToC page or a chapter page).

    Returns:
        ``self.dir_download`` joined with the host name and the directory
        part of the URL path.
    """
    # URL paths always use "/" regardless of the host OS, so use posixpath
    # for the path component instead of os.path.
    import posixpath

    # Parse first and take the dirname of the *path* only. Calling dirname
    # on the raw URL treats a "/" inside a query or fragment as a path
    # separator and reduces a bare "https://host" to "https:".
    parsed_url = urlparse(url)
    toc_path = posixpath.dirname(parsed_url.path).rstrip('/')

    # directories created by wget recreating the URL
    return os.path.join(self.dir_download, parsed_url.netloc + toc_path)
|
||||
|
||||
def download(self,
|
||||
url:str,
|
||||
author:str = None,
|
||||
@@ -39,8 +46,7 @@ class GBConvert():
|
||||
cleanpages: bool = True,
|
||||
):
|
||||
tocpage = os.path.dirname(url) # ToC website url
|
||||
url = urlparse(tocpage)
|
||||
dir_output = os.path.join(self.dir_download, url.netloc + url.path )# directories created by wget recreating the URL
|
||||
dir_output = self.getDir(url)
|
||||
logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output)
|
||||
author = author
|
||||
title = title
|
||||
@@ -99,8 +105,7 @@ class GBConvert():
|
||||
def create_epub(self, author, title, chapters, dir_output):
|
||||
#TODO --epub-cover-image
|
||||
#TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
|
||||
filename = f'{title} - {author}.epub'
|
||||
logger.debug('Creating epub as "%s"',filename)
|
||||
filename = slugify(f'{title} - {author}.epub')
|
||||
command = f'''pandoc -f html -t epub \
|
||||
-o "{filename}" \
|
||||
--reference-location=section \
|
||||
@@ -109,6 +114,7 @@ class GBConvert():
|
||||
--metadata author="{author}" \
|
||||
--epub-title-page=false \
|
||||
{" ".join(chapters)} '''
|
||||
logger.debug('Calling "%s"', command)
|
||||
subprocess.run(shlex.split(command), cwd=dir_output, check=True)
|
||||
return os.path.abspath(os.path.join(dir_output,filename))
|
||||
|
||||
@@ -154,6 +160,13 @@ def get_all_books() -> List[Book]:
|
||||
books.append(book)
|
||||
return books
|
||||
|
||||
def slugify(value, replacement='_'):
    """Turn *value* into a string that is safe to use as a file name.

    Characters that are reserved on common filesystems (``<>:"/\\|?*``) and
    ASCII control characters are substituted with *replacement*. Leading and
    trailing whitespace and dots are removed, and the result is limited to
    255 characters.

    Args:
        value: The proposed file name.
        replacement: Text substituted for every forbidden character.

    Returns:
        The sanitized name, or ``"untitled"`` if nothing usable remains.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', replacement, value)

    # Strip whitespace and dots together: stripping them one after the other
    # leaves residue for mixed edges such as "name. ." or ". name".
    cleaned = re.sub(r'^[\s.]+|[\s.]+$', '', cleaned)

    # Truncate to a safe length (255 chars for most filesystems), then trim
    # again because the cut can expose a new trailing dot or space.
    # NOTE(review): the limit counts characters, not encoded bytes.
    cleaned = re.sub(r'[\s.]+$', '', cleaned[:255])

    return cleaned or "untitled"
|
||||
|
||||
# run main cli
|
||||
@click.command()
|
||||
#TODO include images flag
|
||||
@@ -164,7 +177,7 @@ def get_all_books() -> List[Book]:
|
||||
@click.argument('args', nargs=-1)
|
||||
def main(args, debug, silent, path, no_clean):
|
||||
'''
|
||||
Download ePUBs from https://www.projekt-gutenberg.org/
|
||||
Download ePUBs from https://www.projekt-gutenberg.org/ \n
|
||||
Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
|
||||
'''
|
||||
logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
Reference in New Issue
Block a user