feat: allow getting directory without download

docs: readme
feat: return epub path
2025-03-23 23:55:05 +01:00 · 2025-03-20 22:11:12 +01:00 · 2025-03-16 20:30:42 +01:00
4 changed files with 50 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -1,18 +1,39 @@
 # epub2go.py
-web to epub converter for https://projekt-gutenberg.org.
+Web to ePUB converter for [projekt-gutenberg.org](https://projekt-gutenberg.org), developed in conjunction with a [web interface](https://github.com/eneller/epub2go-web).
 ## Installation
 Requires:
 - [pandoc](https://pandoc.org/)
 - [wget](https://www.gnu.org/software/wget/)
- [fzf](https://github.com/junegunn/fzf) (only for interactive mode)
+- [fzf](https://github.com/junegunn/fzf) (optional, only for interactive mode)
- python (duh)
+- [python](https://www.python.org/) (duh)
-## Usage
+
-Invoke the script using the url of any page of the book you would like to download:
+Assuming you have a recent version of python installed, run
-``` 
+
-epub2go https://www.projekt-gutenberg.org/ibsen/solness/
+```
 pip install git+https://github.com/eneller/epub2go.py
 ```
 This will provide the `epub2go` command.
 ## Usage
 ```
 Usage: epub2go [OPTIONS] [ARGS]...
  Download ePUBs from https://www.projekt-gutenberg.org/
  Provide either 0 arguments to enter interactive mode or an arbitrary number
  of URLs to download from
 Options:
  -d, --debug      Set the log level to DEBUG
  -s, --silent     Disable the progress bar
  -p, --path TEXT  The path to which files are saved
  --no-clean       Do not parse html files with blocklist
  --help           Show this message and exit.
 ```
 Examples:
 ```bash
 epub2go https://www.projekt-gutenberg.org/ibsen/solness/
 epub2go # will enter interactive mode
 ```
 ## Installation
   Assuming you have a recent version of python installed, run
   ```
   pip install git+https://github.com/eneller/epub2go.py
   ```
   This will provide the 'epub2go' command.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "epub2go"
-version = "2.0"
+version = "2.2"
 description = "EPUB converter using wget, pandoc and python glue"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -22,4 +22,4 @@ include-package-data = true
 requires = ["setuptools>=64", "setuptools_scm>=8"]
 [tool.setuptools_scm]
-# can be empty if no extra settings are needed, presence enables setuptools-scm
+# can be empty if no extra settings are needed, presence enables setuptools-scm
--- a/src/epub2go/convert.py
+++ b/src/epub2go/convert.py
@@ -31,6 +31,13 @@ class GBConvert():
            self.blocklist = blocklist.read().splitlines()
        self.dir_download = downloaddir
    def getDir(self, url):
        '''
        Return the local directory expected to hold the pages belonging to url.

        The last segment of url is dropped to obtain the table-of-contents
        location; host and path of that location are then appended to
        self.dir_download, mirroring the directory layout wget creates when it
        recreates the URL on disk. Nothing is downloaded here.
        '''
        # ToC website url: everything up to (not including) the last '/'
        toc = urlparse(os.path.dirname(url))
        # directories created by wget recreating the URL: <host><path>
        mirrored = toc.netloc + toc.path
        return os.path.join(self.dir_download, mirrored)
    def download(self,
        url:str,
        author:str = None,
@@ -39,8 +46,7 @@ class GBConvert():
        cleanpages: bool = True,
    ):
        tocpage = os.path.dirname(url) # ToC website url
-        url = urlparse(tocpage)
+        dir_output = self.getDir()
        dir_output = os.path.join(self.dir_download, url.netloc + url.path )# directories created by wget recreating the URL
        logger.debug('Downloading to %s, expecting files in in %s', self.dir_download, dir_output)
        author = author
        title = title
@@ -96,7 +102,7 @@ class GBConvert():
            f.write(str(soup))
        logger.debug('Removed %d tags from page %s during parsing', count, file_path)
-    def create_epub(self, author, title, chapters, dir_output)-> int:
+    def create_epub(self, author, title, chapters, dir_output):
        #TODO --epub-cover-image
        #TODO toc if it isnt described by <h> tags, e.g. https://www.projekt-gutenberg.org/adlersfe/maskenba/
        filename = f'{title} - {author}.epub'
@@ -109,7 +115,8 @@ class GBConvert():
                    --metadata author="{author}" \
                    --epub-title-page=false \
                    {" ".join(chapters)} '''
-        return subprocess.run(shlex.split(command), cwd=dir_output).returncode
+        subprocess.run(shlex.split(command), cwd=dir_output, check=True)
        return os.path.abspath(os.path.join(dir_output,filename))
    def save_page(self, url):
        logger.debug('Saving page at %s', url)
@@ -121,7 +128,7 @@ class GBConvert():
                    --tries=5 \
                    --quiet \
                    {url}'''
-        return subprocess.run(shlex.split(command), cwd=self.dir_download).returncode
+        subprocess.run(shlex.split(command), cwd=self.dir_download, check=True)
 # get a list of all books for interactive selection or scraping
 def get_all_books() -> List[Book]:
@@ -163,7 +170,7 @@ def get_all_books() -> List[Book]:
@click.argument('args', nargs=-1)
 def main(args, debug, silent, path, no_clean):
    '''
-    Download ePUBs from https://www.projekt-gutenberg.org/
+    Download ePUBs from https://www.projekt-gutenberg.org/ \n
    Provide either 0 arguments to enter interactive mode or an arbitrary number of URLs to download from
    '''
    logging.basicConfig(level=logging.ERROR,format='%(asctime)s - %(levelname)s - %(message)s')
--- a/uv.lock
+++ b/uv.lock
@@ -81,7 +81,7 @@ wheels = [
 [[package]]
 name = "epub2go"
-version = "1.2"
+version = "2.2"
 source = { editable = "." }
 dependencies = [
    { name = "beautifulsoup4" },
Author	SHA1	Message	Date
eneller	660af7fab0	feat: allow getting directory without download	2025-03-23 23:55:05 +01:00
eneller	c49a1be369	docs: readme	2025-03-20 22:11:12 +01:00
eneller	4267700763	feat: return epub path; errors from wget and pandoc are thrown up	2025-03-16 20:30:42 +01:00