begin django webserver

This commit is contained in:
eneller
2025-02-25 03:38:27 +01:00
parent daddb58c3c
commit 7f488c638c
4 changed files with 56 additions and 6 deletions

1
.gitignore vendored
View File

@@ -418,3 +418,4 @@ wheels/
*.css
*.js
*.txt
*.json

View File

@@ -2,6 +2,7 @@ import requests
from tqdm import tqdm
import os
from urllib.parse import urljoin
from convert import GBConvert
import utils
@@ -13,7 +14,7 @@ def main():
book_title = book.get_text()
book_url_relative = book.get('href')
if book_url_relative is not None:
book_url = utils.root_url + os.path.dirname(book_url_relative)[5:]
book_url = urljoin(allbooks_url, book_href)
GBConvert(book_url).run()

View File

@@ -1,14 +1,34 @@
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, ResultSet
import requests
from urllib.parse import urlparse
from urllib.parse import urlparse, urljoin
import os
import json
import unicodedata
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
def get_all_book_urls() -> ResultSet:
    """Fetch the index page at ``allbooks_url`` and return its book links.

    Returns:
        The ``<a>`` tags found inside the first ``<dl>`` element of the page.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: if the page contains no ``<dl>`` element.
    """
    # Without a timeout a stalled server would block this call indefinitely.
    response = requests.get(allbooks_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    listing = soup.find('dl')
    if listing is None:
        # Fail with a descriptive message instead of an AttributeError on None.
        raise ValueError(f'no <dl> element found at {allbooks_url}')
    return listing.find_all('a')
def main():
    """Write the title and absolute URL of every listed book to ``dict.json``.

    Links without an ``href`` attribute are skipped. Newlines and tabs are
    removed from each title. The number of written entries is printed.
    """
    # Build the translation table once instead of once per book.
    strip_newlines_and_tabs = str.maketrans('', '', '\n\t')
    entries = []
    for book in get_all_book_urls():
        book_href = book.get('href')
        if book_href is None:
            continue
        entries.append({
            'title': book.getText().translate(strip_newlines_and_tabs),
            # Resolve relative links against the index page they came from.
            'url': urljoin(allbooks_url, book_href),
        })
    # The context manager guarantees the file is flushed and closed.
    # The platform default encoding is kept on purpose so the output stays
    # readable by code that opens the file without an explicit encoding.
    with open('dict.json', 'w') as outfile:
        json.dump(entries, outfile, ensure_ascii=False)
    print(len(entries))


if __name__ == '__main__':
    main()

28
src/epub2go/web.py Normal file
View File

@@ -0,0 +1,28 @@
# run using `django-admin runserver --pythonpath=. --settings=web`
from django.urls import path
from django.http import HttpResponse
from django.shortcuts import redirect, render
import requests
import utils
import json
# Minimal single-file Django configuration.
# This module doubles as the URLconf, so it points ROOT_URLCONF at itself.
ROOT_URLCONF = __name__

DEBUG = True

# NOTE(review): hard-coded placeholder value; replace with an unpredictable
# secret before running this anywhere but a local development machine.
SECRET_KEY = '1'

# Templates are looked up in the ``templates/`` directory (a relative path).
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': ['templates/'],
    },
]
def home(request):
    """Render ``index.html`` with the book list loaded from ``dict.json``.

    Args:
        request: The incoming Django request.

    Returns:
        The response produced by rendering ``index.html`` with the context
        keys ``request``, ``title`` and ``items``.

    Raises:
        FileNotFoundError: if ``dict.json`` does not exist in the working
            directory.
        json.JSONDecodeError: if ``dict.json`` does not contain valid JSON.
    """
    title = 'epub2go'
    # Close the file deterministically instead of leaking the handle.
    with open('dict.json', 'r') as infile:
        items = json.load(infile)
    # Explicit context with the same three names the template received before.
    context = {'request': request, 'title': title, 'items': items}
    return render(request, 'index.html', context)
# URL routing table: the site root is served by the ``home`` view.
urlpatterns = [path('', home, name='homepage')]