begin django webserver
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -418,3 +418,4 @@ wheels/
|
||||
*.css
|
||||
*.js
|
||||
*.txt
|
||||
*.json
|
||||
|
||||
@@ -2,6 +2,7 @@ import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
import os
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from convert import GBConvert
|
||||
import utils
|
||||
@@ -13,7 +14,7 @@ def main():
|
||||
book_title = book.get_text()
|
||||
book_url_relative = book.get('href')
|
||||
if book_url_relative is not None:
    # Resolve the link against the index page it was scraped from.
    # The names must be the ones that exist in this module: the index URL
    # lives in `utils`, and the href was stored in `book_url_relative`.
    # NOTE(review): the surrounding loop is only partially visible here;
    # confirm the conversion call belongs inside this `if`.
    book_url = urljoin(utils.allbooks_url, book_url_relative)
    GBConvert(book_url).run()
|
||||
|
||||
|
||||
|
||||
@@ -1,14 +1,34 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, ResultSet
|
||||
import requests
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, urljoin
|
||||
import os
|
||||
import json
|
||||
import unicodedata
|
||||
|
||||
# Index page that get_all_book_urls() downloads and scans for book links.
allbooks_url = 'https://www.projekt-gutenberg.org/info/texte/allworka.html'

# Scheme and host of the index page, without any path or trailing slash.
root_url = '{url.scheme}://{url.netloc}'.format(
    url=urlparse(allbooks_url),
)
|
||||
|
||||
def get_all_book_urls() -> ResultSet:
    """Download the book index page and return the links it lists.

    The page at ``allbooks_url`` is fetched and parsed, and every ``<a>``
    element inside the first ``<dl>`` element of the document is collected.

    Returns:
        The ``ResultSet`` of anchor tags found inside the first ``<dl>``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within the timeout.
        AttributeError: if the page contains no ``<dl>`` element.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(allbooks_url, timeout=30)
    response.raise_for_status()
    # Parse the raw bytes and tell the parser explicitly that they are UTF-8.
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    books = soup.find('dl').find_all('a')
    return books
|
||||
|
||||
def main():
    """Scrape the book index and save it as ``dict.json``.

    Every link returned by ``get_all_book_urls()`` that carries an ``href``
    becomes one ``{'title': ..., 'url': ...}`` entry. The URL is the href
    resolved against ``allbooks_url``; the title is the link text with all
    newline and tab characters removed. The list of entries is written as
    JSON to ``dict.json`` in the current working directory, and the number
    of entries is printed.
    """
    # Build the character-deletion table once instead of once per link.
    strip_layout_chars = str.maketrans('', '', '\n\t')

    entries = []
    for book in get_all_book_urls():
        book_href = book.get('href')
        if book_href is None:
            # Anchors without a target cannot be downloaded later; skip them.
            continue
        entries.append({
            'title': book.get_text().translate(strip_layout_chars),
            'url': urljoin(allbooks_url, book_href),
        })

    # The context manager guarantees the file is flushed and closed.
    # NOTE: the file is written with the platform's default text encoding;
    # whatever reads dict.json has to open it the same way.
    with open('dict.json', 'w') as out_file:
        json.dump(entries, out_file, ensure_ascii=False)
    print(len(entries))


if __name__ == '__main__':
    main()
|
||||
28
src/epub2go/web.py
Normal file
28
src/epub2go/web.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# run using `django-admin runserver --pythonpath=. --settings=web`
|
||||
from django.urls import path
|
||||
from django.http import HttpResponse
|
||||
from django.shortcuts import redirect, render
|
||||
import requests
|
||||
|
||||
import utils
|
||||
import json
|
||||
# Single-module Django configuration: this file is both the settings module
# and the URLconf (see the run command at the top of the file).
DEBUG = True
SECRET_KEY = '1'

# The routes live in `urlpatterns` of this very module.
ROOT_URLCONF = __name__

# Django template engine; templates are looked up in `templates/`.
TEMPLATES = [
    dict(
        BACKEND='django.template.backends.django.DjangoTemplates',
        DIRS=['templates/'],
    ),
]
|
||||
|
||||
def home(request):
    """Render the landing page with the list of scraped books.

    Loads the JSON document stored in ``dict.json`` (looked up in the
    current working directory) on every request and renders ``index.html``
    with it.

    Args:
        request: the incoming Django request.

    Returns:
        The response produced by rendering ``index.html`` with the context
        keys ``request``, ``title`` and ``items``.

    Raises:
        FileNotFoundError: if ``dict.json`` does not exist.
        json.JSONDecodeError: if ``dict.json`` is not valid JSON.
    """
    # Close the file deterministically instead of leaking one handle per hit.
    with open('dict.json', 'r') as book_file:
        items = json.load(book_file)

    # Same three names the template received before, now spelled out.
    context = {
        'request': request,
        'title': 'epub2go',
        'items': items,
    }
    return render(request, 'index.html', context)
|
||||
|
||||
# Routing table: the site root is the only route and is served by `home`.
urlpatterns = [path(route='', view=home, name='homepage')]
|
||||
Reference in New Issue
Block a user