begin django webserver
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -418,3 +418,4 @@ wheels/
|
|||||||
*.css
|
*.css
|
||||||
*.js
|
*.js
|
||||||
*.txt
|
*.txt
|
||||||
|
*.json
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import requests
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from convert import GBConvert
|
from convert import GBConvert
|
||||||
import utils
|
import utils
|
||||||
@@ -13,7 +14,7 @@ def main():
|
|||||||
book_title = book.get_text()
|
book_title = book.get_text()
|
||||||
book_url_relative = book.get('href')
|
book_url_relative = book.get('href')
|
||||||
if book_url_relative is not None:
|
if book_url_relative is not None:
|
||||||
book_url = utils.root_url + os.path.dirname(book_url_relative)[5:]
|
book_url = urljoin(allbooks_url, book_href)
|
||||||
GBConvert(book_url).run()
|
GBConvert(book_url).run()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,34 @@
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup, ResultSet
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse, urljoin
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
allbooks_url ='https://www.projekt-gutenberg.org/info/texte/allworka.html'
|
||||||
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
root_url = '{url.scheme}://{url.netloc}'.format(url = urlparse(allbooks_url))
|
||||||
|
|
||||||
def get_all_book_urls() -> ResultSet:
    """Fetch the index page at ``allbooks_url`` and return its book links.

    Returns:
        The ``<a>`` tags found inside the first ``<dl>`` element of the page.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status (via ``raise_for_status``).
        AttributeError: if the page contains no ``<dl>`` element, because
            ``soup.find`` then returns ``None``.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(allbooks_url, timeout=30)
    response.raise_for_status()
    # Parse the raw bytes and tell BeautifulSoup to decode them as UTF-8.
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
    books = soup.find('dl').find_all('a')
    return books
|
||||||
|
|
||||||
|
def main():
    """Write the title and absolute URL of every listed book to ``dict.json``.

    Links without an ``href`` attribute are skipped. Newlines and tabs are
    stripped from each title. The number of collected entries is printed
    once the file has been written.
    """
    books = get_all_book_urls()
    # Built once: deletes newline and tab characters from a title.
    strip_whitespace = str.maketrans('', '', '\n\t')
    d = []
    for book in books:
        book_href = book.get('href')
        if book_href is None:
            continue
        # hrefs on the index page may be relative; resolve them against it.
        book_url = urljoin(allbooks_url, book_href)
        book_title = book.getText().translate(strip_whitespace)
        d.append({'title': book_title, 'url': book_url})

    # The context manager guarantees the file is flushed and closed.
    # NOTE(review): the file is opened with the platform default encoding
    # while ensure_ascii=False writes non-ASCII characters verbatim; consider
    # an explicit encoding='utf-8' here and in every reader of dict.json.
    with open('dict.json', 'w') as f:
        json.dump(d, f, ensure_ascii=False)
    print(len(d))


if __name__ == '__main__':
    main()
|
||||||
28
src/epub2go/web.py
Normal file
28
src/epub2go/web.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# run using `django-admin runserver --pythonpath=. --settings=web`
|
||||||
|
from django.urls import path
|
||||||
|
from django.http import HttpResponse
|
||||||
|
from django.shortcuts import redirect, render
|
||||||
|
import requests
|
||||||
|
|
||||||
|
import utils
|
||||||
|
import json
|
||||||
|
# Django settings for this single-module app. The module is passed as
# --settings, and ROOT_URLCONF points back at it, so the ``urlpatterns``
# defined further down in this file are used as the URL configuration.
DEBUG = True
ROOT_URLCONF = __name__
# NOTE(review): placeholder value; presumably for local development only.
SECRET_KEY = '1'
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # Relative path, so it is resolved against the working directory.
        'DIRS': ['templates/'],
    },
]
|
||||||
|
|
||||||
|
def home(request):
    """Render ``index.html`` with the book list loaded from ``dict.json``.

    The template context contains ``request``, ``title`` and ``items``,
    where ``items`` is whatever JSON value ``dict.json`` holds.

    Raises:
        FileNotFoundError: if ``dict.json`` does not exist in the working
            directory.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # The context manager closes the file even if parsing fails.
    with open('dict.json', 'r') as f:
        items = json.load(f)
    # Explicit context instead of locals(), so the file handle and any
    # future local variable are not exposed to the template by accident.
    context = {'request': request, 'title': 'epub2go', 'items': items}
    return render(request, 'index.html', context)


urlpatterns = [
    path('', home, name='homepage'),
]
|
||||||
Reference in New Issue
Block a user