-
Notifications
You must be signed in to change notification settings - Fork 0
/
wikipedia_cached.py
94 lines (73 loc) · 2.47 KB
/
wikipedia_cached.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import ujson as json
import os
import logging
import atexit
import time
import functools
import sys
from wikipedia import page as page_1, PageError
# Module-wide logger (the root logger, as obtained by getLogger() with no name).
logger = logging.getLogger()

# Location of the on-disk JSON cache of fetched pages.
filename = 'output/pages_cache.json'

# The shared cache mapping; stays None until initialize_main() or
# initialize_sub() installs a dict-like object.
pages = None

# Make sure the cache directory exists. exist_ok=True avoids the
# check-then-create race when several processes import this module at once.
os.makedirs(os.path.dirname(filename), exist_ok=True)
def save_pages(max_attempts=5):
    """Write the in-memory page cache to ``filename`` as JSON.

    The cache is first serialised to a temporary sibling file and then moved
    over ``filename`` with ``os.replace``, so an interrupted or failed save
    never leaves a truncated cache file behind.

    Args:
        max_attempts: Upper bound on the number of save attempts. A failed
            attempt is logged and retried after a 0.5 s pause; a
            ``KeyboardInterrupt`` during an attempt is retried immediately.

    Returns:
        None. Failures are logged rather than raised, since this function is
        registered with ``atexit`` and called from ``finally`` blocks.
    """
    if pages is None:
        # Nothing was ever installed as the cache, so there is nothing to
        # write (and len(None) below would fail on every retry).
        logging.warning("wiki.save_pages(): Cache is not initialized; nothing to save")
        return

    tmp_name = filename + '.tmp'
    for attempt in range(1, max_attempts + 1):
        try:
            logging.info("wiki.save_pages(): Starting... number of pages = {!r}".format(len(pages)))
            with open(tmp_name, 'w') as f:
                # .copy() takes a snapshot of the mapping before dumping it.
                json.dump(pages.copy(), f, indent=2, sort_keys=True)
            # Swap the finished file into place in one step.
            os.replace(tmp_name, filename)
            logging.info("wiki.save_pages(): Saved pages to {!r}".format(filename))
            return
        except KeyboardInterrupt:
            # Keep trying to save even if the user interrupts mid-write.
            sys.stdout.flush()
        except Exception:
            logging.exception("wiki.save_pages(): Failed")
            if attempt < max_attempts:
                time.sleep(0.5)

    logging.error("wiki.save_pages(): Giving up after {!r} attempts".format(max_attempts))
def ensure_save_pages(f):
    """Decorator that calls ``save_pages()`` after ``f`` finishes.

    The save runs in a ``finally`` clause, so it happens whether ``f``
    returns normally or raises.

    Args:
        f: The callable to wrap.

    Returns:
        A wrapper with ``f``'s metadata (via ``functools.wraps``) that
        forwards all positional and keyword arguments to ``f`` and returns
        its result.
    """
    @functools.wraps(f)
    def g(*a, **ka):
        try:
            # Forward the caller's arguments unchanged.
            return f(*a, **ka)
        finally:
            save_pages()
    return g
def initialize_main(dict_object):
    """Install ``dict_object`` as the shared cache and load the saved pages.

    The contents of ``filename`` (if present and valid JSON) are merged into
    ``dict_object`` with ``update``. Regardless of whether loading succeeds,
    ``save_pages`` is registered with ``atexit`` so the cache is written back
    when the interpreter exits.

    Args:
        dict_object: A dict-like object supporting ``update``; it becomes the
            module-level ``pages`` cache.
    """
    global pages
    pages = dict_object
    try:
        # A zero-byte cache file is deleted; the open() below then raises
        # FileNotFoundError and it is reported as a missing cache.
        if os.stat(filename).st_size == 0:
            os.unlink(filename)
        with open(filename, 'r') as f:
            pages.update(json.load(f))
        logger.info("wiki.initialize_main(): Loaded pages from {!r}".format(filename))
    except FileNotFoundError:
        logger.info("wiki.initialize_main(): Cache is not present: {!r}".format(filename))
    except ValueError:
        # Decode errors are ValueError subclasses; catching the base class
        # avoids depending on the json module exposing JSONDecodeError.
        logger.exception("JSONDecodeError")
    finally:
        atexit.register(save_pages)
def initialize_sub(dict_object):
    """Install ``dict_object`` as the module-level ``pages`` cache.

    Unlike ``initialize_main``, this neither reads the cache file nor
    registers an exit hook; it only rebinds ``pages`` and logs the call.

    Args:
        dict_object: The object to use as the shared cache.
    """
    global pages

    pages = dict_object
    logger.debug("wiki.initialize_sub() called.")
class Page:
    """Plain holder for one page's HTML, links and title."""

    def __init__(self, html, links, title):
        """Store the three page fields as given."""
        self._html = html
        self.links = links
        self.title = title

    def html(self):
        """Return the HTML passed to the constructor."""
        return self._html
def page(title, auto_suggest=True):
    """Return a ``Page`` for ``title``, fetching it only on a cache miss.

    The cache key is the JSON encoding of ``[title, auto_suggest]``, so the
    same title looked up with different ``auto_suggest`` values is cached
    separately. On a miss the page is fetched with ``page_1`` (the imported
    ``wikipedia.page``) and its HTML, links and title are stored in ``pages``.

    Args:
        title: Page title to look up; must be a ``str``.
        auto_suggest: Passed through to ``page_1``.

    Returns:
        A ``Page`` built from the cached or freshly fetched fields.

    Raises:
        AssertionError: If the cache has not been initialized or ``title`` is
            not a string.
        Any exception raised by ``page_1`` propagates unchanged.
    """
    assert pages is not None, "Not initialized."
    assert isinstance(title, str)

    title1 = json.dumps([title, auto_suggest])

    # Single lookup: fetch the entry once and reuse it.
    cached = pages.get(title1)
    if cached is not None:
        return Page(*cached)

    logging.debug("wiki.page: Cache miss: {!r}".format(title))
    fetched = page_1(title, auto_suggest=auto_suggest)
    entry = (fetched.html(), fetched.links, fetched.title)
    pages[title1] = entry
    # Build the result from the values just stored instead of re-reading them.
    return Page(*entry)