Enable precomputing of calendars to serve them faster when requested by a client

This commit is contained in:
Jules 2021-09-10 20:26:41 +02:00
parent bd04a1b286
commit c768d7856f
2 changed files with 154 additions and 87 deletions

View file

@ -10,6 +10,7 @@ import arrow
import requests import requests
from ics import Calendar from ics import Calendar
from tatsu.exceptions import FailedParse from tatsu.exceptions import FailedParse
from tools.tools import horodate, process
def cache(entry: dict, scheduler: sched.scheduler = None) -> None: def cache(entry: dict, scheduler: sched.scheduler = None) -> None:
@ -63,89 +64,53 @@ def cache(entry: dict, scheduler: sched.scheduler = None) -> None:
scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler)) scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler))
def get_from_cache(entry: dict) -> Calendar: def precompute(config: str, scheduler: sched.scheduler = None) -> None:
"""Retrieve the entry from cache. If the entry is not found, an exception is raised """Precompute a configuration file result to serve it faster when it is requested. This function
should be used with a scheduler to be repeated over time.
:param config: name of the configuration file to precompute the result for
:type config: str
:param entry: representation of the entry to cache. This is the Python representation of the corresponding entry scheduler used to relaunch the precomputing task in the future. If not scheduler is specified,
in the config file the task will not be relaunched
:type entry: dict :type scheduler: sched.scheduler
:return: the corresponding calendar in cache
:rtype: Calendar
:raises FileNotfoundError: if the entry has not been cached before
""" """
try:
cal = process(os.path.basename(config), False)
path = "app/cache/" + os.path.basename(config).rstrip('.json') + ".ics"
open(path, 'w').writelines(cal)
print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Precomputed", os.path.basename(config).rstrip('.json'))
url = entry['url'] except Exception as e:
path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" with open("error " + arrow.now().format("YYYY-MM-DD HH:mm:ss")+".txt", 'w') as file:
if not os.path.isfile(path): file.write(arrow.now().format("YYYY-MM-DD HH:mm:ss") + "\nCould not precompute : " + str(config))
print("Not cached") file.write(str(e))
raise FileNotFoundError("The calendar is not cached") file.write(str(traceback.format_exc()))
finally:
with open(path, 'r') as file: if scheduler is not None:
data = file.read() delay = get_min_cache(config)
delay *= 60
return Calendar(imports=data) scheduler.enter(delay=delay, priority=1, action=precompute, argument=(config, scheduler))
def load_cal(entry: dict) -> Calendar: def get_min_cache(path: str) -> float:
"""Load the calendar from the cache or from remote according to the entry. If the calendar is supposed to be in """Get the minimum caching time of all the entries in a config file.
cached but could not be found in cache, an error is thrown
:param path: path of the config file to use
:type path: str
:param entry: representation of the entry to cache. This is the Python representation of the corresponding entry :return: float number representing the smallest caching time.
in the config file
:type entry: dict
:return: the calendar corresponding to the entry
:rtype: Calendar
:raises FileNotfoundError: if the entry was supposed to be cached but has not been cached before
""" """
result = float('inf')
if "cache" in entry and entry["cache"]: with open(path, 'r') as config_file:
print("Getting", entry["name"], "from cache") file = json.loads(config_file.read())
return get_from_cache(entry)
else: for entry in file:
print("Getting", entry["name"], "from remote") if 'cache' in entry and entry['cache'] < result:
r = requests.get(entry["url"], allow_redirects=True) result = entry['cache']
if "encoding" in entry:
cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
else:
cal = Calendar(imports=r.content.decode())
cal = horodate(cal, 'Downloaded at') return result
return cal
def horodate(cal: Calendar, prefix='') -> Calendar:
"""Add a new line at the end of the description of every event in the calendar with the current time prefixed by
the prefix parameter and a space
The date is added with the following format: YYYY-MM-DD HH:mm:ss
:param cal: calendar to process
:type cal: Calendar
:param prefix: the prefix to add in front of the date
:type prefix: str
:return: the modified calendar
:rtype: Calendar
"""
now = arrow.now().format("YYYY-MM-DD HH:mm:ss")
for event in cal.events:
event.description = event.description + '\n' + prefix + ' ' + now \
if event.description is not None else prefix + ' ' + now
return cal
def start_scheduler(scheduler: sched.scheduler) -> None: def start_scheduler(scheduler: sched.scheduler) -> None:
@ -168,6 +133,9 @@ def start_scheduler(scheduler: sched.scheduler) -> None:
if 'cache' in entry: if 'cache' in entry:
scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler)) scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler))
if get_min_cache(file) < float('inf'):
scheduler.enter(delay=get_min_cache(file)*60, priority=1, action=precompute, argument=(file, scheduler))
scheduler.run() scheduler.run()

View file

@ -73,11 +73,14 @@ Only the url and the name field are mandatory.
import json import json
import re import re
import arrow
import os
from hashlib import sha256
from typing import List from typing import List
import requests
from ics import Calendar from ics import Calendar
from pathvalidate import sanitize_filename from pathvalidate import sanitize_filename
from tools.caching import load_cal
def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar: def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar:
@ -299,11 +302,12 @@ def merge(cals: List[Calendar]) -> Calendar:
return result return result
def process(path: str) -> Calendar: def process(path: str, from_cache: bool = True) -> Calendar:
"""Open a config file from the specified path, download the calendars, """Open a config file from the specified path, download the calendars,
apply the filters, modify and merge the calendars as specified in the config file apply the filters, modify and merge the calendars as specified in the config file
:param from_cache:
:param path: name of the file to open. The file should be in the config/ folder :param path: name of the file to open. The file should be in the config/ folder
:type path: str :type path: str
@ -311,7 +315,14 @@ def process(path: str) -> Calendar:
:return: the resulting calendar :return: the resulting calendar
:rtype: Calendar :rtype: Calendar
""" """
print("app/cache/" + sanitize_filename(path).rstrip(".json") + ".ics")
if from_cache and os.path.isfile("app/cache/" + sanitize_filename(path).rstrip(".json") + ".ics"):
with open("app/cache/" + sanitize_filename(path).rstrip(".json") + ".ics") as file:
data = file.read()
print("Serving precomputed file")
return data #Calendar(imports=data)
else:
o = "app/config/" + sanitize_filename(path) o = "app/config/" + sanitize_filename(path)
print("Try to open " + o) print("Try to open " + o)
file = open(o, "r") file = open(o, "r")
@ -333,3 +344,91 @@ def process(path: str) -> Calendar:
data.append(cal) data.append(cal)
return merge(data) return merge(data)
def get_from_cache(entry: dict) -> Calendar:
    """Read the cached copy of a calendar entry from disk.

    The cache file is looked up in ``app/cache/`` under the SHA-256 hex digest of the entry's URL,
    with the ``.ics`` extension.

    :param entry: Python representation of the entry as found in the config file; only its ``url`` key is read
    :type entry: dict

    :return: the calendar parsed from the cached file
    :rtype: Calendar

    :raises FileNotFoundError: if no cache file exists for this entry
    """
    digest = sha256(entry['url'].encode()).hexdigest()
    cached_file = "app/cache/" + digest + ".ics"

    if os.path.isfile(cached_file):
        with open(cached_file, 'r') as handle:
            content = handle.read()
        return Calendar(imports=content)

    print("Not cached")
    raise FileNotFoundError("The calendar is not cached")
def load_cal(entry: dict) -> Calendar:
    """Load the calendar described by the entry, either from the local cache or from its remote URL.

    If the entry has a truthy ``cache`` value, the calendar is read from the cache. When the cached file
    cannot be found, an empty calendar is returned instead of propagating the error.
    Otherwise the calendar is downloaded from ``entry["url"]``, decoded with ``entry["encoding"]`` when that
    key is present (default decoding otherwise), and every event is stamped with the download time.

    :param entry: Python representation of the entry as found in the config file. The ``name`` and ``url`` keys
        are read; ``cache`` and ``encoding`` are optional
    :type entry: dict

    :return: the calendar corresponding to the entry, or an empty calendar if it should have been cached
        but was not found in the cache
    :rtype: Calendar
    """
    if entry.get("cache"):
        print("Getting", entry["name"], "from cache")
        try:
            return get_from_cache(entry)
        except FileNotFoundError:
            # The entry has not been cached yet: fall back to an empty calendar rather than failing
            return Calendar()

    print("Getting", entry["name"], "from remote")
    # Without a timeout, requests waits forever on an unresponsive server and would block the caller
    r = requests.get(entry["url"], allow_redirects=True, timeout=30)
    if "encoding" in entry:
        cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
    else:
        cal = Calendar(imports=r.content.decode())

    return horodate(cal, 'Downloaded at')
def horodate(cal: Calendar, prefix='') -> Calendar:
    """Stamp every event of the calendar with the current time.

    The text ``<prefix> <timestamp>`` is appended on a new line at the end of each event description, or
    becomes the whole description when the event has none. The timestamp is taken once for the whole
    calendar and formatted as YYYY-MM-DD HH:mm:ss.

    :param cal: calendar to process; its events are modified in place
    :type cal: Calendar

    :param prefix: text placed in front of the timestamp, separated from it by a space
    :type prefix: str

    :return: the same calendar, with modified event descriptions
    :rtype: Calendar
    """
    timestamp = arrow.now().format("YYYY-MM-DD HH:mm:ss")

    for event in cal.events:
        if event.description is None:
            event.description = prefix + ' ' + timestamp
        else:
            event.description = event.description + '\n' + prefix + ' ' + timestamp

    return cal