From 9c61cc62343e9d848645c1948648e19b8b1468b8 Mon Sep 17 00:00:00 2001
From: Jules Dejaeghere
Date: Sat, 24 Oct 2020 23:42:12 +0200
Subject: [PATCH] Implement caching of calendar files

TODO:
- Documentation
- Adjust README
---
 .gitignore           |   2 +-
 app/server.py        |  15 ++++-
 app/tools/caching.py | 130 +++++++++++++++++++++++++++++++++++++++++++
 app/tools/tools.py   |  16 ++----
 requirements.txt     |   3 +-
 5 files changed, 151 insertions(+), 15 deletions(-)
 create mode 100644 app/tools/caching.py

diff --git a/.gitignore b/.gitignore
index 93ba534..a054264 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,4 +131,4 @@ dmypy.json
 # Pyre type checker
 .pyre/
 app/config/calendar.json
-app/config/calendar.json
+/app/cache/
diff --git a/app/server.py b/app/server.py
index e733e2b..3677685 100644
--- a/app/server.py
+++ b/app/server.py
@@ -1,4 +1,5 @@
 from tools.tools import *
+from tools.caching import CacheThread
 from flask import Flask, make_response
 
 app = Flask(__name__)
@@ -10,10 +11,18 @@ def main(calendar):
 
     print("Opening " + conf)
 
-    result = str(process(conf))
-    response = make_response(result, 200)
-    response.headers["Content-Disposition"] = "attachment; filename=calendar.ics"
+    try:
+        result = str(process(conf))
+        response = make_response(result, 200)
+        response.headers["Content-Disposition"] = "attachment; filename=calendar.ics"
+    except FileNotFoundError:
+        response = make_response("Calendar not cached", 425)
+
     return response
 
 
+# TODO find better way to launch periodic caching
+# Maybe try with https://docs.python.org/3/library/sched.html
+thread = CacheThread()
+thread.start()
 app.run(host='0.0.0.0', port=8088)
diff --git a/app/tools/caching.py b/app/tools/caching.py
new file mode 100644
index 0000000..8a12948
--- /dev/null
+++ b/app/tools/caching.py
@@ -0,0 +1,130 @@
+"""Download, timestamp and cache the calendars listed in the config files."""
+
+import json
+import os
+import threading
+import time
+from hashlib import sha256
+
+import arrow
+import requests
+from ics import Calendar
+
+CACHE_DIR = "cache"
+CONFIG_DIR = "config"
+REQUEST_TIMEOUT = 30  # seconds; requests never times out by default
+REFRESH_INTERVAL = 10 * 60  # seconds between two cache renewals
+
+
+def _cache_path(url: str) -> str:
+    """Return the cache file of *url*: cache/<sha256 of the url>.ics."""
+    return os.path.join(CACHE_DIR, sha256(url.encode()).hexdigest() + ".ics")
+
+
+def _download(entry: dict) -> Calendar:
+    """Fetch entry["url"] and parse it, honouring the optional "encoding"."""
+    r = requests.get(entry["url"], allow_redirects=True,
+                     timeout=REQUEST_TIMEOUT)
+    # Fail loudly instead of feeding an HTTP error page to the ics parser
+    r.raise_for_status()
+    # bytes.decode() already defaults to UTF-8, same as the fallback here
+    text = r.content.decode(encoding=entry.get("encoding", "utf-8"))
+    return Calendar(imports=text)
+
+
+def cache(entry: dict) -> None:
+    """Download the calendar of *entry* and store it in the cache directory.
+
+    The calendar is written to a temporary file which is then renamed, so
+    get_from_cache() never reads a half-written file.
+    """
+    os.makedirs(CACHE_DIR, exist_ok=True)
+
+    cal = horodate(_download(entry), 'Cached at')
+
+    path = _cache_path(entry['url'])
+    tmp_path = path + ".tmp"
+    with open(tmp_path, 'w', encoding='utf-8') as file:
+        file.writelines(cal)
+    os.replace(tmp_path, path)
+
+
+def get_from_cache(entry: dict) -> Calendar:
+    """Return the cached calendar of *entry*.
+
+    Raises FileNotFoundError if the calendar has not been cached yet.
+    """
+    try:
+        with open(_cache_path(entry['url']), 'r', encoding='utf-8') as file:
+            data = file.read()
+    except FileNotFoundError:
+        print("Not cached")
+        raise FileNotFoundError("The calendar is not cached") from None
+
+    return Calendar(imports=data)
+
+
+def load_cal(entry: dict) -> Calendar:
+    """Return the calendar of *entry*.
+
+    The cached copy is used when entry["cache"] is truthy; otherwise the
+    calendar is downloaded from entry["url"].
+    """
+    if entry.get("cache"):
+        print("Getting", entry["name"], "from cache")
+        return get_from_cache(entry)
+
+    print("Getting", entry["name"], "from remote")
+    return horodate(_download(entry), 'Downloaded at')
+
+
+def horodate(cal: Calendar, prefix='') -> Calendar:
+    """Append "<prefix> <current time>" to the description of every event."""
+    stamp = prefix + ' ' + arrow.now().format("YYYY-MM-DD HH:mm:ss")
+    for event in cal.events:
+        if event.description is None:
+            event.description = stamp
+        else:
+            event.description = event.description + '\n' + stamp
+
+    return cal
+
+
+def background_cache() -> None:
+    """Renew the cache of every entry flagged "cache" in config/*.json."""
+    files = [os.path.join(CONFIG_DIR, f) for f in os.listdir(CONFIG_DIR)
+             if os.path.isfile(os.path.join(CONFIG_DIR, f))
+             and f.endswith('.json')]
+
+    for file in files:
+        with open(file, 'r') as config_file:
+            config = json.load(config_file)
+
+        for entry in config:
+            if not entry.get('cache'):
+                continue
+            try:
+                cache(entry)
+            except Exception as err:
+                # Best effort: a broken calendar must not block the others
+                print('Could not cache', entry.get('name'), '-', err)
+    print('Cache renewed', arrow.now().format("YYYY-MM-DD HH:mm:ss"))
+
+
+class CacheThread(threading.Thread):
+    """Daemon thread renewing the calendar cache every *interval* seconds."""
+
+    def __init__(self, interval: float = REFRESH_INTERVAL):
+        # Daemon: the endless loop in run() must not block interpreter exit
+        super().__init__(daemon=True)
+        self.interval = interval
+
+    def run(self):
+        print("Starting cache process")
+        while True:
+            try:
+                background_cache()
+            except Exception as err:
+                # Keep the thread alive; retry at the next cycle
+                print('Cache renewal failed:', err)
+            time.sleep(self.interval)
diff --git a/app/tools/tools.py b/app/tools/tools.py
index a551e76..030cdd3 100644
--- a/app/tools/tools.py
+++ b/app/tools/tools.py
@@ -73,11 +73,11 @@ Only the url and the name field are mandatory.
 
 import json
 import re
+from typing import List
 
-import requests
 from ics import Calendar
 from pathvalidate import sanitize_filename
-from typing import List
+from tools.caching import load_cal
 
 
 def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar:
@@ -241,11 +241,11 @@ def modify_text(cal: Calendar, modify: dict, field_name: str) -> Calendar:
                     if event.name is not None else change["addSuffix"]
 
             elif field_name == "description":
-                event.name = event.description + change["addSuffix"] \
+                event.description = event.description + change["addSuffix"] \
                     if event.description is not None else change["addSuffix"]
 
             elif field_name == "location":
-                event.name = event.location + change["addSuffix"] \
+                event.location = event.location + change["addSuffix"] \
                     if event.location is not None else change["addSuffix"]
 
     return cal
@@ -321,12 +321,8 @@ def process(path: str) -> Calendar:
     data = []
 
     for entry in config:
-        print("Getting " + entry["name"])
-        r = requests.get(entry["url"], allow_redirects=True)
-        if "encoding" in entry:
-            cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
-        else:
-            cal = Calendar(imports=r.content.decode())
+
+        cal = load_cal(entry)
 
         if "filters" in entry:
             cal = apply_filters(cal, entry["filters"])
diff --git a/requirements.txt b/requirements.txt
index 4de1d77..a143602 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 requests~=2.22.0
 ics~=0.7
 pathvalidate~=2.3.0
-flask~=1.1.1
\ No newline at end of file
+flask~=1.1.1
+arrow~=0.14.7