From 9c61cc62343e9d848645c1948648e19b8b1468b8 Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Sat, 24 Oct 2020 23:42:12 +0200 Subject: [PATCH 1/7] Implement caching of calendar files TODO: - Documentation - Adjust README --- .gitignore | 2 +- app/server.py | 17 +++++++-- app/tools/caching.py | 91 ++++++++++++++++++++++++++++++++++++++++++++ app/tools/tools.py | 16 +++----- requirements.txt | 3 +- 5 files changed, 113 insertions(+), 16 deletions(-) create mode 100644 app/tools/caching.py diff --git a/.gitignore b/.gitignore index 93ba534..a054264 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,4 @@ dmypy.json # Pyre type checker .pyre/ app/config/calendar.json -app/config/calendar.json +/app/cache/ diff --git a/app/server.py b/app/server.py index e733e2b..3677685 100644 --- a/app/server.py +++ b/app/server.py @@ -1,4 +1,5 @@ from tools.tools import * +from tools.caching import * from flask import Flask, make_response app = Flask(__name__) @@ -10,10 +11,18 @@ def main(calendar): print("Opening " + conf) - result = str(process(conf)) - response = make_response(result, 200) - response.headers["Content-Disposition"] = "attachment; filename=calendar.ics" + try: + result = str(process(conf)) + response = make_response(result, 200) + response.headers["Content-Disposition"] = "attachment; filename=calendar.ics" + except FileNotFoundError: + response = make_response("Calendar not cached", 425) + return response -app.run(host='0.0.0.0', port=8088) +# TODO find better way to launch periodic caching +# Maybe try with https://docs.python.org/3/library/sched.html +thread = CacheThread() +thread.start() +app.run(host='0.0.0.0', port=8088) \ No newline at end of file diff --git a/app/tools/caching.py b/app/tools/caching.py new file mode 100644 index 0000000..8a12948 --- /dev/null +++ b/app/tools/caching.py @@ -0,0 +1,91 @@ +import json +import os +import threading +import time +from hashlib import sha256 + +import arrow +import requests +from ics import Calendar + + 
+def cache(entry: dict) -> None: + if not os.path.isdir('cache'): + os.mkdir('cache') + + url = entry['url'] + path = "cache/" + sha256(url.encode()).hexdigest() + ".ics" + + r = requests.get(entry["url"], allow_redirects=True) + if "encoding" in entry: + cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + else: + cal = Calendar(imports=r.content.decode()) + + cal = horodate(cal, 'Cached at') + open(path, 'w').writelines(cal) + + +def get_from_cache(entry: dict) -> Calendar: + url = entry['url'] + path = "cache/" + sha256(url.encode()).hexdigest() + ".ics" + if not os.path.isfile(path): + print("Not cached") + raise FileNotFoundError("The calendar is not cached") + + with open(path, 'r') as file: + data = file.read() + + return Calendar(imports=data) + + +def load_cal(entry: dict) -> Calendar: + if "cache" in entry and entry["cache"]: + print("Getting", entry["name"], "from cache") + return get_from_cache(entry) + + else: + print("Getting", entry["name"], "from remote") + r = requests.get(entry["url"], allow_redirects=True) + if "encoding" in entry: + cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + else: + cal = Calendar(imports=r.content.decode()) + + cal = horodate(cal, 'Downloaded at') + return cal + + +def horodate(cal: Calendar, prefix='') -> Calendar: + now = arrow.now().format("YYYY-MM-DD HH:mm:ss") + for event in cal.events: + event.description = event.description + '\n' + prefix + ' ' + now \ + if event.description is not None else prefix + ' ' + now + + return cal + + +def background_cache() -> None: + path = "config" + files = [os.path.join(path, f) for f in os.listdir(path) + if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')] + + for file in files: + with open(file, 'r') as config_file: + config = json.loads(config_file.read()) + + for entry in config: + if 'cache' in entry and entry['cache']: + cache(entry) + print('Cache renewed', arrow.now().format("YYYY-MM-DD HH:mm:ss")) + + +class 
CacheThread(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + + def run(self): + print("Starting cache process") + while True: + background_cache() + time.sleep(10*60) diff --git a/app/tools/tools.py b/app/tools/tools.py index a551e76..030cdd3 100644 --- a/app/tools/tools.py +++ b/app/tools/tools.py @@ -73,11 +73,11 @@ Only the url and the name field are mandatory. import json import re +from typing import List -import requests from ics import Calendar from pathvalidate import sanitize_filename -from typing import List +from tools.caching import load_cal def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar: @@ -241,11 +241,11 @@ def modify_text(cal: Calendar, modify: dict, field_name: str) -> Calendar: if event.name is not None else change["addSuffix"] elif field_name == "description": - event.name = event.description + change["addSuffix"] \ + event.description = event.description + change["addSuffix"] \ if event.description is not None else change["addSuffix"] elif field_name == "location": - event.name = event.location + change["addSuffix"] \ + event.location = event.location + change["addSuffix"] \ if event.location is not None else change["addSuffix"] return cal @@ -321,12 +321,8 @@ def process(path: str) -> Calendar: data = [] for entry in config: - print("Getting " + entry["name"]) - r = requests.get(entry["url"], allow_redirects=True) - if "encoding" in entry: - cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) - else: - cal = Calendar(imports=r.content.decode()) + + cal = load_cal(entry) if "filters" in entry: cal = apply_filters(cal, entry["filters"]) diff --git a/requirements.txt b/requirements.txt index 4de1d77..a143602 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ requests~=2.22.0 ics~=0.7 pathvalidate~=2.3.0 -flask~=1.1.1 \ No newline at end of file +flask~=1.1.1 +arrow~=0.14.7 \ No newline at end of file From 3b653f7bdc90acbee574bce4391291b7aca0023f Mon Sep 17 
00:00:00 2001 From: Jules Dejaeghere Date: Fri, 30 Oct 2020 13:18:10 +0100 Subject: [PATCH 2/7] Improve caching system --- app/tools/caching.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/tools/caching.py b/app/tools/caching.py index 8a12948..02cca63 100644 --- a/app/tools/caching.py +++ b/app/tools/caching.py @@ -76,7 +76,10 @@ def background_cache() -> None: for entry in config: if 'cache' in entry and entry['cache']: - cache(entry) + try: + cache(entry) + except: + print("Could not cache", entry) print('Cache renewed', arrow.now().format("YYYY-MM-DD HH:mm:ss")) @@ -88,4 +91,4 @@ class CacheThread(threading.Thread): print("Starting cache process") while True: background_cache() - time.sleep(10*60) + time.sleep(10 * 60) From 7a9455f6fb6d168dbde687d1d0e1de32c0d5eae9 Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Sat, 31 Oct 2020 21:03:58 +0100 Subject: [PATCH 3/7] Change working directory --- app/server.py | 4 ++-- app/tools/caching.py | 2 +- app/tools/tools.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/server.py b/app/server.py index 3677685..8da4bd2 100644 --- a/app/server.py +++ b/app/server.py @@ -1,5 +1,5 @@ -from tools.tools import * -from tools.caching import * +from app.tools.tools import * +from app.tools.caching import * from flask import Flask, make_response app = Flask(__name__) diff --git a/app/tools/caching.py b/app/tools/caching.py index 02cca63..776797a 100644 --- a/app/tools/caching.py +++ b/app/tools/caching.py @@ -66,7 +66,7 @@ def horodate(cal: Calendar, prefix='') -> Calendar: def background_cache() -> None: - path = "config" + path = "app/config" files = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')] diff --git a/app/tools/tools.py b/app/tools/tools.py index 030cdd3..bdff209 100644 --- a/app/tools/tools.py +++ b/app/tools/tools.py @@ -77,7 +77,7 @@ from typing import List from ics import Calendar from 
pathvalidate import sanitize_filename -from tools.caching import load_cal +from app.tools.caching import load_cal def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar: @@ -312,7 +312,7 @@ def process(path: str) -> Calendar: :rtype: Calendar """ - o = "config/" + sanitize_filename(path) + o = "app/config/" + sanitize_filename(path) print("Try to open " + o) file = open(o, "r") config = json.loads(file.read()) From 73a12c55b2d89034267c1dfffc8e8821a905ca86 Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Sat, 31 Oct 2020 21:25:29 +0100 Subject: [PATCH 4/7] Add documentation for the app.tools.caching module --- app/server.py | 7 ++--- app/tools/caching.py | 62 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/app/server.py b/app/server.py index 8da4bd2..cc2645f 100644 --- a/app/server.py +++ b/app/server.py @@ -1,7 +1,8 @@ -from app.tools.tools import * -from app.tools.caching import * from flask import Flask, make_response +from app.tools.caching import * +from app.tools.tools import * + app = Flask(__name__) @@ -25,4 +26,4 @@ def main(calendar): # Maybe try with https://docs.python.org/3/library/sched.html thread = CacheThread() thread.start() -app.run(host='0.0.0.0', port=8088) \ No newline at end of file +app.run(host='0.0.0.0', port=8088) diff --git a/app/tools/caching.py b/app/tools/caching.py index 776797a..c08f5c5 100644 --- a/app/tools/caching.py +++ b/app/tools/caching.py @@ -10,6 +10,17 @@ from ics import Calendar def cache(entry: dict) -> None: + """Cache an .ics feed in the app/cache directory. + Different entries with the same URL will be cached in the same file. + The cached calendar contains a new line in the description with the current time when cached prefixed by the + 'Cached at' mention + + + :param entry: representation of the entry to cache. 
This is the Python representation of the corresponding entry + in the config file + :type entry: dict + """ + if not os.path.isdir('cache'): os.mkdir('cache') @@ -27,6 +38,21 @@ def cache(entry: dict) -> None: def get_from_cache(entry: dict) -> Calendar: + """Retrieve the entry from cache. If the entry is not found, an exception is raised + + + :param entry: representation of the entry to cache. This is the Python representation of the corresponding entry + in the config file + :type entry: dict + + + :return: the corresponding calendar in cache + :rtype: Calendar + + + :raises FileNotFoundError: if the entry has not been cached before + """ + url = entry['url'] path = "cache/" + sha256(url.encode()).hexdigest() + ".ics" if not os.path.isfile(path): @@ -40,6 +66,22 @@ def get_from_cache(entry: dict) -> Calendar: def load_cal(entry: dict) -> Calendar: + """Load the calendar from the cache or from remote according to the entry. If the calendar is supposed to be + cached but could not be found in cache, an error is thrown + + + :param entry: representation of the entry to cache.
This is the Python representation of the corresponding entry + in the config file + :type entry: dict + + + :return: the calendar corresponding to the entry + :rtype: Calendar + + + :raises FileNotFoundError: if the entry was supposed to be cached but has not been cached before + """ + if "cache" in entry and entry["cache"]: print("Getting", entry["name"], "from cache") return get_from_cache(entry) @@ -57,6 +99,21 @@ def load_cal(entry: dict) -> Calendar: def horodate(cal: Calendar, prefix='') -> Calendar: + """Add a new line at the end of the description of every event in the calendar with the current time prefixed by + the prefix parameter and a space + The date is added with the following format: YYYY-MM-DD HH:mm:ss + + + :param cal: calendar to process + :type cal: Calendar + + :param prefix: the prefix to add in front of the date + :type prefix: str + + + :return: the modified calendar + :rtype: Calendar + """ now = arrow.now().format("YYYY-MM-DD HH:mm:ss") for event in cal.events: event.description = event.description + '\n' + prefix + ' ' + now \ @@ -66,6 +123,9 @@ def horodate(cal: Calendar, prefix='') -> Calendar: def background_cache() -> None: + """Start the caching of every config file found in the app/config directory + """ + path = "app/config" files = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')] @@ -84,6 +144,8 @@ def background_cache() -> None: class CacheThread(threading.Thread): + """Child class of the threading.Thread class to run the caching process every 10 minutes + """ def __init__(self): threading.Thread.__init__(self) From b93a5ae814092bb6315aaf7fe18096211b874dc1 Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Sat, 31 Oct 2020 21:32:12 +0100 Subject: [PATCH 5/7] Update working directory in the app.tools.caching module --- app/tools/caching.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/tools/caching.py b/app/tools/caching.py index
c08f5c5..40fb944 100644 --- a/app/tools/caching.py +++ b/app/tools/caching.py @@ -21,11 +21,11 @@ def cache(entry: dict) -> None: :type entry: dict """ - if not os.path.isdir('cache'): - os.mkdir('cache') + if not os.path.isdir('app/cache'): + os.mkdir('app/cache') url = entry['url'] - path = "cache/" + sha256(url.encode()).hexdigest() + ".ics" + path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" r = requests.get(entry["url"], allow_redirects=True) if "encoding" in entry: @@ -54,7 +54,7 @@ def get_from_cache(entry: dict) -> Calendar: """ url = entry['url'] - path = "cache/" + sha256(url.encode()).hexdigest() + ".ics" + path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" if not os.path.isfile(path): print("Not cached") raise FileNotFoundError("The calendar is not cached") From dff7b57c794d936296cbf0d9336d6b09b8e9f212 Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Sun, 1 Nov 2020 21:57:49 +0100 Subject: [PATCH 6/7] Improve caching method: using sched.scheduler --- app/server.py | 5 ++--- app/tools/caching.py | 52 +++++++++++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/app/server.py b/app/server.py index cc2645f..a801145 100644 --- a/app/server.py +++ b/app/server.py @@ -1,6 +1,6 @@ from flask import Flask, make_response -from app.tools.caching import * +from app.tools.caching import CacheThread from app.tools.tools import * app = Flask(__name__) @@ -22,8 +22,7 @@ def main(calendar): return response -# TODO find better way to launch periodic caching -# Maybe try with https://docs.python.org/3/library/sched.html thread = CacheThread() thread.start() + app.run(host='0.0.0.0', port=8088) diff --git a/app/tools/caching.py b/app/tools/caching.py index 40fb944..8c7502a 100644 --- a/app/tools/caching.py +++ b/app/tools/caching.py @@ -1,5 +1,6 @@ import json import os +import sched import threading import time from hashlib import sha256 @@ -9,16 +10,21 @@ import requests from ics import 
Calendar -def cache(entry: dict) -> None: +def cache(entry: dict, scheduler: sched.scheduler = None) -> None: """Cache an .ics feed in the app/cache directory. Different entries with the same URL will be cached in the same file. The cached calendar contains a new line in the description with the current time when cached prefixed by the 'Cached at' mention + :param entry: representation of the entry to cache. This is the Python representation of the corresponding entry in the config file :type entry: dict + + :param scheduler: scheduler used to relaunch the caching task in the future. If no scheduler is specified, + the task will not be relaunched + :type scheduler: sched.scheduler """ if not os.path.isdir('app/cache'): @@ -27,14 +33,25 @@ def cache(entry: dict) -> None: url = entry['url'] path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" - r = requests.get(entry["url"], allow_redirects=True) - if "encoding" in entry: - cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + try: + r = requests.get(entry["url"], allow_redirects=True) + except Exception as e: + print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Could not cache", entry) + print(e) else: - cal = Calendar(imports=r.content.decode()) + if "encoding" in entry: + cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + else: + cal = Calendar(imports=r.content.decode()) - cal = horodate(cal, 'Cached at') - open(path, 'w').writelines(cal) + cal = horodate(cal, 'Cached at') + open(path, 'w').writelines(cal) + print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Cached", entry['name']) + finally: + if scheduler is not None: + delay = entry['cache'] if entry['cache'] > 0 else 10 + delay *= 60 + scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler)) def get_from_cache(entry: dict) -> Calendar: @@ -122,8 +139,12 @@ def horodate(cal: Calendar, prefix='') -> Calendar: return cal -def background_cache() -> None: +def start_scheduler(scheduler:
sched.scheduler) -> None: """Start the caching of every config file found in the app/config directory + + + :param scheduler: scheduler object to use to schedule the caching + :type scheduler: sched.scheduler """ path = "app/config" @@ -135,22 +156,19 @@ def background_cache() -> None: config = json.loads(config_file.read()) for entry in config: - if 'cache' in entry and entry['cache']: - try: - cache(entry) - except: - print("Could not cache", entry) - print('Cache renewed', arrow.now().format("YYYY-MM-DD HH:mm:ss")) + if 'cache' in entry: + scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler)) + + scheduler.run() class CacheThread(threading.Thread): """Child class of the threading.Thread class to run the caching process every 10 minutes """ + def __init__(self): threading.Thread.__init__(self) def run(self): print("Starting cache process") - while True: - background_cache() - time.sleep(10 * 60) + start_scheduler(sched.scheduler(time.time, time.sleep)) From a6d4b3b113f17fcd1421978d79a3583e3a92f48a Mon Sep 17 00:00:00 2001 From: Jules Dejaeghere Date: Tue, 3 Nov 2020 21:59:01 +0100 Subject: [PATCH 7/7] Update readme and the config sample to add the cache setting --- README.md | 12 +++++++----- config-sample.txt | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 78356ad..f32d1f2 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ The JSON configuration file should look like the following. { "url":"str", "name":"str", + "cache": 10, "encoding":"str", "filters":{ "name":{ @@ -60,16 +61,17 @@ The JSON configuration file should look like the following. Only the `url` and the `name` field are mandatory. 
- `url`: specify the url to find the calendar - `name`: name to identify the calendar +- `cache`: if present, cache the remote calendar and refresh the cache at the given interval, in minutes - `encoding`: specify the encoding to use - + - `filters`: structure defining the filters to apply to the calendar - `name`: filters to apply to the name field of the events - `description`: filters to apply to the name field of the events - `exclude`: RegEx to describe the events to exclude - cannot be specified with includeOnly - `includeOnly`: RegEx to describe the events to include - cannot be specified with exclude -- `ignoreCase`: if true the RegEx will ignore the case of the field - +- `ignoreCase`: if true the RegEx will ignore the case of the field + - `modify`: structure defining the modifications to the events of the calendar - `time`: describe the modifications to apply to the timing of the event @@ -80,11 +82,11 @@ Only the `url` and the `name` field are mandatory. - `location`: modification to apply to the location of the events - `addPrefix`: string to add at the beginning of the field - `addSuffix`: string to add at the end of the field - + If multiple calendars are specified in the configuration list, their events will be merged in the resulting ics feed. ## Usage Once the config file is created, the corresponding HTTP endpoint is accessible. For example, if the file `app/config/my-calendar.json` contains the configuration, the HTTP endpoint will be `http://localhost:8088/my-calendar`. ## Limitations -Currently, the application only merges events of the ics feeds, the alarms and todos are not supported. There is no mechanism to handle the case where an incoming feed becomes unavailable. +Currently, the application only merges events of the ics feeds, the alarms and todos are not supported.
diff --git a/config-sample.txt b/config-sample.txt index ca85daf..30108ce 100644 --- a/config-sample.txt +++ b/config-sample.txt @@ -1,6 +1,7 @@ [ {"url": "str", "name": "str", + "cache": 10, "encoding": "str", "filters": { "name": {"exclude": "RegEx", "includeOnly": "RegEx", "ignoreCase": true},