diff --git a/.gitignore b/.gitignore index 93ba534..a054264 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,4 @@ dmypy.json # Pyre type checker .pyre/ app/config/calendar.json -app/config/calendar.json +/app/cache/ diff --git a/README.md b/README.md index 78356ad..f32d1f2 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ The JSON configuration file should look like the following. { "url":"str", "name":"str", + "cache": 10, "encoding":"str", "filters":{ "name":{ @@ -60,16 +61,17 @@ The JSON configuration file should look like the following. Only the `url` and the `name` field are mandatory. - `url`: specify the url to find the calendar - `name`: name to identify the calendar +- `cache`: if present cache the remote calendar according to the interval set in minutes - `encoding`: specify the encoding to use - + - `filters`: structure defining the filters to apply to the calendar - `name`: filters to apply to the name field of the events - `description`: filters to apply to the name field of the events - `exclude`: RegEx to describe the events to exclude - cannot be specified with includeOnly - `includeOnly`: RegEx to describe the events to include - cannot be specified with exclude -- `ignoreCase`: if true the RegEx will ignore the case of the field - +- `ignoreCase`: if true the RegEx will ignore the case of the field + - `modify`: structure defining the modifications to the events of the calendar - `time`: describe the modifications to apply to the timing of the event @@ -80,11 +82,11 @@ Only the `url` and the `name` field are mandatory. - `location`: modification to apply to the location of the events - `addPrefix`: string to add at the beginning of the field - `addSuffix`: string to add at the end of the field - + If multiple calendars are specified in the configuration list, their events will be merged in the resulting ics feed. ## Usage Once the config file is created, the corresponding HTTP endpoint is accessible. 
For example, if the file `app/config/my-calendar.json` contains the configuration, the HTTP endpoint will be `http://localhost:8088/my-calendar`. ## Limitations -Currently, the application only merges events of the ics feeds, the alarms and todos are not supported. There is no mechanism to handle the case where an incoming feed becomes unavailable. +Currently, the application only merges events of the ics feeds, the alarms and todos are not supported. diff --git a/app/server.py b/app/server.py index e733e2b..a801145 100644 --- a/app/server.py +++ b/app/server.py @@ -1,6 +1,8 @@ -from tools.tools import * from flask import Flask, make_response +from app.tools.caching import CacheThread +from app.tools.tools import * + app = Flask(__name__) @@ -10,10 +12,17 @@ def main(calendar): print("Opening " + conf) - result = str(process(conf)) - response = make_response(result, 200) - response.headers["Content-Disposition"] = "attachment; filename=calendar.ics" + try: + result = str(process(conf)) + response = make_response(result, 200) + response.headers["Content-Disposition"] = "attachment; filename=calendar.ics" + except FileNotFoundError: + response = make_response("Calendar not cached", 425) + return response +thread = CacheThread() +thread.start() + app.run(host='0.0.0.0', port=8088) diff --git a/app/tools/caching.py b/app/tools/caching.py new file mode 100644 index 0000000..8c7502a --- /dev/null +++ b/app/tools/caching.py @@ -0,0 +1,174 @@ +import json +import os +import sched +import threading +import time +from hashlib import sha256 + +import arrow +import requests +from ics import Calendar + + +def cache(entry: dict, scheduler: sched.scheduler = None) -> None: + """Cache an .ics feed in the app/cache directory. + Different entries with the same URL will be cached in the same file. 
+ Every event of the cached calendar gets a new line in its description with the time of caching, prefixed by the + 'Cached at' mention + + + + :param entry: representation of the entry to cache. This is the Python representation of the corresponding entry + in the config file + :type entry: dict + + :param scheduler: scheduler used to relaunch the caching task in the future. If no scheduler is specified, + the task will not be relaunched + :type scheduler: sched.scheduler + """ + + if not os.path.isdir('app/cache'): + os.mkdir('app/cache') + + url = entry['url'] + path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" + + try: + r = requests.get(entry["url"], allow_redirects=True) + except Exception as e: + print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Could not cache", entry) + print(e) + else: + if "encoding" in entry: + cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + else: + cal = Calendar(imports=r.content.decode()) + + cal = horodate(cal, 'Cached at') + open(path, 'w').writelines(cal) + print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Cached", entry['name']) + finally: + if scheduler is not None: + delay = entry['cache'] if entry['cache'] > 0 else 10 + delay *= 60 + scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler)) + + +def get_from_cache(entry: dict) -> Calendar: + """Retrieve the entry from cache. If the entry is not found, an exception is raised + + + :param entry: representation of the entry to retrieve. 
This is the Python representation of the corresponding entry + in the config file + :type entry: dict + + + :return: the corresponding calendar in cache + :rtype: Calendar + + + :raises FileNotFoundError: if the entry has not been cached before + """ + + url = entry['url'] + path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics" + if not os.path.isfile(path): + print("Not cached") + raise FileNotFoundError("The calendar is not cached") + + with open(path, 'r') as file: + data = file.read() + + return Calendar(imports=data) + + +def load_cal(entry: dict) -> Calendar: + """Load the calendar from the cache or from remote according to the entry. If the calendar is supposed to be + cached but cannot be found in the cache, an error is raised + + + :param entry: representation of the entry to load. This is the Python representation of the corresponding entry + in the config file + :type entry: dict + + + :return: the calendar corresponding to the entry + :rtype: Calendar + + + :raises FileNotFoundError: if the entry was supposed to be cached but has not been cached before + """ + + if "cache" in entry and entry["cache"]: + print("Getting", entry["name"], "from cache") + return get_from_cache(entry) + + else: + print("Getting", entry["name"], "from remote") + r = requests.get(entry["url"], allow_redirects=True) + if "encoding" in entry: + cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) + else: + cal = Calendar(imports=r.content.decode()) + + cal = horodate(cal, 'Downloaded at') + return cal + + +def horodate(cal: Calendar, prefix='') -> Calendar: + """Add a new line at the end of the description of every event in the calendar with the current time prefixed by + the prefix parameter and a space + The date is added with the following format: YYYY-MM-DD HH:mm:ss + + + :param cal: calendar to process + :type cal: Calendar + + :param prefix: the prefix to add in front of the date + :type prefix: str + + + :return: the modified calendar + :rtype: 
Calendar + """ + now = arrow.now().format("YYYY-MM-DD HH:mm:ss") + for event in cal.events: + event.description = event.description + '\n' + prefix + ' ' + now \ + if event.description is not None else prefix + ' ' + now + + return cal + + +def start_scheduler(scheduler: sched.scheduler) -> None: + """Schedule the caching of every entry with a 'cache' field found in the config files of the app/config directory, then run the scheduler + + + :param scheduler: scheduler object to use to schedule the caching + :type scheduler: sched.scheduler + """ + + path = "app/config" + files = [os.path.join(path, f) for f in os.listdir(path) + if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')] + + for file in files: + with open(file, 'r') as config_file: + config = json.loads(config_file.read()) + + for entry in config: + if 'cache' in entry: + scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler)) + + scheduler.run() + + +class CacheThread(threading.Thread): + """Child class of the threading.Thread class that runs the caching scheduler in its own thread; each entry is re-cached at the interval given by its 'cache' field + """ + + def __init__(self): + threading.Thread.__init__(self) + + def run(self): + print("Starting cache process") + start_scheduler(sched.scheduler(time.time, time.sleep)) diff --git a/app/tools/tools.py b/app/tools/tools.py index a551e76..bdff209 100644 --- a/app/tools/tools.py +++ b/app/tools/tools.py @@ -73,11 +73,11 @@ Only the url and the name field are mandatory. 
import json import re +from typing import List -import requests from ics import Calendar from pathvalidate import sanitize_filename -from typing import List +from app.tools.caching import load_cal def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar: @@ -241,11 +241,11 @@ def modify_text(cal: Calendar, modify: dict, field_name: str) -> Calendar: if event.name is not None else change["addSuffix"] elif field_name == "description": - event.name = event.description + change["addSuffix"] \ + event.description = event.description + change["addSuffix"] \ if event.description is not None else change["addSuffix"] elif field_name == "location": - event.name = event.location + change["addSuffix"] \ + event.location = event.location + change["addSuffix"] \ if event.location is not None else change["addSuffix"] return cal @@ -312,7 +312,7 @@ def process(path: str) -> Calendar: :rtype: Calendar """ - o = "config/" + sanitize_filename(path) + o = "app/config/" + sanitize_filename(path) print("Try to open " + o) file = open(o, "r") config = json.loads(file.read()) @@ -321,12 +321,8 @@ def process(path: str) -> Calendar: data = [] for entry in config: - print("Getting " + entry["name"]) - r = requests.get(entry["url"], allow_redirects=True) - if "encoding" in entry: - cal = Calendar(imports=r.content.decode(encoding=entry["encoding"])) - else: - cal = Calendar(imports=r.content.decode()) + + cal = load_cal(entry) if "filters" in entry: cal = apply_filters(cal, entry["filters"]) diff --git a/config-sample.txt b/config-sample.txt index ca85daf..30108ce 100644 --- a/config-sample.txt +++ b/config-sample.txt @@ -1,6 +1,7 @@ [ {"url": "str", "name": "str", + "cache": 10, "encoding": "str", "filters": { "name": {"exclude": "RegEx", "includeOnly": "RegEx", "ignoreCase": true}, diff --git a/requirements.txt b/requirements.txt index 4de1d77..a143602 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ requests~=2.22.0 ics~=0.7 pathvalidate~=2.3.0 
-flask~=1.1.1 \ No newline at end of file +flask~=1.1.1 +arrow~=0.14.7 \ No newline at end of file