Merge pull request #1 from jdejaegh/caching

Add a caching system in case where remote calendar is unreachable
This commit is contained in:
Jules 2020-11-03 22:20:21 +01:00 committed by GitHub
commit 1c4f9f2be2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 205 additions and 22 deletions

2
.gitignore vendored
View file

@ -131,4 +131,4 @@ dmypy.json
# Pyre type checker
.pyre/
app/config/calendar.json
app/config/calendar.json
/app/cache/

View file

@ -17,6 +17,7 @@ The JSON configuration file should look like the following.
{
"url":"str",
"name":"str",
"cache": 10,
"encoding":"str",
"filters":{
"name":{
@ -60,16 +61,17 @@ The JSON configuration file should look like the following.
Only the `url` and the `name` field are mandatory.
- `url`: specify the url to find the calendar
- `name`: name to identify the calendar
- `cache`: if present, cache the remote calendar locally and refresh the cached copy at the given interval (in minutes)
- `encoding`: specify the encoding to use
- `filters`: structure defining the filters to apply to the calendar
- `name`: filters to apply to the name field of the events
- `description`: filters to apply to the description field of the events
- `exclude`: RegEx to describe the events to exclude - cannot be specified with includeOnly
- `includeOnly`: RegEx to describe the events to include - cannot be specified with exclude
- `ignoreCase`: if true the RegEx will ignore the case of the field
- `ignoreCase`: if true the RegEx will ignore the case of the field
- `modify`: structure defining the modifications to the events of the calendar
- `time`: describe the modifications to apply to the timing of the event
@ -80,11 +82,11 @@ Only the `url` and the `name` field are mandatory.
- `location`: modification to apply to the location of the events
- `addPrefix`: string to add at the beginning of the field
- `addSuffix`: string to add at the end of the field
If multiple calendars are specified in the configuration list, their events will be merged in the resulting ics feed.
## Usage
Once the config file is created, the corresponding HTTP endpoint is accessible. For example, if the file `app/config/my-calendar.json` contains the configuration, the HTTP endpoint will be `http://localhost:8088/my-calendar`.
## Limitations
Currently, the application only merges events of the ics feeds, the alarms and todos are not supported. There is no mechanism to handle the case where an incoming feed becomes unavailable.
Currently, the application only merges events of the ics feeds, the alarms and todos are not supported.

View file

@ -1,6 +1,8 @@
from tools.tools import *
from flask import Flask, make_response
from app.tools.caching import CacheThread
from app.tools.tools import *
app = Flask(__name__)
@ -10,10 +12,17 @@ def main(calendar):
print("Opening " + conf)
result = str(process(conf))
response = make_response(result, 200)
response.headers["Content-Disposition"] = "attachment; filename=calendar.ics"
try:
result = str(process(conf))
response = make_response(result, 200)
response.headers["Content-Disposition"] = "attachment; filename=calendar.ics"
except FileNotFoundError:
response = make_response("Calendar not cached", 425)
return response
thread = CacheThread()
thread.start()
app.run(host='0.0.0.0', port=8088)

174
app/tools/caching.py Normal file
View file

@ -0,0 +1,174 @@
import json
import os
import sched
import threading
import time
from hashlib import sha256
import arrow
import requests
from ics import Calendar
def cache(entry: dict, scheduler: sched.scheduler = None) -> None:
    """Cache an .ics feed in the app/cache directory.

    Different entries with the same URL will be cached in the same file.
    The cached calendar contains a new line in the description of every event with the current time when cached,
    prefixed by the 'Cached at' mention.

    A failure while downloading, decoding, parsing or writing the feed is logged and does not propagate, so the
    previously cached file (if any) is left untouched and the scheduler running this task keeps going.

    :param entry: representation of the entry to cache. This is the Python representation of the corresponding entry
        in the config file
    :type entry: dict

    :param scheduler: scheduler used to relaunch the caching task in the future. If no scheduler is specified,
        the task will not be relaunched
    :type scheduler: sched.scheduler
    """
    url = entry['url']
    path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics"
    tmp_path = path + ".tmp"

    try:
        # exist_ok avoids the check-then-create race of isdir() followed by mkdir()
        os.makedirs('app/cache', exist_ok=True)

        # Without a timeout a stalled remote would block the scheduler, and therefore every other calendar, forever
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Do not try to parse (and cache) an HTTP error page
        r.raise_for_status()

        # bytes.decode() defaults to UTF-8, which is what is used when the entry gives no encoding
        cal = Calendar(imports=r.content.decode(encoding=entry.get("encoding", "utf-8")))
        cal = horodate(cal, 'Cached at')

        # Write next to the final file and swap it in, so a reader never sees a half-written calendar
        with open(tmp_path, 'w') as file:
            file.writelines(cal)
        os.replace(tmp_path, path)
    except Exception as e:
        # Boundary of a background task: report the problem and retry at the next scheduled run
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Could not cache", entry)
        print(e)
    else:
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Cached", entry['name'])
    finally:
        if scheduler is not None:
            # A non-positive interval falls back to 10 minutes; the scheduler expects seconds
            delay = entry['cache'] if entry['cache'] > 0 else 10
            delay *= 60
            scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler))
def get_from_cache(entry: dict) -> Calendar:
    """Read the cached copy of the calendar described by the entry.

    The cache file is looked up in app/cache under the SHA-256 hex digest of the entry URL.

    :param entry: representation of the entry to retrieve. This is the Python representation of the corresponding
        entry in the config file
    :type entry: dict

    :return: the corresponding calendar in cache
    :rtype: Calendar

    :raises FileNotFoundError: if the entry has not been cached before
    """
    digest = sha256(entry['url'].encode()).hexdigest()
    cached_path = "app/cache/" + digest + ".ics"

    if os.path.isfile(cached_path):
        with open(cached_path, 'r') as cached_file:
            content = cached_file.read()
        return Calendar(imports=content)

    print("Not cached")
    raise FileNotFoundError("The calendar is not cached")
def load_cal(entry: dict) -> Calendar:
    """Return the calendar of an entry, read from the cache or downloaded from the remote.

    The cache is used when the entry has a truthy "cache" value; otherwise the feed is downloaded and every event
    gets a 'Downloaded at' line appended to its description.

    :param entry: representation of the entry to load. This is the Python representation of the corresponding entry
        in the config file
    :type entry: dict

    :return: the calendar corresponding to the entry
    :rtype: Calendar

    :raises FileNotFoundError: if the entry was supposed to be cached but has not been cached before
    """
    if not entry.get("cache"):
        print("Getting", entry["name"], "from remote")
        response = requests.get(entry["url"], allow_redirects=True)
        raw = response.content
        # Use the configured encoding when there is one, the bytes.decode() default otherwise
        text = raw.decode(encoding=entry["encoding"]) if "encoding" in entry else raw.decode()
        return horodate(Calendar(imports=text), 'Downloaded at')

    print("Getting", entry["name"], "from cache")
    return get_from_cache(entry)
def horodate(cal: Calendar, prefix='') -> Calendar:
    """Timestamp every event of the calendar.

    The current time, formatted as YYYY-MM-DD HH:mm:ss and preceded by the prefix and a space, is appended on a new
    line to the description of each event. An event without description gets the timestamp as its description.

    :param cal: calendar to process
    :type cal: Calendar

    :param prefix: the prefix to add in front of the date
    :type prefix: str

    :return: the modified calendar
    :rtype: Calendar
    """
    timestamp = arrow.now().format("YYYY-MM-DD HH:mm:ss")

    for event in cal.events:
        if event.description is None:
            event.description = prefix + ' ' + timestamp
        else:
            event.description = event.description + '\n' + prefix + ' ' + timestamp

    return cal
def start_scheduler(scheduler: sched.scheduler) -> None:
    """Start the caching of every config file found in the app/config directory.

    Every entry holding a 'cache' key in every .json file of app/config is scheduled for immediate caching, then
    the scheduler is run (this call blocks for as long as the scheduler has pending tasks).

    A config file that cannot be read or parsed, or that does not contain a list, is reported and skipped so that
    it does not prevent the other calendars from being cached.

    :param scheduler: scheduler object to use to schedule the caching
    :type scheduler: sched.scheduler
    """
    path = "app/config"
    files = [os.path.join(path, f) for f in os.listdir(path)
             if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')]

    for file in files:
        try:
            with open(file, 'r') as config_file:
                config = json.load(config_file)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses
            print("Could not load config", file)
            print(e)
            continue

        if not isinstance(config, list):
            print("Ignoring config", file, "(expected a list of entries)")
            continue

        for entry in config:
            # Guard against non-dict items, for which "'cache' in entry" would not be a key lookup
            if isinstance(entry, dict) and 'cache' in entry:
                scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler))

    scheduler.run()
class CacheThread(threading.Thread):
    """Background thread running the caching process.

    When started, the thread builds a scheduler and hands it to start_scheduler, which schedules the caching of the
    configured calendars and runs the scheduler in this thread.
    """

    def __init__(self):
        # Daemon thread: the caching tasks keep rescheduling themselves, so a non-daemon thread would keep the
        # interpreter alive after the main thread (the web server) has stopped.
        super().__init__(daemon=True)

    def run(self):
        """Run the caching scheduler; blocks for as long as caching tasks are pending."""
        print("Starting cache process")
        start_scheduler(sched.scheduler(time.time, time.sleep))

View file

@ -73,11 +73,11 @@ Only the url and the name field are mandatory.
import json
import re
from typing import List
import requests
from ics import Calendar
from pathvalidate import sanitize_filename
from typing import List
from app.tools.caching import load_cal
def filtering(cal: Calendar, filters: dict, field_name: str) -> Calendar:
@ -241,11 +241,11 @@ def modify_text(cal: Calendar, modify: dict, field_name: str) -> Calendar:
if event.name is not None else change["addSuffix"]
elif field_name == "description":
event.name = event.description + change["addSuffix"] \
event.description = event.description + change["addSuffix"] \
if event.description is not None else change["addSuffix"]
elif field_name == "location":
event.name = event.location + change["addSuffix"] \
event.location = event.location + change["addSuffix"] \
if event.location is not None else change["addSuffix"]
return cal
@ -312,7 +312,7 @@ def process(path: str) -> Calendar:
:rtype: Calendar
"""
o = "config/" + sanitize_filename(path)
o = "app/config/" + sanitize_filename(path)
print("Try to open " + o)
file = open(o, "r")
config = json.loads(file.read())
@ -321,12 +321,8 @@ def process(path: str) -> Calendar:
data = []
for entry in config:
print("Getting " + entry["name"])
r = requests.get(entry["url"], allow_redirects=True)
if "encoding" in entry:
cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
else:
cal = Calendar(imports=r.content.decode())
cal = load_cal(entry)
if "filters" in entry:
cal = apply_filters(cal, entry["filters"])

View file

@ -1,6 +1,7 @@
[
{"url": "str",
"name": "str",
"cache": 10,
"encoding": "str",
"filters": {
"name": {"exclude": "RegEx", "includeOnly": "RegEx", "ignoreCase": true},

View file

@ -1,4 +1,5 @@
requests~=2.22.0
ics~=0.7
pathvalidate~=2.3.0
flask~=1.1.1
flask~=1.1.1
arrow~=0.14.7