mirror of
https://github.com/jdejaegh/cartopy-tor-relays.git
synced 2025-06-26 21:15:40 +02:00
Add code
This commit is contained in:
commit
3a68a988c1
7 changed files with 363 additions and 0 deletions
160
.gitignore
vendored
Normal file
160
.gitignore
vendored
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 Jules Dejaeghere
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
54
README.md
Normal file
54
README.md
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
# Map Tor relays using Cartopy
|
||||||
|
Create a map showing the geographic location of the Tor relays
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. Create a venv: `python -m venv venv && source venv/bin/activate`
|
||||||
|
2. Install requirements: `pip install -r requirements.txt`
|
||||||
|
|
||||||
|
## Get the data to create the map
|
||||||
|
|
||||||
|
You'll need two files:
|
||||||
|
1. The Tor consensus you want to use. Download one at: https://metrics.torproject.org/collector/recent/relay-descriptors/consensuses/
|
||||||
|
2. The _GeoLite2 City_ file from MaxMind (not the CSV format). See their website to create an account and get the file: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
|
||||||
|
|
||||||
|
## Run the program
|
||||||
|
|
||||||
|
The program takes two arguments: the filename of the consensus and the filename of the GeoLite2 mmdb file.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python map.py 2024-03-27-13-00-00-consensus GeoLite2-City.mmdb
|
||||||
|
```
|
||||||
|
|
||||||
|
A third (optional) parameter can control the density of the clusters. The default is 1.5 and generally gives nice maps.
|
||||||
|
The higher the value, the bigger the clusters.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python map.py 2024-03-27-13-00-00-consensus GeoLite2-City.mmdb 1.5
|
||||||
|
```
|
||||||
|
|
||||||
|
## Using OSM data (optional)
|
||||||
|
|
||||||
|
It is possible to use data from OpenStreetMap as background (instead of the default cartopy image).
|
||||||
|
|
||||||
|
Check the code and the `TODO` comment to enable that. If you use OSM data and Mapbox as suggested in the comment,
|
||||||
|
please attribute it properly.
|
||||||
|
|
||||||
|
The following attribution line is generally enough:
|
||||||
|
|
||||||
|
> © <a href='https://www.mapbox.com/about/maps/'>Mapbox</a> © <a href='http://www.openstreetmap.org/copyright'>OpenStreetMap</a> <strong><a href='https://www.mapbox.com/map-feedback/' target='_blank'>Improve this map</a></strong>
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This attribution, or a similar one, should be included when you use this script with MaxMind data.
|
||||||
|
|
||||||
|
> This product includes GeoLite2 Data created by MaxMind, available from https://www.maxmind.com
|
||||||
|
|
||||||
|
## Examples of maps
|
||||||
|
|
||||||
|
### Default cartopy background
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### With a custom style from MapBox
|
||||||
|

|
BIN
img/map.png
Normal file
BIN
img/map.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 950 KiB |
BIN
img/map_osm.png
Normal file
BIN
img/map_osm.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.9 MiB |
123
map.py
Normal file
123
map.py
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
from math import log

import cartopy.crs as ccrs
from cartopy.io.img_tiles import *  # noqa: F401,F403 - keeps MapboxStyleTiles available for the optional OSM background
import fire
import geoip2.database
import geoip2.errors
import matplotlib.colors
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
|
||||||
|
|
||||||
|
|
||||||
|
def cluster_coordinates(coordinates, eps=1.5, min_samples=1):
    """
    Use DBSCAN to cluster points and have a readable map

    :param coordinates: sequence or array of 2D points, e.g. the [lon, lat]
        pairs returned by geo_ip (any array-like is accepted)
    :param eps: control the density of the cluster (the higher the value,
        the bigger the clusters)
    :param min_samples: minimum number of samples in a cluster
    :return: tuple (clusters, largest, smallest) where clusters is a list of
        (center, count) pairs, center being the mean of the points of the
        cluster, and largest/smallest are the biggest and smallest count
    :raises ValueError: if coordinates is empty
    """
    # Boolean-mask indexing below needs an array, not a plain list.
    coordinates = np.asarray(coordinates)
    if coordinates.size == 0:
        # Fail early with a clear message instead of an obscure error from
        # DBSCAN or from max() on an empty list.
        raise ValueError('cluster_coordinates needs at least one point')

    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    dbscan.fit(coordinates)
    labels = dbscan.labels_

    cluster_centers = []
    cluster_counts = []
    for label in np.unique(labels):
        if label == -1:
            # -1 is the label DBSCAN gives to noise, handled separately below
            continue
        cluster_mask = labels == label
        cluster_centers.append(coordinates[cluster_mask].mean(axis=0))
        cluster_counts.append(int(cluster_mask.sum()))

    # Points left out of every cluster are kept as clusters of one point so
    # that they still show up on the map.
    noise_points = coordinates[labels == -1]
    cluster_centers.extend(noise_points)
    cluster_counts.extend([1] * len(noise_points))

    clusters = list(zip(cluster_centers, cluster_counts))
    return clusters, max(cluster_counts), min(cluster_counts)
|
||||||
|
|
||||||
|
|
||||||
|
def geo_ip(ip, reader):
    """
    Look up the position of an IP address with the given reader

    Any exception raised by reader.city(ip) is passed on to the caller.

    :param ip: IP address
    :param reader: a geoip2.database.Reader
    :return: [lon, lat] location
    """
    location = reader.city(ip).location
    return [location.longitude, location.latitude]
|
||||||
|
|
||||||
|
|
||||||
|
def get_ip_from_consensus(filename):
    """
    Get the IP addresses of the relays present in the consensus at filename

    Only the lines starting with "r " are used; the IP address is the
    seventh whitespace-separated field of such a line. Shorter "r " lines
    are ignored.

    :param filename: filename of the consensus
    :return: list of IP of the relays in the consensus
    """
    result = []
    # Decode explicitly instead of relying on the platform default, and do
    # not let an undecodable byte somewhere in the file abort the parsing:
    # only the IP field of the "r " lines is needed.
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            if not line.startswith("r "):
                continue
            fields = line.split()
            if len(fields) >= 7:
                result.append(fields[6])
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def main(consensus_file, geoip_data_file, eps=1.5):
    """
    Create a map based on the consensus_file and geoip_data_file

    The relays found in the consensus are geocoded, clustered and drawn on a
    world map saved as map.png in the current directory. Relays that cannot
    be located are skipped and their number is printed.

    :param consensus_file: filename of a Tor consensus, see https://metrics.torproject.org/collector/recent/relay-descriptors/consensuses/
    :param geoip_data_file: MaxMind mmdb filename, see https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
    :param eps: control the density of the cluster on the map
    :raises SystemExit: if none of the relays could be geolocated
    """
    print('Reading consensus file')
    ips = get_ip_from_consensus(consensus_file)
    print(f'Found {len(ips)} relays')

    print('Geocoding IP addresses')
    points = []
    skipped = 0
    # The context manager closes the database file, even when geocoding fails
    with geoip2.database.Reader(geoip_data_file) as reader:
        for ip in ips:
            try:
                lon, lat = geo_ip(ip, reader)
            except geoip2.errors.AddressNotFoundError:
                # The database does not know every address: one unknown relay
                # should not prevent the map from being drawn.
                skipped += 1
                continue
            if lon is None or lat is None:
                # Known address but no coordinates: it cannot be placed
                skipped += 1
                continue
            points.append([lon, lat])

    if skipped:
        print(f'Skipped {skipped} relays without a known location')
    if not points:
        raise SystemExit('No relay could be geolocated, no map created')

    points, vmax, vmin = cluster_coordinates(np.array(points), eps=eps)

    # Two rows: the map itself and a thin row for the colorbar
    fig = plt.figure(figsize=(10, 5))
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 0.05], figure=fig)
    plate_carree = ccrs.PlateCarree()
    ax = fig.add_subplot(gs[0], projection=plate_carree)

    ax.stock_img()
    ax.coastlines()

    # TODO if you want to use OSM data with Mapbox, create an account and a custom style on Mapbox.
    #  Then, fill the credentials below, comment the ax.stock_img() and ax.coastlines() lines and
    #  uncomment the lines below
    #  see https://docs.mapbox.com/help/tutorials/create-a-custom-style/
    # osm_tiles = MapboxStyleTiles(
    #     access_token='',
    #     map_id='',
    #     username='',
    #     cache=False)
    # ax.add_image(osm_tiles, 4)

    cmap = plt.cm.hot
    # Cluster sizes span several orders of magnitude, hence the log scale for
    # the colors and for the marker size (with a minimum size of 2).
    norm = matplotlib.colors.LogNorm(vmin=vmin, vmax=vmax)
    for pos, count in points:
        ax.plot(pos[0], pos[1], 'o', markersize=max(4 * log(count, 10), 2), transform=plate_carree,
                color=cmap(norm(count)))

    ax.set_global()
    plt.box(False)
    ax.set_extent([-170, 180, -60, 85], crs=plate_carree)
    cb_ax = fig.add_subplot(gs[1])

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    cbar = plt.colorbar(sm, cax=cb_ax, orientation='horizontal')
    cbar.set_label('Number of relays')

    plt.tight_layout()
    print('Saving map as map.png')
    plt.savefig('map.png', dpi=300)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main is handed to fire.Fire so that its parameters can
# be given on the command line (see the README for usage examples).
if __name__ == '__main__':
    fire.Fire(main)
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
matplotlib
|
||||||
|
geoip2
|
||||||
|
scikit-learn
|
||||||
|
cartopy
|
||||||
|
fire
|
Loading…
Add table
Reference in a new issue