import seaborn as sns
import pandas as pd
import datetime
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
-- Overall totals for changesets whose comment tag contains "golf"
-- (case-insensitive, via LOWER + LIKE).
SELECT
    COUNT(id)           AS num_changesets,  -- matching changesets
    SUM(num_changes)    AS osm_changes,     -- total of num_changes across them
    COUNT(DISTINCT uid) AS num_users        -- distinct uid values
FROM changesets
WHERE LOWER(tags['comment']) LIKE '%golf%'
num_changesets | osm_changes | num_users |
---|---|---|
72,595 | 16,863,001 | 9,595 |
It looks like roughly 9,600 users have submitted changesets that probably have to do with golf courses, according to this very rudimentary analysis.
Let's do the same query but over time:
-- Same "golf" comment filter as the totals query, broken down by the
-- calendar day of created_at. Also collects the distinct comment strings
-- seen on each day.
SELECT
    DATE_TRUNC('day', created_at)       AS day,
    COUNT(id)                           AS num_changesets,
    SUM(num_changes)                    AS osm_changes,
    ARRAY_AGG(DISTINCT tags['comment']) AS comments,
    COUNT(DISTINCT uid)                 AS num_users
FROM changesets
WHERE LOWER(tags['comment']) LIKE '%golf%'
GROUP BY DATE_TRUNC('day', created_at)
# Load the exported query results and index them by timestamp so the plots
# below get a date x-axis.
# NOTE(review): presumably this CSV is the output of the per-day "golf" query;
# the path is absolute and machine-specific, so adjust it before running elsewhere.
csv_path = '/Users/jenningsanderson/Downloads/4dfb7575-0dc4-46a9-8f46-e7a27680ffe6.csv'
df = pd.read_csv(csv_path)
# Parse the whole `day` column in one vectorized call instead of invoking
# pd.Timestamp once per row; the original `day` column is kept untouched.
df['date'] = pd.to_datetime(df['day'])
df = df.set_index('date')
df.head(2)
# Scatter (one dot per row of df) of the number of matching changesets.
sns.set_style("darkgrid")
ax = df["num_changesets"].plot(figsize=(15, 8), style=".")
ax.set_title("Number of changesets with 'golf' in the comments", fontsize=16)
ax.set_xlabel("Date")
# The series is plotted as-is (no cumsum is applied), so the axis shows
# per-day counts rather than a running total; label it accordingly.
ax.set_ylabel("Changeset Count (per day)");
# Line plot of distinct mappers per row of df, with the x-axis starting at
# 2018-01-01 (only the left limit is set; the right limit is left automatic).
ax = df["num_users"].plot(figsize=(15, 8))
ax.set_xlabel("Date")
ax.set_ylabel("Number of mappers each day")
ax.set_xlim(datetime.date(2018, 1, 1))
# Trailing semicolon suppresses the notebook's echo of the return value.
ax.set_title("Number of distinct mappers each day submitting Changesets with `golf` in the comment");
# Ratio of osm_changes to num_users for every row, plotted over the full
# date range (no axis limits are applied in this version).
changes_per_mapper = df["osm_changes"] / df["num_users"]
ax = changes_per_mapper.plot(figsize=(15, 8))
ax.set_xlabel("Date")
ax.set_ylabel("Average number of OSM changes / mapper")
# Trailing semicolon suppresses the notebook's echo of the return value.
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");
# Ratio of osm_changes to num_users for every row, zoomed in: the x-axis
# starts at 2018-01-01 and the y-axis is clipped to the range 0..3500.
changes_per_mapper = df["osm_changes"] / df["num_users"]
ax = changes_per_mapper.plot(figsize=(15, 8))
ax.set_xlabel("Date")
ax.set_ylabel("Average number of OSM changes / mapper")
ax.set_xlim(datetime.date(2018, 1, 1))
ax.set_ylim(0, 3500)
# Trailing semicolon suppresses the notebook's echo of the return value.
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");