In [1]:
import seaborn as sns
import pandas as pd
import datetime

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

How about some Golf Courses?¶

SELECT 
  count(id) AS num_changesets, 
  sum(num_changes) AS osm_changes, 
  count(distinct(uid)) AS num_users
FROM changesets
WHERE lower(tags['comment']) like '%golf%'
num_changesets osm_changes num_users
72,595 16,863,001 9,595

It looks like 9500 users have submitted changeses that probably have to do with golf courses by this very rudimentary analysis.

Let's do the same query but over time:

SELECT 
  date_trunc('day', created_at) AS day, 
  count(id) AS num_changesets, 
  sum(num_changes) AS osm_changes, 
  array_agg(distinct(tags['comment'])) AS comments,
  count(distinct(uid)) AS num_users
FROM changesets
WHERE lower(tags['comment']) like '%golf%'
GROUP BY  date_trunc('day', created_at)
In [2]:
df = pd.read_csv('/Users/jenningsanderson/Downloads/4dfb7575-0dc4-46a9-8f46-e7a27680ffe6.csv')
df['date'] = df.day.apply(pd.Timestamp)
df.set_index('date', inplace=True)
df.head(2)
Out[2]:
day num_changesets osm_changes comments num_users
date
2012-11-29 2012-11-29 00:00:00.000 13 5877 [Golfclub Schmidmühlen, added detail to golfco... 7
2018-02-04 2018-02-04 00:00:00.000 14 5075 [Adding building - Eudunda Golf Club, Traced a... 9
In [3]:
sns.set_style("darkgrid")
ax = df.num_changesets.plot(figsize=(15,8), style='.')
ax.set_title("Number of changesets with 'golf' in the comments", fontsize=16);
ax.set_xlabel("Date"); ax.set_ylabel("Changeset Count (cumulative)");
In [4]:
ax = df.num_users.plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Number of mappers each day")
ax.set_xlim(datetime.date(2018,1,1));
ax.set_title("Number of distinct mappers each day submitting Changets with `golf` in the comment");
In [5]:
ax =  (df.osm_changes / df.num_users) .plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Average number of OSM changes / mapper")
# ax.set_xlim(datetime.date(2018,1,1));
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");
In [6]:
ax =  (df.osm_changes / df.num_users) .plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Average number of OSM changes / mapper")
ax.set_xlim(datetime.date(2018,1,1)); ax.set_ylim(0,3500);
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");