import seaborn as sns
import pandas as pd
import datetime

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

How about some Golf Courses?¶

SELECT 
  count(id) AS num_changesets, 
  sum(num_changes) AS osm_changes, 
  count(distinct(uid)) AS num_users
FROM changesets
WHERE lower(tags['comment']) like '%golf%'

num_changesets	osm_changes	num_users
72,595	16,863,001	9,595

It looks like 9500 users have submitted changeses that probably have to do with golf courses by this very rudimentary analysis.

Let's do the same query but over time:

SELECT 
  date_trunc('day', created_at) AS day, 
  count(id) AS num_changesets, 
  sum(num_changes) AS osm_changes, 
  array_agg(distinct(tags['comment'])) AS comments,
  count(distinct(uid)) AS num_users
FROM changesets
WHERE lower(tags['comment']) like '%golf%'
GROUP BY  date_trunc('day', created_at)

df = pd.read_csv('/Users/jenningsanderson/Downloads/4dfb7575-0dc4-46a9-8f46-e7a27680ffe6.csv')
df['date'] = df.day.apply(pd.Timestamp)
df.set_index('date', inplace=True)
df.head(2)

sns.set_style("darkgrid")
ax = df.num_changesets.plot(figsize=(15,8), style='.')
ax.set_title("Number of changesets with 'golf' in the comments", fontsize=16);
ax.set_xlabel("Date"); ax.set_ylabel("Changeset Count (cumulative)");

ax = df.num_users.plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Number of mappers each day")
ax.set_xlim(datetime.date(2018,1,1));
ax.set_title("Number of distinct mappers each day submitting Changets with `golf` in the comment");

ax =  (df.osm_changes / df.num_users) .plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Average number of OSM changes / mapper")
# ax.set_xlim(datetime.date(2018,1,1));
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");

ax =  (df.osm_changes / df.num_users) .plot(figsize=(15,8))
ax.set_xlabel("Date"), ax.set_ylabel("Average number of OSM changes / mapper")
ax.set_xlim(datetime.date(2018,1,1)); ax.set_ylim(0,3500);
ax.set_title("Average number of OSM changes submitted per mapper each day in changesets with `golf` in the comment");

	day	num_changesets	osm_changes	comments	num_users
date
2012-11-29	2012-11-29 00:00:00.000	13	5877	[Golfclub Schmidmühlen, added detail to golfco...	7
2018-02-04	2018-02-04 00:00:00.000	14	5075	[Adding building - Eudunda Golf Club, Traced a...	9