Continuing our exploration of MusicBrainz data, this time with works (i.e. musical compositions).
The setup required to repeat these operations is explained in the introduction notebook. In case graphs do not appear in this page you can refer to the static version.
%run startup.ipy
I was recently listening to András Schiff "Schiff on Beethoven" lectures, in which he was commenting about performers playing the first movement of the "Moonlight" sonata too slowly (compared to what is written on the music sheet)... so let's take this example.
I will start by fetching from the database more or less the equivalent of the work overview page:
moonlight_mbid = '11e7e520-f430-306c-90b8-183cbf3cc761'
recordings = sql("""
SELECT a.name AS artist,
to_date(to_char(l.begin_date_year, '9999') ||
to_char(COALESCE(l.end_date_month, 1), '99') ||
to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS start,
to_date(to_char(l.end_date_year, '9999') ||
to_char(COALESCE(l.end_date_month, 1), '99') ||
to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS end,
r.gid AS mbid,
r.length * interval '1ms' AS duration
FROM work AS w
JOIN l_recording_work AS lrw ON w.id = lrw.entity1
JOIN recording AS r ON r.id = lrw.entity0
LEFT OUTER JOIN l_artist_recording AS lar ON r.id = lar.entity1
LEFT OUTER JOIN artist AS a ON a.id = lar.entity0
LEFT OUTER JOIN link AS l ON l.id = lar.link
WHERE w.gid = %(moonlight_mbid)s
ORDER BY start;
""", moonlight_mbid=moonlight_mbid)
recordings.duration = recordings.duration.fillna(0).apply(lambda t: t.round('s'))
recordings['seconds'] = recordings.duration.apply(lambda s: s.total_seconds())
recordings.duration = recordings.duration.apply(lambda s: str(s).replace('0 days 00:', ''))
recordings['url'] = recordings.mbid.apply(mb_recording_link)
recordings.drop('mbid', axis=1, inplace=True)
print('Number of recordings: %d' % len(recordings))
iplot(ff.create_table(recordings.head(10)[['start', 'artist', 'duration', 'url']]))
print("Number of recordings: ", len(recordings))
print("Number of recordings with known performer: ", len(recordings.artist[recordings.artist.notnull()]))
print("Number of distinct performers: ", len(set(recordings.artist)))
recordings.groupby('artist').count().url.sort_values(ascending=False).head(20)
Note that some recordings can be counted twice if the no one confirmed they were identical (Tomšič or Barenboim for example are included in a lot of compilation and some of the recordings should be merged).
# Transform into JSON
data = [{'start': line.start.isoformat(),
'content': line.artist,
} for _, line in recordings.iterrows() if line.start]
# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""window.recordingData={};""".format(json.dumps(data, indent=4)))
%%html
<link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis-timeline-graph2d.min.css" />
%%javascript
element.append('<div id="vis-container" style="width: 100%;height: 100%;"></div>');
requirejs.config({
paths: {
vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
}
});
require(['vis'], function(vis){
var data = new vis.DataSet(window.recordingData);
var options = {
editable: false
};
// create the timeline
var container = document.getElementById('vis-container');
var timeline = new vis.Timeline(container, data, options);
})
recordings_by_duration = recordings.sort_values(by='seconds', ascending=True)
# attribute unique random color to each pianist (black=unknown)
from hashlib import md5
color = recordings_by_duration.artist.apply(lambda s: '#' + md5(bytes(s, 'utf-8')).hexdigest()[:6] if s else '#000000')
# legend displayed on mouse hover
text = (recordings_by_duration.artist
+ '<br>'
+ recordings_by_duration.start.apply(lambda s: s.strftime('%Y-%m-%d') if pandas.notnull(s) else 'date unknown')
+ '<br>'
+ recordings_by_duration.duration.astype(str))
iplot(go.Figure(
data=[go.Bar(
y=recordings_by_duration.seconds,
text=text,
hoverinfo='text',
marker={'color': color},
)],
layout={
'title': 'Moonlight sonata recordings by duration',
'xaxis1': {'title': 'Recording (arbitrary) number'},
'yaxis1': {'title': 'Duration (seconds)'},
},
))
Apart from outliers, the average duration seems to be around minutes. Some of the slowest pianists are Solomon, Barenboim (1966), and Gulda, and some of the fastest Gould, Schiff and Schnabel.
moonlight_mbid = '11e7e520-f430-306c-90b8-183cbf3cc761'
recordings_by_place = sql("""
SELECT a.name AS artist,
p.name AS place,
p.coordinates,
to_date(to_char(l.begin_date_year, '9999') ||
to_char(COALESCE(l.end_date_month, 1), '99') ||
to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS start,
to_date(to_char(l.end_date_year, '9999') ||
to_char(COALESCE(l.end_date_month, 1), '99') ||
to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS end,
r.gid AS mbid,
r.length * interval '1ms' AS duration
FROM work AS w
JOIN l_recording_work AS lrw ON w.id = lrw.entity1
JOIN recording AS r ON r.id = lrw.entity0
JOIN l_place_recording AS lpr ON r.id = lpr.entity1
JOIN place AS p ON p.id = lpr.entity0
LEFT OUTER JOIN l_artist_recording AS lar ON r.id = lar.entity1
LEFT OUTER JOIN artist AS a ON a.id = lar.entity0
LEFT OUTER JOIN link AS l ON l.id = lar.link
WHERE w.gid = %(moonlight_mbid)s
AND p.coordinates IS NOT NULL
ORDER BY start;
""", moonlight_mbid=moonlight_mbid)
#recordings['url'] = recordings.mbid.apply(mb_recording_link)
recordings_by_place['lat'] = recordings_by_place.coordinates.apply(lambda t: eval(t)[0])
recordings_by_place['lon'] = recordings_by_place.coordinates.apply(lambda t: eval(t)[1])
recordings_by_place.drop('coordinates', axis=1, inplace=True)
import datetime
recordings_by_place['text'] = recordings_by_place[['place', 'artist', 'start']].apply(
lambda s: '<br>'.join([el.isoformat() if isinstance(el, datetime.date) else el or '' for el in s]), axis=1)
recordings_by_place.head()
iplot({'data': [{'lat': recordings_by_place.lat,
'lon': recordings_by_place.lon,
'text': recordings_by_place.text,
'type': 'scattergeo',
}],
'layout': {'geo': {'showland': True,
'showcountries': True,
'projection': {'type': 'stereographic'},
},
'title': 'Recordings by place',
}
})