I would like in this notebook to create the data structure necessary for a "teaching family tree", i.e. a graph of relations "X taught Y".
The setup required to repeat these operations is explained in the introduction notebook. In case graphs do not appear in this page you can refer to the static version.
%run startup.ipy
Let's start simply by finding Heinrich Neuhaus's teachers and students
artist_name = 'Heinrich Neuhaus'
artist = sql("""
SELECT a.id,
a.gid AS mbid,
a.name,
a.sort_name,
to_date(to_char(a.begin_date_year, '9999') ||
to_char(a.begin_date_month, '99') ||
to_char(a.begin_date_day, '99'), 'YYYY MM DD') AS start,
to_date(to_char(a.end_date_year, '9999') ||
to_char(a.end_date_month, '99') ||
to_char(a.end_date_day, '99'), 'YYYY MM DD') AS end
FROM artist AS a
JOIN artist_alias AS aa ON aa.artist=a.id
WHERE aa.name = %(artist_name)s;
""", artist_name=artist_name)
artist_mbid = str(artist['mbid'][0])
artist['url'] = artist.mbid.apply(mb_artist_link)
artist.drop('mbid', axis=1, inplace=True)
iplot(ff.create_table(artist[['sort_name', 'start', 'end', 'url']]))
Let's explore MusicBrainz relations to find the proper "X taught Y":
relation = sql("""
SELECT id, name, description
FROM link_type
WHERE description ILIKE '%%teach%%';
""")
relation_id = relation['id'][0]
print(relation.iloc[0].id, relation.iloc[0].description)
print(relation.iloc[1].id, relation.iloc[1].description)
So the one we need is relation id=847 (the other one is for events).
Let's now find the direct students:
students = sql("""
SELECT student.name,
student.sort_name,
student.gid AS mbid
FROM artist AS student
JOIN l_artist_artist AS laa ON laa.entity1 = student.id
JOIN artist AS teacher ON laa.entity0 = teacher.id
JOIN link ON link.id = laa.link
WHERE teacher.gid = %(artist_mbid)s
AND link.link_type = 847;
""", artist_mbid=artist_mbid)
students['url'] = students.mbid.apply(mb_artist_link)
students.mbid = students.mbid.astype(str)
iplot(ff.create_table(students[['name', 'sort_name', 'url']]))
So we have a few of them in MusicBrainz.
teachers = sql("""
SELECT teacher.name,
teacher.sort_name,
teacher.gid AS mbid
FROM artist AS student
JOIN l_artist_artist AS laa ON laa.entity1 = student.id
JOIN artist AS teacher ON laa.entity0 = teacher.id
JOIN link ON link.id = laa.link
WHERE student.gid = %(artist_mbid)s
AND link.link_type = 847;
""", artist_mbid=artist_mbid)
teachers['url'] = teachers.mbid.apply(mb_artist_link)
teachers.mbid = teachers.mbid.astype(str)
iplot(ff.create_table(teachers[['name', 'sort_name', 'url']]))
Now I want to use the visjs library to display a graph. My graph requires two javascript arrays:
First we create these structures in Python:
from pprint import pprint
nodes = []
nodes.append({'id': artist_mbid, 'label' : artist_name})
nodes.extend([{'id': student.mbid, 'label' : student.sort_name}
for student in students.itertuples()])
nodes.extend([{'id': teacher.mbid, 'label' : teacher.sort_name}
for teacher in teachers.itertuples()])
print('5 first nodes:')
pprint(nodes[:5])
edges = []
edges.extend([{'from': artist_mbid, 'to' : student.mbid}
for student in students.itertuples()])
edges.extend([{'from': teacher.mbid, 'to' : artist_mbid}
for teacher in teachers.itertuples()])
print('5 first edges:')
pprint(edges[:5])
...and we convert them as JavaScript array stored on the global object:
# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""
window.edgeData1=%s;
window.nodeData1=%s;
""" % (json.dumps(edges), json.dumps(nodes)))
%%javascript
element.append('<div id="vis-container1" style="width: 100%; height: 500px;"></div>');
requirejs.config({
paths: {
vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
}
});
require(['vis'], function(vis) {
// create the timeline
var container = document.getElementById('vis-container1');
var data = {
nodes: new vis.DataSet(nodeData1),
edges: new vis.DataSet(edgeData1)
};
var options = {
layout: {
hierarchical: {
sortMethod: 'directed'
}
},
edges: {
smooth: true,
arrows: {to: true}
}
};
var network = new vis.Network(container, data, options);
})
If you cannot see the graph above, a static version is available on github.io.
It would be nice to have our code recursively, to go back to the "original" teacher up to the latest student of this "family" (as far as MusicBrainz knows).
def teachers_from_mbid(mbid):
df = sql("""
SELECT teacher.name,
teacher.sort_name,
teacher.gid AS mbid
FROM artist AS student
JOIN l_artist_artist AS laa ON laa.entity1 = student.id
JOIN artist AS teacher ON laa.entity0 = teacher.id
JOIN link ON link.id = laa.link
WHERE student.gid = %(mbid)s
AND link.link_type = 847;
""", mbid=mbid)
df.mbid = df.mbid.astype(str)
return df
def students_from_mbid(mbid):
df = sql("""
SELECT student.name,
student.sort_name,
student.gid AS mbid
FROM artist AS student
JOIN l_artist_artist AS laa ON laa.entity1 = student.id
JOIN artist AS teacher ON laa.entity0 = teacher.id
JOIN link ON link.id = laa.link
WHERE teacher.gid = %(mbid)s
AND link.link_type = 847;
""", mbid=mbid)
df.mbid = df.mbid.astype(str)
return df
artist_mbid = '2b075237-6e90-4e78-a4f8-a66170c682fe' # Neuhaus
We try to apply the 'find new teachers' function on known teachers. We of course avoid calling the function on teachers we already analyzed.
mbids_to_do = [artist_mbid]
nodes = {artist_mbid: {'id': artist_mbid, 'label': artist_name}}
edges = []
while mbids_to_do:
mbid = mbids_to_do.pop()
df = teachers_from_mbid(mbid)
for artist in df.itertuples():
mbids_to_do.append(artist.mbid)
if artist.mbid not in nodes:
nodes[artist.mbid] = {'id': artist.mbid, 'label': artist.name}
edges.append({'from': artist.mbid, 'to': mbid})
print("Nodes (persons): ", len(nodes))
print("Edges (links): ", len(edges))
# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""
window.edgeData2=%s;
window.nodeData2=%s;
""" % (json.dumps(edges), json.dumps(list(nodes.values()))))
%%javascript
element.append('<div id="vis-container2" style="width: 100%; height: 1000px;"></div>');
requirejs.config({
paths: {
vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
}
});
require(['vis'], function(vis) {
// create the timeline
var container = document.getElementById('vis-container2');
var data = {
nodes: new vis.DataSet(nodeData2),
edges: new vis.DataSet(edgeData2)
};
var options = {
layout: {
hierarchical: {
sortMethod: 'directed'
}
},
edges: {
smooth: true,
arrows: {to: true}
}
};
var network = new vis.Network(container, data, options);
})
So if we look for all indirect teachers of Neuhaus, we reach around 50 persons (nodes), once we remove all possible duplicates. The resulting graph (if you don't see it above) is on github.io.
Neuhaus is of course the last node. Looks like Bach, Beethoven, Mozart, Chopin and Liszt are here...
What about students?
mbids_to_do = [artist_mbid]
nodes = {artist_mbid: {'id': artist_mbid, 'label' : artist_name}}
edges = []
while mbids_to_do:
mbid = mbids_to_do.pop()
df = students_from_mbid(mbid)
for artist in df.itertuples():
mbids_to_do.append(artist.mbid)
if artist.mbid not in nodes:
nodes[artist.mbid] = {'id': artist.mbid, 'label' : artist.name}
edges.append({'from': mbid, 'to' : artist.mbid})
print("Nodes (persons): ", len(nodes))
print("Edges (links): ", len(edges))
# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""
window.edgeData3=%s;
window.nodeData3=%s;
""" % (json.dumps(edges), json.dumps(list(nodes.values()))))
%%javascript
element.append('<div id="vis-container3" style="width: 100%; height: 400px;"></div>');
requirejs.config({
paths: {
vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
}
});
require(['vis'], function(vis) {
// create the timeline
var container = document.getElementById('vis-container3');
var data = {
nodes: new vis.DataSet(nodeData3),
edges: new vis.DataSet(edgeData3)
};
var options = {
layout: {
hierarchical: {
sortMethod: 'directed'
}
},
edges: {
smooth: true,
arrows: {to: true}
}
};
var network = new vis.Network(container, data, options);
})
Also available is on github.io.
We could continue by looking at both teachers and students at the same time... but in this case we have to be very careful about loops that would make our code look for the same artists several times or even infinitely (edit: of course my code had a bug and ran in an infinite loop regarding Martinů and another artist who both taught and were taught by each other).
Let's look at a very influent french piano teacher:
artist_name = 'Alfred Cortot'
artist = sql("""
SELECT a.id,
a.gid AS mbid,
a.name,
to_date(to_char(a.begin_date_year, '9999') ||
to_char(a.begin_date_month, '99') ||
to_char(a.begin_date_day, '99'), 'YYYY MM DD') AS start,
to_date(to_char(a.end_date_year, '9999') ||
to_char(a.end_date_month, '99') ||
to_char(a.end_date_day, '99'), 'YYYY MM DD') AS end
FROM artist AS a
WHERE a.name = %(artist_name)s;
""", artist_name=artist_name)
artist_mbid = str(artist['mbid'][0])
artist['url'] = artist.mbid.apply(mb_artist_link)
artist.drop('mbid', axis=1, inplace=True)
iplot(ff.create_table(artist[['name', 'start', 'end', 'url']]))
mbids_to_do = [artist_mbid]
nodes = {artist_mbid: {'id': artist_mbid, 'label': artist_name}}
edges = []
while mbids_to_do:
mbid = mbids_to_do.pop()
df = teachers_from_mbid(mbid)
for artist in df.itertuples():
mbids_to_do.append(artist.mbid)
if artist.mbid not in nodes:
nodes[artist.mbid] = {'id': artist.mbid, 'label': artist.name}
edges.append({'from': artist.mbid, 'to': mbid})
mbids_to_do = [artist_mbid]
while mbids_to_do:
mbid = mbids_to_do.pop()
df = students_from_mbid(mbid)
for artist in df.itertuples():
if artist.mbid not in nodes:
mbids_to_do.append(artist.mbid)
nodes[artist.mbid] = {'id': artist.mbid, 'label': artist.name}
edges.append({'from': mbid, 'to': artist.mbid})
print("Nodes (persons): ", len(nodes))
print("Edges (links): ", len(edges))
# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""
window.edgeData4=%s;
window.nodeData4=%s;
""" % (json.dumps(edges), json.dumps(list(nodes.values()))))
%%javascript
element.append('<div id="vis-container4" style="width: 100%; height: 600px;"></div>');
requirejs.config({
paths: {
vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
}
});
require(['vis'], function(vis) {
// create the timeline
var container = document.getElementById('vis-container4');
var data = {
nodes: new vis.DataSet(nodeData4),
edges: new vis.DataSet(edgeData4)
};
var options = {
layout: {
hierarchical: {
sortMethod: 'directed'
}
},
edges: {
smooth: true,
arrows: {to: true}
}
};
var network = new vis.Network(container, data, options);
})
Also available is on github.io. Looks like Cortot belongs to the Chopin school of piano...