%run -i ../startup.py
ENTITY_TYPE = 'event'
Last notebook update: 2021-01-29
Importing libs
Defining database parameters
Defining *sql* helper function
Last database update: 2021-01-13
Defining *sparql* helper function
Wikidata entities:
https://www.wikidata.org/wiki/Q182832 concert
https://www.wikidata.org/wiki/Q1573906 concert tour
Wikidata properties:
https://www.wikidata.org/wiki/Property:P276 location
https://www.wikidata.org/wiki/Property:P580 start time
https://www.wikidata.org/wiki/Property:P585 point in time
https://www.wikidata.org/wiki/Property:P710 participant
https://www.wikidata.org/wiki/Property:P1651 youtube ID
https://www.wikidata.org/wiki/Property:P6423 MusicBrainz event ID
examples
https://www.wikidata.org/wiki/Q898154
https://www.wikidata.org/wiki/Q25408640
sparql("""
SELECT (COUNT(?event) AS ?cnt)
WHERE {
?event wdt:P31 wd:Q182832 .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
""")
cnt | |
---|---|
0 | 5233 |
# entity types
sparql("""
SELECT distinct (count(?event) as ?cnt) ?ins ?insLabel
WHERE {
?event wdt:P31 ?ins;
wdt:P6423 ?mbid.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
group by ?ins ?insLabel
order by DESC(?cnt)
""")
cnt | ins | insLabel | |
---|---|---|---|
0 | 26 | Q27968043 | festival edition |
1 | 12 | Q1151125 | Pinkpop Festival |
2 | 2 | Q276 | Eurovision Song Contest |
3 | 2 | Q868557 | music festival |
4 | 2 | Q182832 | concert |
5 | 1 | Q979949 | International Eucharistic Congress |
6 | 1 | Q132241 | festival |
7 | 1 | Q618779 | award |
# linked to MB event
links_from_wd = sparql("""
SELECT (?event AS ?wd) ?mbid ?eventLabel
WHERE {
?event wdt:P6423 ?mbid .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
ORDER BY ASC(?eventLabel)
""")
links_from_wd.rename(columns={'eventLabel': 'name'}, inplace=True)
print('Count:', len(links_from_wd))
display_df(links_from_wd.head())
Count: 37
wd | mbid | name | |
---|---|---|---|
0 | Q12682738 | 07ebfee6-4372-4906-8ca5-d82edcc0b1c8 | Q12682738 |
1 | Q16913112 | 7ba13795-6dc7-450b-b4ff-052c6adc844e | Q16913112 |
2 | Q61088805 | 4ec5dc62-9348-41e0-a0ca-949129d6258b | Q61088805 |
3 | Q61117076 | acaea9ba-9de5-4518-acc7-3dc38924fb38 | Q61117076 |
4 | Q171784 | 3b38dc60-f35f-4335-96cc-e1e2e13e1ffd | Eurovision Song Contest 1956 |
Event before 1920:
sparql("""
SELECT (?event AS ?wd) ?date ?eventLabel ?mbidam
WHERE {
?event wdt:P31* wd:Q182832 .
OPTIONAL { ?event wdt:P6423 ?mbid . }
?event wdt:P585 ?date .
FILTER (year(?date) < 1920) .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
ORDER BY ASC(?date)
""")
wd | date | eventLabel | |
---|---|---|---|
0 | Q1748028 | 1783-03-23T00:00:00Z | Q1748028 |
1 | Q19979612 | 1808-12-22T00:00:00Z | Beethoven concert of 22 December 1808 |
2 | Q62736575 | 1825-03-21T00:00:00Z | British première of Beethoven's Symphony No. 9 |
3 | Q59811862 | 1902-01-27T00:00:00Z | Q59811862 |
4 | Q2291501 | 1913-03-31T00:00:00Z | Skandalkonzert |
links_from_mb = sql("""
SELECT
url.url AS wd,
event.gid AS mbid,
event.name
FROM event
JOIN l_event_url AS leu ON leu.entity0 = event.id
JOIN url ON leu.entity1 = url.id
WHERE
url.url LIKE '%%wikidata.org%%'
ORDER BY event.name
;
""")
links_from_mb.wd = links_from_mb.wd.apply(lambda s: s.split('/')[-1])
links_from_mb.mbid = links_from_mb.mbid.apply(str)
print('Count:', len(links_from_mb))
display_df(links_from_mb.head())
Count: 302
wd | mbid | name | |
---|---|---|---|
0 | Q4617578 | 5eb9c50e-7ef2-42d3-90db-2a76dcdb99bb | 10th International Jean Sibelius Violin Competition |
1 | Q2636776 | 1d488776-dc23-4bb3-ae13-3ca71999fbe5 | 11th International Jean Sibelius Violin Competition |
2 | Q4581712 | da2651a2-c945-43f4-8e5b-099767fee350 | 1984年度十大勁歌金曲頒獎典禮 |
3 | Q4582345 | 30d9210b-afd6-4e9b-ba43-eeb8429239d4 | 1985年度十大勁歌金曲頒獎典禮 |
4 | Q4583059 | db242565-e5e3-43ec-acfa-cbec827f8154 | 1986年度十大勁歌金曲頒獎典禮 |
duplicate_wd = links_from_mb[[
'wd', 'mbid', 'name']].groupby('wd').filter(
lambda row: len(row.mbid) > 1).sort_values('wd')
print('Count:', len(duplicate_wd))
display_df(duplicate_wd.head())
Count: 36
wd | mbid | name | |
---|---|---|---|
0 | Q1074246 | b6139248-912f-4bf0-a3c3-42b91ac89c7c | Animelo Summer Live 2015 -THE GATE- |
1 | Q1074246 | 2fcfb07b-dfbb-4a71-a0c6-cf0d2bd4b243 | Animelo Summer Live 2016 刻-TOKI- |
2 | Q1074246 | 29f70590-45b4-4bd3-a2ad-260fe6b04944 | Animelo Summer Live 2017 -THE CARD- |
3 | Q1133749 | 57881f3d-0d3e-4188-9c85-59c25ec3515a | Rock in Rio 4 |
4 | Q1133749 | 884b21f8-0d67-4433-a774-e9d7ecde2807 | Rock in Rio 5 |
duplicate_mb = links_from_mb[['wd', 'mbid', 'name']].groupby('mbid').filter(
lambda row: len(row.mbid) > 1).sort_values('mbid')
print('Count:', len(duplicate_mb))
display_df(duplicate_mb.head())
Count: 2
wd | mbid | name | |
---|---|---|---|
0 | Q7300590 | c6665c04-a110-4aa7-84f4-ae314218b060 | Reading Festival 2012 |
1 | Q1137962 | c6665c04-a110-4aa7-84f4-ae314218b060 | Reading Festival 2012 |
merge = pd.merge(links_from_wd, links_from_mb,
on=['wd', 'mbid'], suffixes=('_wd', '_mb'),
how='outer', indicator=True)
display_df(merge.head())
wd | mbid | name_wd | name_mb | _merge | |
---|---|---|---|---|---|
0 | Q12682738 | 07ebfee6-4372-4906-8ca5-d82edcc0b1c8 | Q12682738 | Juara Lagu ’86 | both |
1 | Q16913112 | 7ba13795-6dc7-450b-b4ff-052c6adc844e | Q16913112 | XXII Congreso Eucarístico Internacional | both |
2 | Q61088805 | 4ec5dc62-9348-41e0-a0ca-949129d6258b | Q61088805 | NaN | left_only |
3 | Q61117076 | acaea9ba-9de5-4518-acc7-3dc38924fb38 | Q61117076 | Anugerah Juara Lagu 33 | both |
4 | Q171784 | 3b38dc60-f35f-4335-96cc-e1e2e13e1ffd | Eurovision Song Contest 1956 | Eurovision Song Contest 1956 | both |
# link in mb but missing in wd
links_to_add_to_wd = merge.loc[lambda x : x['_merge']=='right_only'][[
'name_mb', 'mbid', 'wd']]
print('Count:', len(links_to_add_to_wd))
display_df(links_to_add_to_wd.head())
Count: 267
name_mb | mbid | wd | |
---|---|---|---|
0 | 10th International Jean Sibelius Violin Competition | 5eb9c50e-7ef2-42d3-90db-2a76dcdb99bb | Q4617578 |
1 | 11th International Jean Sibelius Violin Competition | 1d488776-dc23-4bb3-ae13-3ca71999fbe5 | Q2636776 |
2 | 1984年度十大勁歌金曲頒獎典禮 | da2651a2-c945-43f4-8e5b-099767fee350 | Q4581712 |
3 | 1985年度十大勁歌金曲頒獎典禮 | 30d9210b-afd6-4e9b-ba43-eeb8429239d4 | Q4582345 |
4 | 1986年度十大勁歌金曲頒獎典禮 | db242565-e5e3-43ec-acfa-cbec827f8154 | Q4583059 |
# link in wd but missing in mb
links_to_add_to_mb = merge.loc[lambda x : x['_merge']=='left_only'][[
'name_wd', 'wd', 'mbid']]
links_to_add_to_mb['edit_link'] = links_to_add_to_mb.apply(
mb_event_edit_wd_link, axis=1)
print('Count:', len(links_to_add_to_mb))
display_df(links_to_add_to_mb.head())
Count: 2
name_wd | wd | mbid | edit_link | |
---|---|---|---|---|
0 | Q61088805 | Q61088805 | 4ec5dc62-9348-41e0-a0ca-949129d6258b | edit |
1 | Jazz à Juan | Q744640 | 7e1e18af-5793-4f44-bd7b-0a364b45acb2 | edit |
import jinja2
template = jinja2.Template("""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Alignment of MusicBrainz and Wikidata Events</title>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
</head>
<body style="margin: 20px;">
<h1>Alignment of MusicBrainz and Wikidata Events</h1>
<p>Latest MB database update: </p>
<p>Latest update: </p>
<ol>
<li>
<a href="#wddup">MusicBrainz entities sharing a Wikidata link</a>
( rows)
</li>
<li>
<a href="#mbdup">Wikidata entities sharing a MusicBrainz link</a>
( rows)
</li>
<li>
<a href="#wd2mb">Add missing Wikidata links to MusicBrainz</a>
( rows)
</li>
<li>
<a href="#mb2wd">Add missing MusicBrainz links to Wikidata</a>
( rows)
</li>
</ol>
<h2 id="wddup">MusicBrainz entities sharing a Wikidata link</h2>
<h2 id="mbdup">Wikidata entities sharing a MusicBrainz link</h2>
<h2 id="wd2mb">Add missing Wikidata links to MusicBrainz</h2>
<h2 id="mb2wd">Add missing MusicBrainz links to Wikidata</h2>
</body>
</html>
""")
with open('../docs/wd-events-report.html', 'w') as f:
f.write(template.render(**globals())
.replace('<', '<').replace('>', '>')
.replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
.replace('thead', 'thead class="thead-light"'))