Count links in either direction between MusicBrainz and Wikidata
from pprint import pprint
%run -i ../startup.py
Last notebook update: 2021-01-31
Importing libs
Defining database parameters
Defining *sql* helper function
Last database update: 2021-01-13
Defining *sparql* helper function
# Map every entity type listed in MB_ENTITIES to the value returned by
# mb_entity_count, then print the mapping.
# NOTE(review): mb_entity_count is defined outside this cell; it presumably
# returns the number of MusicBrainz entities of that type linking to
# Wikidata -- confirm against ../startup.py.
links_from_mb = {
    entity_type: mb_entity_count(entity_type)
    for entity_type in MB_ENTITIES
}
pprint(links_from_mb)
{'area': 118295,
'artist': 200132,
'event': 302,
'instrument': 890,
'label': 7033,
'place': 7901,
'recording': 2,
'release': 1,
'release_group': 115750,
'series': 1367,
'work': 31842}
# Map every entity type keyed in WD_MB_LINK_PROPERTIES to the value returned
# by wd_entity_count for that type and its associated property, then print
# the mapping.
# NOTE(review): wd_entity_count is defined outside this cell; it presumably
# counts Wikidata items carrying the given MusicBrainz link property --
# confirm against ../startup.py.
links_from_wd = {
    entity_type: wd_entity_count(entity_type, prop)
    for entity_type, prop in WD_MB_LINK_PROPERTIES.items()
}
pprint(links_from_wd)
{'area': 28472,
'artist': 220497,
'event': 37,
'instrument': 875,
'label': 7772,
'place': 7265,
'recording': 8198,
'release': 1064,
'release_group': 174976,
'series': 1229,
'work': 34406}
# Combine both mappings into one table: one column per link direction,
# indexed by the keys of the two dicts. The bare name on the last line makes
# the notebook display the frame.
link_count = pd.DataFrame(dict(from_mb=links_from_mb, from_wd=links_from_wd))
link_count
| | from_mb | from_wd |
---|---|---|
area | 118295 | 28472 |
artist | 200132 | 220497 |
event | 302 | 37 |
instrument | 890 | 875 |
label | 7033 | 7772 |
place | 7901 | 7265 |
recording | 2 | 8198 |
release | 1 | 1064 |
release_group | 115750 | 174976 |
series | 1367 | 1229 |
work | 31842 | 34406 |
import jinja2

# HTML skeleton of the published statistics report.
# NOTE(review): as it stands the template contains no Jinja expressions; the
# "Latest MB database update:" / "Latest update:" paragraphs and the section
# under the <h2> look like they lost their `{{ ... }}` placeholders. Restore
# them from the original notebook -- the variable names cannot be recovered
# from this cell.
template = jinja2.Template("""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Alignment of MusicBrainz and Wikidata entities</title>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
</head>
<body style="margin: 20px;">
<h1>Alignment of MusicBrainz and Wikidata entities</h1>
<p>Latest MB database update: </p>
<p>Latest update: </p>
<h2>Count links in either direction between MusicBrainz and Wikidata</h2>
</body>
</html>
""")

# Render first, with every notebook global exposed to the template, so that a
# rendering error cannot leave a truncated report file behind.
report_html = (
    template.render(**globals())
    # Turn escaped angle brackets back into real markup (the previous
    # '<' -> '<' / '>' -> '>' replacements were no-ops).
    .replace('&lt;', '<').replace('&gt;', '>')
    # Swap the `class="dataframe"` attribute (presumably emitted by pandas'
    # HTML table output) for Bootstrap table classes.
    .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
    # Style only the opening tag; replacing the bare word 'thead' also
    # rewrote the closing tag into the invalid '</thead class="...">'.
    .replace('<thead>', '<thead class="thead-light">')
)

# The page declares charset utf-8, so write it as UTF-8 regardless of the
# platform's default encoding.
with open('../docs/wd-statistics-report.html', 'w', encoding='utf-8') as f:
    f.write(report_html)