musicbrainz-sparql

Wikidata / MusicBrainz links statistics

Count links in either direction between MusicBrainz and Wikidata

from pprint import pprint
%run -i ../startup.py
Last notebook update: 2021-01-31
Importing libs
Defining database parameters
Defining *sql* helper function
Last database update: 2021-01-13

Defining *sparql* helper function
# Link counts on the MusicBrainz side, keyed by entity type.
# MB_ENTITIES and mb_entity_count are defined outside this cell
# (presumably by ../startup.py -- confirm).
links_from_mb = {
    kind: mb_entity_count(kind)
    for kind in MB_ENTITIES
}

pprint(links_from_mb)
{'area': 118295,
 'artist': 200132,
 'event': 302,
 'instrument': 890,
 'label': 7033,
 'place': 7901,
 'recording': 2,
 'release': 1,
 'release_group': 115750,
 'series': 1367,
 'work': 31842}
# Link counts on the Wikidata side, keyed by entity type.
# WD_MB_LINK_PROPERTIES maps each entity type to the value handed to
# wd_entity_count as its second argument; both names are defined outside
# this cell (presumably by ../startup.py -- confirm).
links_from_wd = {
    kind: wd_entity_count(kind, link_property)
    for kind, link_property in WD_MB_LINK_PROPERTIES.items()
}

pprint(links_from_wd)
{'area': 118295,
 'artist': 200132,
 'event': 302,
 'instrument': 890,
 'label': 7033,
 'place': 7901,
 'recording': 2,
 'release': 1,
 'release_group': 115750,
 'series': 1367,
 'work': 31842}
# Put both directions side by side: one row per entity type,
# one column per link direction.
link_count = pd.DataFrame(dict(from_mb=links_from_mb, from_wd=links_from_wd))
# Bare expression so the notebook displays the table as the cell result.
link_count
from_mb from_wd
area 118295 28472
artist 200132 220497
event 302 37
instrument 890 875
label 7033 7772
place 7901 7265
recording 2 8198
release 1 1064
release_group 115750 174976
series 1367 1229
work 31842 34406
import jinja2

# Skeleton of the published HTML report: a Bootstrap 4 page with a title,
# two "latest update" lines and the link-count section heading.
_REPORT_TEMPLATE_SOURCE = """
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Alignment of MusicBrainz and Wikidata entities</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  </head>

  <body style="margin: 20px;">
    <h1>Alignment of MusicBrainz and Wikidata entities</h1>

    <p>Latest MB database update: </p>
    <p>Latest update: </p>

    <h2>Count links in either direction between MusicBrainz and Wikidata</h2>
    
  </body>
</html>
"""

# Compile once; the cell that writes the report renders this object.
template = jinja2.Template(_REPORT_TEMPLATE_SOURCE)

def _apply_bootstrap_styling(html):
    """Post-process rendered report HTML so its tables pick up Bootstrap styles.

    Steps, in order:
      1. Turn the entities ``&lt;`` / ``&gt;`` back into ``<`` / ``>``
         (presumably so markup stored in table cells is rendered as HTML
         rather than shown literally -- confirm against the data).
      2. Swap the ``class="dataframe"`` attribute for Bootstrap table classes.
      3. Add ``class="thead-light"`` to table header sections.

    Only *opening* ``<thead`` tags are touched in step 3. Replacing the bare
    word ``thead`` would also rewrite closing tags into
    ``</thead class="thead-light">``, which is not valid HTML.
    """
    return (
        html
        .replace('&lt;', '<').replace('&gt;', '>')
        .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
        .replace('<thead', '<thead class="thead-light"')
    )


# The page declares <meta charset="utf-8">, so write it as UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open('../docs/wd-statistics-report.html', 'w', encoding='utf-8') as f:
    # Every notebook global is exposed to the template by name.
    f.write(_apply_bootstrap_styling(template.render(**globals())))