musicbrainz-sparql

%run -i ../startup.py
ENTITY_TYPE = 'work'
endpoint='http://data.bnf.fr/sparql'
Last notebook update: 2021-02-13
Importing libs
Defining database parameters
Defining *sql* helper function
Last database update: 2021-01-13

Defining *sparql* helper function

Works from BNF

# linked to MB works
bnf_entity_count(ENTITY_TYPE)
2839
links_from_bnf = sparql(f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (?work AS ?bnf) ?mbid ?name
WHERE 
""", endpoint='http://data.bnf.fr/sparql')

print('Count:', len(links_from_bnf))
display_df(links_from_bnf.head())
Count: 2839
bnf mbid name
0 cb11944701x 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c Vita nuova
1 cb11939496m d8e99500-f056-4d82-b819-971e713ca84a Der Zauberberg
2 cb13927446n 43b3d2c0-210e-3124-a97c-e97dd9564f1e L'Internationale
3 cb13927446n 45dd2d44-95b3-4cfa-8484-45416a7db8e7 L'Internationale
4 cb13920574g 0b1f0523-969a-44eb-8ee5-001b0f30343b Otello
links_from_mb = sql("""
SELECT
    url.url AS bnf,
    work.gid AS mbid,
    work.name
FROM work
JOIN l_url_work AS luw ON luw.entity1 = work.id
JOIN url               ON luw.entity0 = url.id
WHERE
    url.url LIKE '%%bnf.fr%%'
ORDER BY work.name
;
""")
links_from_mb.bnf = links_from_mb.bnf.apply(lambda s: s.split('/')[-1])
links_from_mb.mbid = links_from_mb.mbid.apply(str)

print('Count:', len(links_from_mb))
display_df(links_from_mb.head())
Count: 1002
bnf mbid name
0 cb13912155c 26a22ba5-cff7-478e-bf92-ad594d4f590f "El Fuego", ensalada for 4 voices
1 cb13912906c e130ee21-7930-4c17-8ccf-cb0192cd3ba4 "Ohne Titel"
2 cb166320720 e8c10bc4-c39e-461a-b077-9b59a95774ee "Wer ist so würdig als du", Wq. 222, H. 831
3 cb166009413 38f09a04-1808-4777-b710-a27beb95e73c 12 chansons Op. 11 : 12.Embarquez-vous!
4 cb139121990 32663f37-e1d4-489f-b429-0da57dbb293b 15 portraits d'enfants d'Auguste Renoir

Data alignment

merge = pd.merge(links_from_bnf, links_from_mb, 
                 on=['bnf', 'mbid'], suffixes=('_bnf', '_mb'),
                 how='outer', indicator=True)
display_df(merge.head())
bnf mbid name_bnf edit_link name_mb _merge
0 cb11944701x 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c Vita nuova edit NaN left_only
1 cb11939496m d8e99500-f056-4d82-b819-971e713ca84a Der Zauberberg edit NaN left_only
2 cb13927446n 43b3d2c0-210e-3124-a97c-e97dd9564f1e L'Internationale edit NaN left_only
3 cb13927446n 45dd2d44-95b3-4cfa-8484-45416a7db8e7 L'Internationale edit NaN left_only
4 cb13920574g 0b1f0523-969a-44eb-8ee5-001b0f30343b Otello edit NaN left_only
# link in mb but missing in bnf
links_to_add_to_bnf = merge.loc[lambda x : x['_merge']=='right_only'][[
    'name_mb', 'mbid', 'bnf']]

print('Count:', len(links_to_add_to_bnf))
display_df(links_to_add_to_bnf.head())
Count: 721
name_mb mbid bnf
0 "El Fuego", ensalada for 4 voices 26a22ba5-cff7-478e-bf92-ad594d4f590f cb13912155c
1 "Ohne Titel" e130ee21-7930-4c17-8ccf-cb0192cd3ba4 cb13912906c
2 "Wer ist so würdig als du", Wq. 222, H. 831 e8c10bc4-c39e-461a-b077-9b59a95774ee cb166320720
3 12 chansons Op. 11 : 12.Embarquez-vous! 38f09a04-1808-4777-b710-a27beb95e73c cb166009413
4 15 portraits d'enfants d'Auguste Renoir 32663f37-e1d4-489f-b429-0da57dbb293b cb139121990
# link in bnf but missing in mb
links_to_add_to_mb = merge.loc[lambda x : x['_merge']=='left_only'][[
    'name_bnf', 'bnf', 'mbid']]
links_to_add_to_mb['edit_link'] = links_to_add_to_mb.apply(
    mb_work_edit_bnf_link, axis=1)

print('Count:', len(links_to_add_to_mb))
display_df(links_to_add_to_mb.head())
Count: 2558
name_bnf bnf mbid edit_link
0 Vita nuova cb11944701x 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c edit
1 Der Zauberberg cb11939496m d8e99500-f056-4d82-b819-971e713ca84a edit
2 L'Internationale cb13927446n 43b3d2c0-210e-3124-a97c-e97dd9564f1e edit
3 L'Internationale cb13927446n 45dd2d44-95b3-4cfa-8484-45416a7db8e7 edit
4 Otello cb13920574g 0b1f0523-969a-44eb-8ee5-001b0f30343b edit

Report

import jinja2

template = jinja2.Template("""
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Alignment of MusicBrainz and BNF Works</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  </head>

  <body style="margin: 20px;">
    <h1>Alignment of MusicBrainz and BNF Works</h1>

    <p>Latest MB database update: </p>
    <p>Latest update: </p>

    <ol>
      <li>
        <a href="#bnf2mb">Add missing BNF links to MusicBrainz</a>
        ( rows)
      </li>
      <li>
        <a href="#mb2bnf">Add missing MusicBrainz links to BNF</a>
        ( rows)
      </li>

    </ol>
    
    <h2 id="bnf2mb">Add missing BNF links to MusicBrainz</h2>
    

    <h2 id="mb2bnf">Add missing MusicBrainz links to BNF</h2>
    
    
  </body>
</html>
""")

with open('../docs/bnf-works-report.html', 'w') as f:
    f.write(template.render(**globals())
            .replace('&lt;', '<').replace('&gt;', '>')
            .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
            .replace('thead', 'thead class="thead-light"'))