%run -i ../startup.py
ENTITY_TYPE = 'work'
endpoint='http://data.bnf.fr/sparql'
Last notebook update: 2021-02-13
Importing libs
Defining database parameters
Defining *sql* helper function
Last database update: 2021-01-13
Defining *sparql* helper function
# linked to MB works
bnf_entity_count(ENTITY_TYPE)
2839
links_from_bnf = sparql(f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT (?work AS ?bnf) ?mbid ?name
WHERE
""", endpoint='http://data.bnf.fr/sparql')
print('Count:', len(links_from_bnf))
display_df(links_from_bnf.head())
Count: 2839
bnf | mbid | name | |
---|---|---|---|
0 | cb11944701x | 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c | Vita nuova |
1 | cb11939496m | d8e99500-f056-4d82-b819-971e713ca84a | Der Zauberberg |
2 | cb13927446n | 43b3d2c0-210e-3124-a97c-e97dd9564f1e | L'Internationale |
3 | cb13927446n | 45dd2d44-95b3-4cfa-8484-45416a7db8e7 | L'Internationale |
4 | cb13920574g | 0b1f0523-969a-44eb-8ee5-001b0f30343b | Otello |
links_from_mb = sql("""
SELECT
url.url AS bnf,
work.gid AS mbid,
work.name
FROM work
JOIN l_url_work AS luw ON luw.entity1 = work.id
JOIN url ON luw.entity0 = url.id
WHERE
url.url LIKE '%%bnf.fr%%'
ORDER BY work.name
;
""")
links_from_mb.bnf = links_from_mb.bnf.apply(lambda s: s.split('/')[-1])
links_from_mb.mbid = links_from_mb.mbid.apply(str)
print('Count:', len(links_from_mb))
display_df(links_from_mb.head())
Count: 1002
bnf | mbid | name | |
---|---|---|---|
0 | cb13912155c | 26a22ba5-cff7-478e-bf92-ad594d4f590f | "El Fuego", ensalada for 4 voices |
1 | cb13912906c | e130ee21-7930-4c17-8ccf-cb0192cd3ba4 | "Ohne Titel" |
2 | cb166320720 | e8c10bc4-c39e-461a-b077-9b59a95774ee | "Wer ist so würdig als du", Wq. 222, H. 831 |
3 | cb166009413 | 38f09a04-1808-4777-b710-a27beb95e73c | 12 chansons Op. 11 : 12.Embarquez-vous! |
4 | cb139121990 | 32663f37-e1d4-489f-b429-0da57dbb293b | 15 portraits d'enfants d'Auguste Renoir |
merge = pd.merge(links_from_bnf, links_from_mb,
on=['bnf', 'mbid'], suffixes=('_bnf', '_mb'),
how='outer', indicator=True)
display_df(merge.head())
bnf | mbid | name_bnf | edit_link | name_mb | _merge | |
---|---|---|---|---|---|---|
0 | cb11944701x | 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c | Vita nuova | edit | NaN | left_only |
1 | cb11939496m | d8e99500-f056-4d82-b819-971e713ca84a | Der Zauberberg | edit | NaN | left_only |
2 | cb13927446n | 43b3d2c0-210e-3124-a97c-e97dd9564f1e | L'Internationale | edit | NaN | left_only |
3 | cb13927446n | 45dd2d44-95b3-4cfa-8484-45416a7db8e7 | L'Internationale | edit | NaN | left_only |
4 | cb13920574g | 0b1f0523-969a-44eb-8ee5-001b0f30343b | Otello | edit | NaN | left_only |
# link in mb but missing in bnf
links_to_add_to_bnf = merge.loc[lambda x : x['_merge']=='right_only'][[
'name_mb', 'mbid', 'bnf']]
print('Count:', len(links_to_add_to_bnf))
display_df(links_to_add_to_bnf.head())
Count: 721
name_mb | mbid | bnf | |
---|---|---|---|
0 | "El Fuego", ensalada for 4 voices | 26a22ba5-cff7-478e-bf92-ad594d4f590f | cb13912155c |
1 | "Ohne Titel" | e130ee21-7930-4c17-8ccf-cb0192cd3ba4 | cb13912906c |
2 | "Wer ist so würdig als du", Wq. 222, H. 831 | e8c10bc4-c39e-461a-b077-9b59a95774ee | cb166320720 |
3 | 12 chansons Op. 11 : 12.Embarquez-vous! | 38f09a04-1808-4777-b710-a27beb95e73c | cb166009413 |
4 | 15 portraits d'enfants d'Auguste Renoir | 32663f37-e1d4-489f-b429-0da57dbb293b | cb139121990 |
# link in bnf but missing in mb
links_to_add_to_mb = merge.loc[lambda x : x['_merge']=='left_only'][[
'name_bnf', 'bnf', 'mbid']]
links_to_add_to_mb['edit_link'] = links_to_add_to_mb.apply(
mb_work_edit_bnf_link, axis=1)
print('Count:', len(links_to_add_to_mb))
display_df(links_to_add_to_mb.head())
Count: 2558
name_bnf | bnf | mbid | edit_link | |
---|---|---|---|---|
0 | Vita nuova | cb11944701x | 9c73bb0c-c20c-4fc9-828e-baf9a3e2892c | edit |
1 | Der Zauberberg | cb11939496m | d8e99500-f056-4d82-b819-971e713ca84a | edit |
2 | L'Internationale | cb13927446n | 43b3d2c0-210e-3124-a97c-e97dd9564f1e | edit |
3 | L'Internationale | cb13927446n | 45dd2d44-95b3-4cfa-8484-45416a7db8e7 | edit |
4 | Otello | cb13920574g | 0b1f0523-969a-44eb-8ee5-001b0f30343b | edit |
import jinja2
template = jinja2.Template("""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Alignment of MusicBrainz and BNF Works</title>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
</head>
<body style="margin: 20px;">
<h1>Alignment of MusicBrainz and BNF Works</h1>
<p>Latest MB database update: </p>
<p>Latest update: </p>
<ol>
<li>
<a href="#bnf2mb">Add missing BNF links to MusicBrainz</a>
( rows)
</li>
<li>
<a href="#mb2bnf">Add missing MusicBrainz links to BNF</a>
( rows)
</li>
</ol>
<h2 id="bnf2mb">Add missing BNF links to MusicBrainz</h2>
<h2 id="mb2bnf">Add missing MusicBrainz links to BNF</h2>
</body>
</html>
""")
with open('../docs/bnf-works-report.html', 'w') as f:
f.write(template.render(**globals())
.replace('<', '<').replace('>', '>')
.replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
.replace('thead', 'thead class="thead-light"'))