# File-viewer metadata (not part of the data): 168 lines, 3.7 KiB, YAML
# Report metadata: generation timestamp, project name, schema version and a
# free-text summary of the most recent enrichment run.
generated: '2025-11-11T16:45:00.000000+00:00'
project: GLAM Data Extraction
schema_version: v0.2.1
# Folded scalar (>-) keeps this a single-line string at load time while
# staying within the line-length limit.
last_update: >-
  Phase 2 Brazil enrichment complete: +40 Wikidata IDs (13.7% → 32.5%,
  +18.9pp). Overall dataset: 7,619/13,502 (56.4%)
# Dataset-wide totals. The per-country `total` values under by_country add up
# to total_institutions (13502), and the per-country Wikidata counts add up to
# wikidata_coverage.count (7619).
unified_dataset:
  total_institutions: 13502
  countries_covered: 18  # by_country lists 18 country codes
  wikidata_coverage:
    count: 7619
    percentage: 56.4  # 7619 / 13502, shown to one decimal place
  geocoding_coverage:
    # NOTE(review): the per-country geocoding counts listed under by_country
    # add up to 8218, not 8229 — confirm which figure is stale.
    count: 8229
    percentage: 60.9
# Outstanding enrichment work, one counter per missing attribute.
# NOTE(review): presumably these count institution records — confirm against
# the generator.
enrichment_needs:
  total_candidates: 13461
  needs_wikidata: 5883  # equals 13502 total - 7619 with a Wikidata ID
  # NOTE(review): 13502 total - 8229 geocoded = 5273, not 5324 — confirm how
  # this counter is derived.
  needs_coordinates: 5324
  needs_website: 2089
  needs_description: 13012
# Per-country breakdown keyed by two-letter country code (presumably
# ISO 3166-1 alpha-2 — confirm). Each entry holds the institution `total` plus
# Wikidata and geocoding coverage; every `percentage` is count / total * 100.
# Some entries are rounded to one decimal place, others to two.
# If a code that YAML 1.1 reads as a boolean is ever added (e.g. NO), quote it.
by_country:
  AR:
    total: 2
    wikidata_coverage:
      count: 1
      percentage: 50.0
    geocoding_coverage:
      count: 2
      percentage: 100.0
  BE:
    total: 7
    wikidata_coverage:
      count: 7
      percentage: 100.0
    geocoding_coverage:
      count: 7
      percentage: 100.0
    note: 'Manual enrichment Nov 2025: All 7 EU institutions enriched with Wikidata and VIAF'
  BR:
    total: 212
    wikidata_coverage:
      count: 69
      percentage: 32.5
    geocoding_coverage:
      count: 97
      percentage: 45.75
    note: >-
      Phase 2 enrichment (2025-11-11): SPARQL batch query added 40 Wikidata IDs
      (+18.9pp improvement). 10/10 top matches were 100% confidence (perfect
      name matches: Museu Nacional, MASP, Instituto Moreira Salles). 70% fuzzy
      threshold with Portuguese normalization.
  CL:
    total: 180
    wikidata_coverage:
      count: 97
      percentage: 53.89
    geocoding_coverage:
      count: 168
      percentage: 93.33
  DK:
    total: 1
    wikidata_coverage:
      count: 1
      percentage: 100.0
    geocoding_coverage:
      count: 1
      percentage: 100.0
  DZ:
    total: 19
    wikidata_coverage:
      count: 13
      percentage: 68.42
    geocoding_coverage:
      count: 0
      percentage: 0.0
  GB:
    total: 4
    wikidata_coverage:
      count: 1
      percentage: 25.0
    geocoding_coverage:
      count: 0
      percentage: 0.0
  GE:
    total: 14
    wikidata_coverage:
      count: 12
      percentage: 85.7
    geocoding_coverage:
      count: 0
      percentage: 0.0
    # NOTE(review): the note mentions 13 institutions but wikidata_coverage
    # count is 12 — confirm which is correct.
    note: 'Enriched in Task 6: 13 institutions updated with Wikidata'
  IT:
    total: 3
    wikidata_coverage:
      count: 1
      percentage: 33.33
    geocoding_coverage:
      count: 3
      percentage: 100.0
  JP:
    total: 12065
    wikidata_coverage:
      count: 7091
      percentage: 58.77
    geocoding_coverage:
      count: 7091
      percentage: 58.77
  LU:
    total: 1
    wikidata_coverage:
      count: 1
      percentage: 100.0
    geocoding_coverage:
      count: 1
      percentage: 100.0
    note: >-
      Manual enrichment Nov 2025: Court of Justice of the European Union
      enriched with Wikidata Q4951 and VIAF 124913422
  LY:
    total: 48
    wikidata_coverage:
      count: 37
      percentage: 77.08
    geocoding_coverage:
      count: 0
      percentage: 0.0
  MX:
    total: 226
    wikidata_coverage:
      count: 34
      percentage: 15.04
    geocoding_coverage:
      count: 167
      percentage: 73.89
  NL:
    total: 622
    wikidata_coverage:
      count: 193
      percentage: 31.03
    geocoding_coverage:
      count: 621
      percentage: 99.84
  RU:
    total: 1
    wikidata_coverage:
      count: 1
      percentage: 100.0
    geocoding_coverage:
      count: 1
      percentage: 100.0
  TN:
    total: 69
    wikidata_coverage:
      count: 52
      percentage: 75.4
    geocoding_coverage:
      count: 52
      percentage: 75.4
  US:
    total: 7
    wikidata_coverage:
      count: 0
      percentage: 0.0
    geocoding_coverage:
      count: 7
      percentage: 100.0
  VN:
    total: 21
    wikidata_coverage:
      count: 8
      percentage: 38.1
    geocoding_coverage:
      count: 0
      percentage: 0.0