glam/reports/kb_libraries_enrichment_stats.json
kempersc 30162e6526 Add script to validate KB library entries and generate enrichment report
- Implemented a Python script to validate KB library YAML files for required fields and data quality.
- Analyzed enrichment coverage from Wikidata and Google Maps, generating statistics.
- Created a comprehensive markdown report summarizing validation results and enrichment quality.
- Included error handling for file loading and validation processes.
- Generated JSON statistics for further analysis.
2025-11-28 14:48:33 +01:00

81 lines
No EOL
1.6 KiB
JSON

{
"timestamp": "2025-11-28T12:28:14.209262+00:00",
"total_entries": 149,
"validation": {
"valid": 149,
"with_issues": 0,
"file_errors": 0
},
"wikidata": {
"success": 114,
"not_found": 35,
"coverage_pct": 76.5,
"match_methods": {
"isil_code_match": 64,
"fuzzy_name_match": 50
},
"with_coordinates": 68,
"with_inception": 11,
"with_viaf": 3,
"with_website": 114
},
"google_maps": {
"success": 149,
"not_found": 0,
"coverage_pct": 100.0,
"with_coordinates": 149,
"with_address": 149,
"with_phone": 146,
"with_website": 143,
"with_opening_hours": 145,
"with_rating": 147,
"business_statuses": {
"OPERATIONAL": 147,
"CLOSED_TEMPORARILY": 1,
"CLOSED_PERMANENTLY": 1
},
"provinces": {
"Zuid-Holland": 25,
"Noord-Holland": 16,
"Zeeland": 4,
"Friesland": 6,
"Overijssel": 23,
"Groningen": 3,
"Noord-Brabant": 18,
"Utrecht": 9,
"Drenthe": 5,
"Flevoland": 3,
"Limburg": 13,
"Gelderland": 18,
"Sint Eustatius": 1,
"Saba": 1,
"Bonaire": 1
}
},
"geographic": {
"unique_cities": 132,
"provinces_covered": 15,
"top_cities": {
"Deventer": 5,
"Den Haag": 4,
"Groningen": 3,
"Assen": 3,
"Middelburg": 2,
"Leeuwarden": 2,
"Heerlen": 2,
"Hoofddorp": 2,
"Lelystad": 2,
"Rotterdam": 2,
"Amsterdam": 1,
"Tilburg": 1,
"Houten": 1,
"Utrecht": 1,
"Grave": 1,
"Schiedam": 1,
"Maastricht": 1,
"Haarlem": 1,
"Eindhoven": 1,
"Enschede": 1
}
}
}