glam/export_bulgaria_rdf.py
2025-11-19 23:25:22 +01:00

146 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""Export Bulgarian institutions to JSON-LD and RDF Turtle."""
import json
import re
from datetime import date, datetime, timezone
from pathlib import Path

import yaml
def _json_default(value):
    """Serialize YAML-native dates for ``json.dump``; reject anything else.

    ``yaml.safe_load`` turns unquoted timestamps into ``date``/``datetime``
    objects, which the ``json`` module cannot encode on its own.
    """
    if isinstance(value, date):  # datetime is a subclass of date
        return value.isoformat()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def yaml_to_jsonld(yaml_file: str, jsonld_file: str) -> None:
    """Convert a YAML list of institution records to a JSON-LD file.

    Each record becomes one ``HeritageCustodian`` node in ``@graph``. The
    ``id``, ``name`` and ``institution_type`` keys are always emitted (as
    ``null`` when absent); the remaining keys are copied only when they hold
    a truthy value.

    Args:
        yaml_file: Path of the YAML input; expected to contain a list of
            mappings. An empty document is treated as an empty list.
        jsonld_file: Path of the JSON-LD output. Missing parent directories
            are created.
    """
    print(f"Converting {yaml_file} to JSON-LD...")

    with open(yaml_file, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        institutions = yaml.safe_load(f) or []

    # (YAML key, JSON-LD key) pairs for optional fields, in output order.
    optional_fields = (
        ('alternative_names', 'alternativeNames'),
        ('description', 'description'),
        ('homepage', 'homepage'),
        ('locations', 'locations'),
        ('identifiers', 'identifiers'),
        ('collections', 'collections'),
        ('contact_info', 'contactInfo'),
        ('provenance', 'provenance'),
    )

    graph = []
    for inst in institutions:
        node = {
            "@id": inst.get('id'),
            "@type": "HeritageCustodian",
            "name": inst.get('name'),
            "institutionType": inst.get('institution_type'),
        }
        for yaml_key, jsonld_key in optional_fields:
            value = inst.get(yaml_key)
            if value:
                node[jsonld_key] = value
        graph.append(node)

    jsonld_data = {
        "@context": "https://w3id.org/heritage/custodian/context.jsonld",
        "@graph": graph,
    }

    # Do not assume the output directory already exists.
    Path(jsonld_file).parent.mkdir(parents=True, exist_ok=True)
    with open(jsonld_file, 'w', encoding='utf-8') as f:
        json.dump(
            jsonld_data,
            f,
            ensure_ascii=False,
            indent=2,
            default=_json_default,
        )

    print(f"✓ Saved JSON-LD to {jsonld_file}")
# Namespace bound to the ``hc:`` prefix in the generated document.
_HC_NAMESPACE = 'https://w3id.org/heritage/custodian/'
# Conservative ASCII subset of Turtle's PN_LOCAL production: anything that
# matches can be written as ``hc:<name>`` without escaping.
_SAFE_LOCAL_NAME = re.compile(r'[A-Za-z0-9_](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?')
# Characters that may not appear raw inside a Turtle IRIREF (``<...>``).
_IRI_FORBIDDEN = frozenset('<>"{}|^`\\')


def _turtle_literal(value) -> str:
    """Return *value* as a double-quoted, escaped Turtle string literal."""
    escaped = (
        str(value)
        .replace('\\', '\\\\')  # must run first so later escapes survive
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    return f'"{escaped}"'


def _turtle_iri(iri) -> str:
    """Return *iri* in angle brackets, percent-encoding forbidden characters."""
    encoded = ''.join(
        f'%{ord(ch):02X}' if ch in _IRI_FORBIDDEN or ord(ch) <= 0x20 else ch
        for ch in str(iri)
    )
    return f'<{encoded}>'


def _turtle_subject(inst_id) -> str:
    """Return a syntactically valid Turtle subject term for a record id."""
    if not inst_id:
        # No identifier: an anonymous blank node keeps the document valid.
        return '[]'
    text = str(inst_id)
    if text.startswith(_HC_NAMESPACE):
        local = text[len(_HC_NAMESPACE):]
    elif text.startswith('hc:'):
        local = text[len('hc:'):]
    else:
        return _turtle_iri(text)
    if _SAFE_LOCAL_NAME.fullmatch(local):
        return f'hc:{local}'
    # e.g. a local part containing '/' is not a legal prefixed name.
    return _turtle_iri(_HC_NAMESPACE + local)


def _institution_to_turtle(inst) -> list:
    """Return the Turtle lines (subject plus predicates) for one record."""
    statements = [' a hc:HeritageCustodian']

    name = inst.get('name')
    if name is not None:
        statements.append(f' rdfs:label {_turtle_literal(name)}@bg')

    for alt_name in inst.get('alternative_names') or []:
        statements.append(f' skos:altLabel {_turtle_literal(alt_name)}')

    description = inst.get('description')
    if description:
        # Descriptions are flattened to a single line, as before.
        flat = str(description).replace('\r\n', ' ').replace('\n', ' ')
        statements.append(f' dct:description {_turtle_literal(flat)}@bg')

    homepage = inst.get('homepage')
    if homepage:
        statements.append(f' schema:url {_turtle_iri(homepage)}')

    # Identifiers: only the schemes below are exported.
    for ident in inst.get('identifiers') or []:
        scheme = ident.get('identifier_scheme')
        value = ident.get('identifier_value')
        if value is None:
            continue
        if scheme == 'ISIL':
            statements.append(f' hc:isil {_turtle_literal(value)}')
        elif scheme == 'Wikidata':
            wikidata_iri = _turtle_iri(f'https://www.wikidata.org/wiki/{value}')
            statements.append(f' hc:wikidata {wikidata_iri}')
        elif scheme == 'VIAF':
            statements.append(f' hc:viaf {_turtle_literal(value)}')

    # Locations: only the first entry is exported.
    locations = inst.get('locations')
    if locations:
        loc = locations[0]
        if loc.get('city'):
            statements.append(
                f' schema:addressLocality {_turtle_literal(loc["city"])}'
            )
        if loc.get('country'):
            statements.append(
                f' schema:addressCountry {_turtle_literal(loc["country"])}'
            )
        latitude = loc.get('latitude')
        longitude = loc.get('longitude')
        # Compare against None so that a coordinate of 0 is not dropped.
        if latitude is not None and longitude is not None:
            statements.append(
                f' schema:latitude {_turtle_literal(latitude)}^^xsd:double'
            )
            statements.append(
                f' schema:longitude {_turtle_literal(longitude)}^^xsd:double'
            )

    institution_type = inst.get('institution_type')
    if institution_type is not None:
        statements.append(
            f' hc:institutionType {_turtle_literal(institution_type)}'
        )

    # Terminators are added last so the final predicate always ends in '.',
    # whichever optional predicates happen to be present.
    lines = [_turtle_subject(inst.get('id'))]
    lines.extend(f'{statement} ;' for statement in statements[:-1])
    lines.append(f'{statements[-1]} .')
    return lines


def yaml_to_turtle(yaml_file: str, turtle_file: str) -> None:
    """Convert a YAML list of institution records to an RDF Turtle file.

    The output starts with prefix declarations and a comment header
    (generation time in UTC, record count), followed by one subject block
    per record. String values are escaped and IRIs are percent-encoded
    where needed.

    Args:
        yaml_file: Path of the YAML input; expected to contain a list of
            mappings. An empty document is treated as an empty list.
        turtle_file: Path of the Turtle output. Missing parent directories
            are created.
    """
    print(f"Converting {yaml_file} to RDF Turtle...")

    with open(yaml_file, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        institutions = yaml.safe_load(f) or []

    ttl_lines = [
        "@prefix hc: <https://w3id.org/heritage/custodian/> .",
        "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .",
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .",
        "@prefix schema: <http://schema.org/> .",
        "@prefix dct: <http://purl.org/dc/terms/> .",
        "@prefix skos: <http://www.w3.org/2004/02/skos/core#> .",
        "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .",
        "",
        "# Bulgarian ISIL Registry - RDF Export",
        f"# Generated: {datetime.now(timezone.utc).isoformat()}",
        f"# Institutions: {len(institutions)}",
        "",
    ]

    for inst in institutions:
        ttl_lines.extend(_institution_to_turtle(inst))
        ttl_lines.append("")  # blank line between subject blocks

    # Do not assume the output directory already exists.
    Path(turtle_file).parent.mkdir(parents=True, exist_ok=True)
    with open(turtle_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(ttl_lines))

    print(f"✓ Saved RDF Turtle to {turtle_file}")
def main():
    """Export the Bulgarian dataset to JSON-LD and then to RDF Turtle."""
    source = "data/instances/bulgaria_complete.yaml"

    # (converter, output path) pairs, run in this order.
    exports = (
        (yaml_to_jsonld, "data/jsonld/bulgaria_complete.jsonld"),
        (yaml_to_turtle, "data/rdf/bulgaria_complete.ttl"),
    )
    for convert, destination in exports:
        convert(source, destination)

    print("\n✓ RDF export complete!")
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()