146 lines
5.3 KiB
Python
146 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Export Bulgarian institutions to JSON-LD and RDF Turtle."""
|
|
|
|
import json
|
|
import yaml
|
|
from datetime import datetime, timezone
|
|
|
|
def _json_default(value):
    """Serialise values that ``json`` cannot encode natively.

    YAML loaders turn unquoted dates/timestamps into ``date``/``datetime``
    objects; those are written as ISO 8601 strings. Anything else is rejected
    with the same ``TypeError`` that ``json`` would raise on its own.
    """
    from datetime import date  # datetime is a subclass of date

    if isinstance(value, date):
        return value.isoformat()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def yaml_to_jsonld(yaml_file: str, jsonld_file: str):
    """Convert a YAML list of institution records into a JSON-LD document.

    Each record becomes one ``HeritageCustodian`` node in ``@graph``. The
    ``id``, ``name`` and ``institution_type`` keys are always copied (as
    ``null`` when absent); the remaining keys are copied only when they hold
    a truthy value.

    Args:
        yaml_file: Path of the YAML file to read (UTF-8).
        jsonld_file: Path of the JSON-LD file to write (UTF-8). Missing
            parent directories are created.
    """
    # Local import so this function does not depend on a file-level import.
    from pathlib import Path

    print(f"Converting {yaml_file} to JSON-LD...")

    with open(yaml_file, 'r', encoding='utf-8') as f:
        # An empty YAML document loads as None; treat it as "no records".
        institutions = yaml.safe_load(f) or []

    # (YAML key, JSON-LD key) pairs for the optional properties, in output order.
    optional_fields = (
        ('alternative_names', 'alternativeNames'),
        ('description', 'description'),
        ('homepage', 'homepage'),
        ('locations', 'locations'),
        ('identifiers', 'identifiers'),
        ('collections', 'collections'),
        ('contact_info', 'contactInfo'),
        ('provenance', 'provenance'),
    )

    graph = []
    for inst in institutions:
        jsonld_inst = {
            "@id": inst.get('id'),
            "@type": "HeritageCustodian",
            "name": inst.get('name'),
            "institutionType": inst.get('institution_type'),
        }
        for yaml_key, jsonld_key in optional_fields:
            value = inst.get(yaml_key)
            if value:
                jsonld_inst[jsonld_key] = value
        graph.append(jsonld_inst)

    jsonld_data = {
        "@context": "https://w3id.org/heritage/custodian/context.jsonld",
        "@graph": graph,
    }

    Path(jsonld_file).parent.mkdir(parents=True, exist_ok=True)
    with open(jsonld_file, 'w', encoding='utf-8') as f:
        json.dump(
            jsonld_data,
            f,
            ensure_ascii=False,
            indent=2,
            default=_json_default,
        )

    print(f"✓ Saved JSON-LD to {jsonld_file}")
|
|
|
|
_HC_NAMESPACE = 'https://w3id.org/heritage/custodian/'


def _escape_turtle_literal(value) -> str:
    """Return *value* as text safe to place inside a double-quoted Turtle literal."""
    # Backslash must be escaped first so later escapes are not doubled.
    return (
        str(value)
        .replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
        .replace('\t', '\\t')
    )


def _turtle_subject(iri: str) -> str:
    """Return the Turtle subject term for an institution IRI.

    The compact ``hc:local`` form is used only when the part after the ``hc:``
    namespace consists of ASCII letters, digits, ``_`` and ``-`` (not starting
    with ``-``). Everything else is written as a full ``<IRI>``, because
    characters such as ``/`` are not allowed unescaped in a prefixed name.
    """
    if iri.startswith(_HC_NAMESPACE):
        local = iri[len(_HC_NAMESPACE):]
        if (
            local
            and local.isascii()
            and local[0] != '-'
            and local.replace('-', '').replace('_', '').isalnum()
        ):
            return f'hc:{local}'
    return f'<{iri}>'


def _institution_to_turtle(inst: dict) -> list:
    """Return the Turtle lines describing one institution record."""
    raw_id = inst.get('id')
    # A record without an id is emitted as an anonymous blank node.
    subject = _turtle_subject(str(raw_id)) if raw_id else '[]'

    # Predicate/object pairs; the ';' and '.' terminators are added at the end
    # so the statement is well-formed whichever optional fields are present.
    props = ['a hc:HeritageCustodian']

    name = inst.get('name')
    if name is not None:
        props.append(f'rdfs:label "{_escape_turtle_literal(name)}"@bg')

    for alt_name in inst.get('alternative_names') or []:
        props.append(f'skos:altLabel "{_escape_turtle_literal(alt_name)}"')

    if inst.get('description'):
        # Newlines are flattened to spaces, then the text is escaped.
        desc = _escape_turtle_literal(str(inst['description']).replace('\n', ' '))
        props.append(f'dct:description "{desc}"@bg')

    if inst.get('homepage'):
        props.append(f'schema:url <{inst["homepage"]}>')

    # Identifiers: only ISIL, Wikidata and VIAF are exported.
    for ident in inst.get('identifiers') or []:
        scheme = ident.get('identifier_scheme')
        value = ident.get('identifier_value')
        if value is None:
            continue
        if scheme == 'ISIL':
            props.append(f'hc:isil "{_escape_turtle_literal(value)}"')
        elif scheme == 'Wikidata':
            props.append(f'hc:wikidata <https://www.wikidata.org/wiki/{value}>')
        elif scheme == 'VIAF':
            props.append(f'hc:viaf "{_escape_turtle_literal(value)}"')

    # Locations: only the first location is exported.
    if inst.get('locations'):
        loc = inst['locations'][0]
        if loc.get('city'):
            props.append(
                f'schema:addressLocality "{_escape_turtle_literal(loc["city"])}"'
            )
        if loc.get('country'):
            props.append(
                f'schema:addressCountry "{_escape_turtle_literal(loc["country"])}"'
            )
        latitude = loc.get('latitude')
        longitude = loc.get('longitude')
        # Compare against None: 0 is a valid coordinate but is falsy.
        if latitude is not None and longitude is not None:
            props.append(f'schema:latitude "{latitude}"^^xsd:double')
            props.append(f'schema:longitude "{longitude}"^^xsd:double')

    institution_type = inst.get('institution_type')
    if institution_type is not None:
        props.append(
            f'hc:institutionType "{_escape_turtle_literal(institution_type)}"'
        )

    lines = [subject]
    lines.extend(f' {prop} ;' for prop in props[:-1])
    lines.append(f' {props[-1]} .')
    return lines


def yaml_to_turtle(yaml_file: str, turtle_file: str):
    """Convert a YAML list of institution records into an RDF Turtle file.

    The output starts with the prefix declarations and a comment header
    (generation time in UTC and record count), followed by one statement
    block per institution, each followed by a blank line.

    Args:
        yaml_file: Path of the YAML file to read (UTF-8).
        turtle_file: Path of the Turtle file to write (UTF-8). Missing
            parent directories are created.
    """
    # Local import so this function does not depend on a file-level import.
    from pathlib import Path

    print(f"Converting {yaml_file} to RDF Turtle...")

    with open(yaml_file, 'r', encoding='utf-8') as f:
        # An empty YAML document loads as None; treat it as "no records".
        institutions = yaml.safe_load(f) or []

    ttl_lines = [
        "@prefix hc: <https://w3id.org/heritage/custodian/> .",
        "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .",
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .",
        "@prefix schema: <http://schema.org/> .",
        "@prefix dct: <http://purl.org/dc/terms/> .",
        "@prefix skos: <http://www.w3.org/2004/02/skos/core#> .",
        "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .",
        "",
        "# Bulgarian ISIL Registry - RDF Export",
        f"# Generated: {datetime.now(timezone.utc).isoformat()}",
        f"# Institutions: {len(institutions)}",
        "",
    ]

    for inst in institutions:
        ttl_lines.extend(_institution_to_turtle(inst))
        ttl_lines.append("")

    Path(turtle_file).parent.mkdir(parents=True, exist_ok=True)
    with open(turtle_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(ttl_lines))

    print(f"✓ Saved RDF Turtle to {turtle_file}")
|
|
|
|
def main():
    """Export the Bulgarian dataset to JSON-LD, then to RDF Turtle."""
    source = "data/instances/bulgaria_complete.yaml"

    # (exporter, output path) pairs, run in this order against the same input.
    exports = (
        (yaml_to_jsonld, "data/jsonld/bulgaria_complete.jsonld"),
        (yaml_to_turtle, "data/rdf/bulgaria_complete.ttl"),
    )
    for export, target in exports:
        export(source, target)

    print("\n✓ RDF export complete!")
|
|
|
|
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|