glam/scripts/debug_diocesan_library.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

87 lines
2.4 KiB
Python

#!/usr/bin/env python3
"""
Direct Wikidata query to inspect Q28149782 (Diocesan Library).
"""
import requests
import json
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
USER_AGENT = "GLAM-Tunisia-Debug/1.0"

# Query to get all properties of Q28149782
query = """
SELECT ?property ?propertyLabel ?value ?valueLabel WHERE {
wd:Q28149782 ?p ?value .
?property wikibase:directClaim ?p .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,ar" . }
}
LIMIT 100
"""

# Query to get the specific instance type (P31) QIDs of Q28149782
query2 = """
SELECT ?type ?typeLabel WHERE {
wd:Q28149782 wdt:P31 ?type .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr" . }
}
"""

headers = {'User-Agent': USER_AGENT}

# Property labels whose values are printed from the first query's results.
IMPORTANT_PROPS = (
    'instance of',
    'country',
    'located in the administrative territorial entity',
    'coordinate location',
    'label',
)


def run_sparql(sparql_query, timeout=30):
    """Send *sparql_query* to SPARQL_ENDPOINT and return its result bindings.

    The query is sent as a GET request asking for JSON output, using the
    module-level ``headers``.

    Args:
        sparql_query: SPARQL query text.
        timeout: Request timeout in seconds, passed to ``requests.get``.

    Returns:
        The list found under ``results.bindings`` in the JSON response, or an
        empty list when either key is absent.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    response = requests.get(
        SPARQL_ENDPOINT,
        params={'query': sparql_query, 'format': 'json'},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json().get("results", {}).get("bindings", [])


def group_values_by_property(bindings):
    """Group binding values by their property label.

    Args:
        bindings: Iterable of binding dicts. Each may carry
            ``propertyLabel``, ``valueLabel`` and ``value`` entries, which
            are themselves dicts with a ``"value"`` key.

    Returns:
        A dict mapping each property label to the list of its values, in
        input order. A binding without ``propertyLabel`` is filed under
        ``"unknown"``. The value is taken from ``valueLabel`` when present,
        otherwise from ``value``, otherwise it is the empty string.
    """
    properties = {}
    for binding in bindings:
        prop = binding.get("propertyLabel", {}).get("value", "unknown")
        # Fall back to the raw value when no label is present.
        raw_value = binding.get("value", {}).get("value", "")
        val = binding.get("valueLabel", {}).get("value", raw_value)
        properties.setdefault(prop, []).append(val)
    return properties


def qid_from_uri(uri):
    """Return the last ``/``-separated segment of *uri*, or ``"unknown"`` if empty."""
    return uri.split("/")[-1] if uri else "unknown"


def main():
    """Run both queries against Wikidata and print the findings to stdout."""
    print("Querying Wikidata for Q28149782 (Diocesan Library of Tunis)")
    print("=" * 60)

    bindings = run_sparql(query)
    print(f"Found {len(bindings)} properties\n")

    # Print relevant properties
    properties = group_values_by_property(bindings)
    for prop in IMPORTANT_PROPS:
        if prop in properties:
            print(f"{prop}:")
            for val in properties[prop]:
                print(f" - {val}")

    print("\n" + "=" * 60)

    # Now get the specific instance type QID
    print("\nInstance types (P31) for Q28149782:")
    for binding in run_sparql(query2):
        type_uri = binding.get("type", {}).get("value", "")
        type_label = binding.get("typeLabel", {}).get("value", "")
        print(f" {qid_from_uri(type_uri)}: {type_label}")

    print("\nCheck if Q105338594 is in our LIBRARY type mapping...")


if __name__ == "__main__":
    main()