- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
87 lines
2.4 KiB
Python
87 lines
2.4 KiB
Python
#!/usr/bin/env python3
"""
Direct Wikidata query to inspect Q28149782 (Diocesan Library of Tunis).

Prints selected direct-claim properties of the entity, then its
"instance of" (P31) types.
"""

import json
from collections import defaultdict

import requests

# Endpoint that both queries in this script are sent to.
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
# Sent as the User-Agent header so the client identifies itself to the endpoint.
USER_AGENT = "GLAM-Tunisia-Debug/1.0"

# Query to get all properties of Q28149782.
# Each direct-claim predicate (?p) is mapped back to its property entity
# through wikibase:directClaim so the label service can name it.
query = """
SELECT ?property ?propertyLabel ?value ?valueLabel WHERE {
  wd:Q28149782 ?p ?value .
  ?property wikibase:directClaim ?p .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,ar" . }
}
LIMIT 100
"""

headers = {'User-Agent': USER_AGENT}
# 'format': 'json' is passed as a query-string parameter alongside the query.
params = {
    'query': query,
    'format': 'json',
}

print("Querying Wikidata for Q28149782 (Diocesan Library of Tunis)")
print("=" * 60)

# The timeout bounds how long a stalled endpoint can hang the script;
# raise_for_status() aborts with an exception on an HTTP error status.
response = requests.get(SPARQL_ENDPOINT, params=params, headers=headers, timeout=30)
response.raise_for_status()

# Result rows are read from results -> bindings; a missing level yields
# an empty list instead of a KeyError.
results = response.json()
bindings = results.get("results", {}).get("bindings", [])

# NOTE: this counts result rows (one per statement, capped by the query's
# LIMIT), not distinct properties.
print(f"Found {len(bindings)} properties\n")

# Group by property: property label -> list of values, in result order.
properties = defaultdict(list)
for binding in bindings:
    # Rows without a property label are collected under "unknown".
    prop = binding.get("propertyLabel", {}).get("value", "unknown")
    # Prefer the value's label; fall back to the raw value, then to "".
    val = binding.get("valueLabel", {}).get("value", binding.get("value", {}).get("value", ""))
    properties[prop].append(val)

# Print relevant properties, in this fixed order; labels that were not
# returned for the entity are skipped silently.
important_props = ['instance of', 'country', 'located in the administrative territorial entity',
                   'coordinate location', 'label']

for prop in important_props:
    # Membership test (not indexing) so a missing label never creates an entry.
    if prop in properties:
        print(f"{prop}:")
        for val in properties[prop]:
            print(f" - {val}")

print("\n" + "=" * 60)

# Now get the specific instance type QID: ask only for the P31
# ("instance of") values so the type URI is returned next to its label.
query2 = """
SELECT ?type ?typeLabel WHERE {
  wd:Q28149782 wdt:P31 ?type .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr" . }
}
"""

params2 = {
    'query': query2,
    'format': 'json',
}

print("\nInstance types (P31) for Q28149782:")
# Same endpoint, headers and timeout as the first request; only the query differs.
response2 = requests.get(SPARQL_ENDPOINT, params=params2, headers=headers, timeout=30)
response2.raise_for_status()

results2 = response2.json()
bindings2 = results2.get("results", {}).get("bindings", [])

for binding in bindings2:
    type_uri = binding.get("type", {}).get("value", "")
    # The QID is taken as the last path segment of the type URI; a row
    # without a URI is reported as "unknown".
    type_qid = type_uri.split("/")[-1] if type_uri else "unknown"
    type_label = binding.get("typeLabel", {}).get("value", "")
    print(f" {type_qid}: {type_label}")

# Manual follow-up reminder; this script does not perform the check itself.
print("\nCheck if Q105338594 is in our LIBRARY type mapping...")