97 lines
3.1 KiB
Python
97 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Demo script showing how to use the NLP Institution Extractor.
|
|
|
|
This demonstrates extracting heritage institutions from conversation text.
|
|
"""
|
|
|
|
from glam_extractor.extractors.nlp_extractor import InstitutionExtractor
|
|
|
|
|
|
def _print_banner(title):
    """Print *title* framed by two 70-character '=' rules, then a blank line."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)
    print()


def _print_institution(index, institution):
    """Print the detailed report for one extracted institution.

    Args:
        index: 1-based position of the institution in the result list.
        institution: Extracted record; this function reads its ``name``,
            ``institution_type``, ``organization_status``, ``locations``,
            ``identifiers`` and ``provenance`` attributes.
    """
    print(f"Institution #{index}:")
    print(f" Name: {institution.name}")
    print(f" Type: {institution.institution_type}")
    print(f" Status: {institution.organization_status}")
    print()

    # Locations and identifiers are optional; their sections are skipped
    # entirely when the collection is empty or missing.
    if institution.locations:
        print(" Location:")
        for loc in institution.locations:
            if loc.city:
                print(f" City: {loc.city}")
            if loc.country:
                print(f" Country: {loc.country}")

    if institution.identifiers:
        print(" Identifiers:")
        for ident in institution.identifiers:
            print(f" {ident.identifier_scheme}: {ident.identifier_value}")
            if ident.identifier_url:
                print(f" URL: {ident.identifier_url}")

    print()
    provenance = institution.provenance
    print(" Provenance:")
    print(f" Data Source: {provenance.data_source}")
    print(f" Data Tier: {provenance.data_tier}")
    print(f" Extraction Method: {provenance.extraction_method}")
    print(f" Confidence Score: {provenance.confidence_score:.2f}")
    print()
    print("-" * 70)
    print()


def main():
    """Run demonstration extraction.

    Extracts institutions from one sample sentence and prints a detailed
    report for each, then runs a few additional example sentences and prints
    a one-line summary per extracted institution.

    If the first extraction reports failure, the error is printed and the
    function returns without running the additional examples.
    """
    # Create extractor instance
    extractor = InstitutionExtractor()

    # Sample text from the user's requirement
    sample_text = "The Rijksmuseum in Amsterdam (ISIL: NL-AsdRM) is a major art museum."

    _print_banner("NLP Institution Extractor Demo")
    print("Sample text:")
    print(f" {sample_text}")
    print()

    # Extract institutions
    result = extractor.extract_from_text(sample_text)
    if not result.success:
        print(f"Error: {result.error}")
        return

    # The examples loop below treats ``result.value`` as possibly falsy, so
    # guard here as well instead of calling len() on it directly.
    institutions = result.value or []
    print(f"Extracted {len(institutions)} institution(s):")
    print()

    for i, institution in enumerate(institutions, 1):
        _print_institution(i, institution)

    # Test with more complex examples
    _print_banner("Additional Examples")

    examples = [
        "The British Library in London is the national library of the United Kingdom.",
        "National Archives of Brazil (Arquivo Nacional) holds historical documents.",
        "The Louvre Museum (Q3044768) in Paris is the world's largest art museum.",
        "Biblioteca Nacional de España has ISIL code ES-M.",
    ]

    for example in examples:
        print(f"Text: {example}")
        result = extractor.extract_from_text(example)

        if not result.success:
            # Report a failed extraction as an error rather than disguising
            # it as "nothing found".
            print(f" → Error: {result.error}")
        elif result.value:
            for inst in result.value:
                print(f" → Extracted: {inst.name} ({inst.institution_type})")
                print(f" Confidence: {inst.provenance.confidence_score:.2f}")
        else:
            print(" → No institutions extracted")
        print()
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|