glam/examples/demo_nlp_extractor.py
2025-11-19 23:25:22 +01:00

97 lines
3.1 KiB
Python

#!/usr/bin/env python3
"""
Demo script showing how to use the NLP Institution Extractor.
This demonstrates extracting heritage institutions from conversation text.
"""
from glam_extractor.extractors.nlp_extractor import InstitutionExtractor
# Width of the "=" and "-" separator rules printed between sections.
_RULE_WIDTH = 70

# Text used for the detailed, field-by-field walkthrough.
_SAMPLE_TEXT = "The Rijksmuseum in Amsterdam (ISIL: NL-AsdRM) is a major art museum."

# Extra inputs that are only summarised (name, type, confidence).
_EXTRA_EXAMPLES = (
    "The British Library in London is the national library of the United Kingdom.",
    "National Archives of Brazil (Arquivo Nacional) holds historical documents.",
    "The Louvre Museum (Q3044768) in Paris is the world's largest art museum.",
    "Biblioteca Nacional de España has ISIL code ES-M.",
)


def _print_banner(title):
    """Print *title* framed by two "=" rules, followed by a blank line."""
    rule = "=" * _RULE_WIDTH
    print(rule)
    print(title)
    print(rule)
    print()


def _print_institution(index, institution):
    """Print the full report (core fields, locations, identifiers, provenance)
    for one extracted institution, numbered *index*."""
    print(f"Institution #{index}:")
    print(f" Name: {institution.name}")
    print(f" Type: {institution.institution_type}")
    print(f" Status: {institution.organization_status}")
    print()

    # Locations and identifiers are optional; their headings are skipped
    # entirely when the collection is empty or missing.
    if institution.locations:
        print(" Location:")
        for loc in institution.locations:
            if loc.city:
                print(f" City: {loc.city}")
            if loc.country:
                print(f" Country: {loc.country}")

    if institution.identifiers:
        print(" Identifiers:")
        for ident in institution.identifiers:
            print(f" {ident.identifier_scheme}: {ident.identifier_value}")
            if ident.identifier_url:
                print(f" URL: {ident.identifier_url}")
    print()

    provenance = institution.provenance
    print(" Provenance:")
    print(f" Data Source: {provenance.data_source}")
    print(f" Data Tier: {provenance.data_tier}")
    print(f" Extraction Method: {provenance.extraction_method}")
    print(f" Confidence Score: {provenance.confidence_score:.2f}")
    print()
    print("-" * _RULE_WIDTH)
    print()


def main(extractor=None):
    """Run the demonstration extraction and print the results to stdout.

    The sample sentence is extracted and reported in detail first; if that
    extraction fails, the error is printed and the demo stops. Otherwise a
    set of additional example sentences is extracted and summarised.

    Args:
        extractor: Optional object providing ``extract_from_text(text)``.
            The returned result is read through its ``success``, ``error``
            and ``value`` attributes. When omitted, a new
            ``InstitutionExtractor`` is created, so ``main()`` behaves as
            before.

    Returns:
        None.
    """
    if extractor is None:
        extractor = InstitutionExtractor()

    _print_banner("NLP Institution Extractor Demo")
    print("Sample text:")
    print(f" {_SAMPLE_TEXT}")
    print()

    result = extractor.extract_from_text(_SAMPLE_TEXT)
    if not result.success:
        print(f"Error: {result.error}")
        return

    print(f"Extracted {len(result.value)} institution(s):")
    print()
    for index, institution in enumerate(result.value, 1):
        _print_institution(index, institution)

    _print_banner("Additional Examples")
    for example in _EXTRA_EXAMPLES:
        print(f"Text: {example}")
        result = extractor.extract_from_text(example)
        if not result.success:
            # Report failures explicitly instead of presenting them as
            # "nothing found", matching the handling of the first extraction.
            print(f" → Error: {result.error}")
        elif result.value:
            for inst in result.value:
                print(f" → Extracted: {inst.name} ({inst.institution_type})")
                print(f" Confidence: {inst.provenance.confidence_score:.2f}")
        else:
            print(" → No institutions extracted")
        print()
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()