glam/src/glam_extractor/cli.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

67 lines
2.1 KiB
Python

"""
Command-line interface for GLAM Extractor
"""
import click
from pathlib import Path
from typing import Optional
@click.group()
@click.version_option()
def main() -> None:
    """GLAM Extractor - Extract and standardize heritage institution data"""
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Output file path")
@click.option("--format", "-f", type=click.Choice(["jsonld", "rdf", "csv", "parquet"]), default="jsonld")
@click.option("--conversation/--csv", default=True, help="Input type: conversation JSON or CSV")
def extract_command(
    input_path: str,
    output: Optional[str],
    format: str,
    conversation: bool,
) -> None:
    """Extract heritage institution data from conversations or CSV files"""
    # Report what is about to happen; the extraction itself is not implemented yet.
    for message in (
        f"Extracting data from: {input_path}",
        f"Output format: {format}",
    ):
        click.echo(message)
    # TODO: Implement extraction logic
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--schema", "-s", type=click.Path(exists=True), help="LinkML schema file")
def validate_command(input_path: str, schema: Optional[str]) -> None:
    """Validate extracted data against LinkML schema"""
    # Stub: announces the target file only; validation is not implemented yet.
    click.echo(f"Validating: {input_path}")
    # TODO: Implement validation logic
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), required=True)
@click.option("--format", "-f", type=click.Choice(["jsonld", "rdf", "csv", "parquet", "sqlite"]), required=True)
def export_command(input_path: str, output: str, format: str) -> None:
    """Export data to various formats"""
    # Stub: echoes the planned export; the actual conversion is not implemented yet.
    announcement = f"Exporting {input_path} to {format}"
    click.echo(announcement)
    # TODO: Implement export logic
@main.command()
@click.argument("url", type=str)
@click.option("--output", "-o", type=click.Path(), help="Output file path")
def crawl_command(url: str, output: Optional[str]) -> None:
    """Crawl institutional website for data"""
    # Stub: reports the target URL; crawling is not implemented yet.
    click.echo(f"Crawling: {url}")
    # TODO: Implement crawl4ai integration
# Allow the module to be run directly as a script.
if __name__ == "__main__":
    main()