- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
67 lines
2.1 KiB
Python
"""Command-line interface for GLAM Extractor."""
|
|
|
|
import click
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
@click.group()
@click.version_option()
def main() -> None:
    """GLAM Extractor - Extract and standardize heritage institution data"""
    # No group-level setup is needed; subcommands register themselves via
    # @main.command(). The redundant `pass` after the docstring was removed —
    # a docstring is already a valid function body.
|
|
|
|
|
|
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Output file path")
# Bind --format to the parameter `output_format` so the function does not
# shadow the builtin `format`; the CLI flag itself is unchanged.
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["jsonld", "rdf", "csv", "parquet"]),
    default="jsonld",
)
@click.option("--conversation/--csv", default=True, help="Input type: conversation JSON or CSV")
def extract_command(
    input_path: str,
    output: Optional[str],
    output_format: str,
    conversation: bool,
) -> None:
    """Extract heritage institution data from conversations or CSV files"""
    # `output` and `conversation` are accepted but not yet consumed — the
    # extraction pipeline is still a stub (see TODO below).
    click.echo(f"Extracting data from: {input_path}")
    click.echo(f"Output format: {output_format}")
    # TODO: Implement extraction logic
|
|
|
|
|
|
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--schema", "-s", type=click.Path(exists=True), help="LinkML schema file")
def validate_command(input_path: str, schema: Optional[str]) -> None:
    """Validate extracted data against LinkML schema"""
    # `schema` is accepted but not yet consumed — validation is still a stub.
    click.echo(f"Validating: {input_path}")
    # TODO: Implement validation logic
    # (dead trailing `pass` removed; the function body is already non-empty)
|
|
|
|
|
|
@main.command()
@click.argument("input_path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), required=True)
# Bind --format to the parameter `output_format` so the function does not
# shadow the builtin `format`; the CLI flag itself is unchanged.
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["jsonld", "rdf", "csv", "parquet", "sqlite"]),
    required=True,
)
def export_command(input_path: str, output: str, output_format: str) -> None:
    """Export data to various formats"""
    # `output` is accepted but not yet consumed — export is still a stub.
    click.echo(f"Exporting {input_path} to {output_format}")
    # TODO: Implement export logic
|
|
|
|
|
|
@main.command()
@click.argument("url", type=str)
@click.option("--output", "-o", type=click.Path(), help="Output file path")
def crawl_command(url: str, output: Optional[str]) -> None:
    """Crawl institutional website for data"""
    # `output` is accepted but not yet consumed — crawling is still a stub.
    click.echo(f"Crawling: {url}")
    # TODO: Implement crawl4ai integration
    # (dead trailing `pass` removed; the function body is already non-empty)
|
|
|
|
|
|
# Script entry point: dispatch to the click command group.
if __name__ == "__main__":
    main()
|