glam/data/custodian/US-CA-SAN-A-IA.yaml
2025-12-30 23:07:03 +01:00

224 lines
8.7 KiB
YAML

original_entry:
name: Internet Archive
institution_type: ARCHIVE
source: CH-Annotator (latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml)
identifiers:
- &id001
identifier_scheme: Website
identifier_value: https://archive.org/
identifier_url: https://archive.org/
- identifier_scheme: GHCID
identifier_value: US-CA-SAN-A-IA
- identifier_scheme: GHCID_NUMERIC
identifier_value: '213324328442227739'
- &id002
identifier_scheme: OLD_ID
identifier_value: https://w3id.org/heritage/custodian/mx/a-internet-archive-0113
- identifier_scheme: GHCID_UUID
identifier_value: a0cc444e-9393-5f88-8cbf-7e63ac831249
identifier_url: urn:uuid:a0cc444e-9393-5f88-8cbf-7e63ac831249
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: 02f5e158-b2bf-8c1b-a302-cf050a43c559
identifier_url: urn:uuid:02f5e158-b2bf-8c1b-a302-cf050a43c559
- identifier_scheme: RECORD_ID
identifier_value: 019a58fd-3229-7e09-8740-57e413f04639
identifier_url: urn:uuid:019a58fd-3229-7e09-8740-57e413f04639
- &id003
identifier_scheme: OLD_ID
identifier_value: 213324328442227739
locations:
- city: San Francisco
region: California
country: US
processing_timestamp: '2025-12-06T23:39:02.326629+00:00'
ghcid:
ghcid_current: US-CA-SAN-A-IA
ghcid_original: US-CA-SAN-A-IA
ghcid_uuid: a0cc444e-9393-5f88-8cbf-7e63ac831249
ghcid_uuid_sha256: 02f5e158-b2bf-8c1b-2302-cf050a43c559
ghcid_numeric: 213324328442227739
record_id: d80a9256-d0ff-454f-8aa6-0ba5d905845b
generation_timestamp: '2025-12-06T23:39:02.326629+00:00'
location_resolution:
country_code: US
region_code: CA
city_code: SAN
method: CH_ANNOTATOR_SOURCE
ghcid_history:
- ghcid: US-CA-SAN-A-IA
ghcid_numeric: 213324328442227739
valid_from: '2025-12-06T23:39:02.326629+00:00'
reason: Initial GHCID from CH-Annotator (latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml)
custodian_name:
claim_type: custodian_name
claim_value: Internet Archive
source_type: ch_annotator
identifiers:
- identifier_scheme: GHCID
identifier_value: US-CA-SAN-A-IA
- identifier_scheme: GHCID_UUID
identifier_value: a0cc444e-9393-5f88-8cbf-7e63ac831249
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: 02f5e158-b2bf-8c1b-2302-cf050a43c559
- identifier_scheme: GHCID_NUMERIC
identifier_value: '213324328442227739'
- identifier_scheme: RECORD_ID
identifier_value: d80a9256-d0ff-454f-8aa6-0ba5d905845b
- *id001
- *id002
- *id003
provenance:
data_source: CONVERSATION_NLP
data_tier: TIER_4_INFERRED
extraction_date: '2025-11-06T07:54:59.620774+00:00'
extraction_method: 'Created from CH-Annotator file: latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml'
confidence_score: 0.85
notes:
- 'YouTube/Google Maps enrichment 2025-12-08T19:41:06Z: Maps: rejected by LLM; YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-09T07:13:45Z
schema_version: 2.0.0
enrichment_provenance:
wikidata_enrichment:
content_hash: sha256-iJpCINJzNww4NgO+W0v2vKj7/ydvX2ZIRGM+PIQpaMo=
verified_at: '2025-12-28T20:09:38.052242+00:00'
wasDerivedFrom: https://www.wikidata.org/wiki/Q461
provenance_schema_version: '2.0'
standards_compliance:
- W3C PROV-O
- W3C SRI (content hashes)
generated_at: '2025-12-28T20:09:38.052259+00:00'
ch_annotator:
convention_id: ch_annotator-v1_7_0
convention_version: 1.7.0
entity_classification:
hypernym: GRP
hypernym_label: GROUP
subtype: GRP.HER.ARC
subtype_label: ARCHIVE
ontology_class: schema:ArchiveOrganization
alternative_classes:
- org:FormalOrganization
- rov:RegisteredOrganization
- glam:HeritageCustodian
extraction_provenance:
namespace: glam
path: /conversations/mixed
agent: batch-script-create-custodian-from-ch-annotator
context_convention: ch_annotator-v1_7_0
source_archived_at: '2025-11-06T07:54:59.620774+00:00'
statement_created_at: '2025-12-30T21:23:27.936301+00:00'
migration_note: Migrated from agent:claude-conversation on 2025-12-30 (ch_annotator.extraction_provenance)
annotation_provenance:
annotation_agent: opencode-claude-sonnet-4
annotation_date: '2025-12-06T21:13:56.173868+00:00'
annotation_method: retroactive CH-Annotator application via batch script
source_file: latin_american_institutions_AUTHORITATIVE.yaml
annotation_metadata:
confidence_score: 0.85
verified: false
verification_date: null
verified_by: null
entity_claims:
- claim_type: full_name
claim_value: Internet Archive
property_uri: skos:prefLabel
provenance:
namespace: glam
path: /conversations/mixed
agent: batch-script-create-custodian-from-ch-annotator
context_convention: ch_annotator-v1_7_0
source_archived_at: '2025-11-06T07:54:59.620774+00:00'
statement_created_at: '2025-12-30T21:23:27.936311+00:00'
migration_note: Migrated from agent:claude-conversation on 2025-12-30 (entity_claims[0].provenance)
confidence: 0.85
- claim_type: institution_type
claim_value: ARCHIVE
property_uri: rdf:type
provenance:
namespace: glam
path: /conversations/mixed
agent: batch-script-create-custodian-from-ch-annotator
context_convention: ch_annotator-v1_7_0
source_archived_at: '2025-11-06T07:54:59.620774+00:00'
statement_created_at: '2025-12-30T21:23:27.936314+00:00'
migration_note: Migrated from agent:claude-conversation on 2025-12-30 (entity_claims[1].provenance)
confidence: 0.95
- claim_type: located_in_city
claim_value: San Francisco
property_uri: schema:addressLocality
provenance:
namespace: glam
path: /conversations/mixed
agent: batch-script-create-custodian-from-ch-annotator
context_convention: ch_annotator-v1_7_0
source_archived_at: '2025-11-06T07:54:59.620774+00:00'
statement_created_at: '2025-12-30T21:23:27.936316+00:00'
migration_note: Migrated from agent:claude-conversation on 2025-12-30 (entity_claims[2].provenance)
confidence: 0.9
- claim_type: ghcid
claim_value: US-CA-SAN-A-IA
property_uri: glam:ghcid
provenance:
namespace: glam
path: /conversations/mixed
agent: batch-script-create-custodian-from-ch-annotator
context_convention: ch_annotator-v1_7_0
source_archived_at: '2025-11-06T07:54:59.620774+00:00'
statement_created_at: '2025-12-30T21:23:27.936317+00:00'
migration_note: Migrated from agent:claude-conversation on 2025-12-30 (entity_claims[3].provenance)
confidence: 1.0
integration_note:
created_from: latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml
creation_date: '2025-12-06T23:39:02.326629+00:00'
creation_method: create_custodian_from_ch_annotator.py
google_maps_status: NO_MATCH
google_maps_rejected:
candidate_name: Internet Archive Europe
rejection_reason: Rejected due to location mismatch. The source institution is in the US, while the candidate is in the
Netherlands. Although the names are related, indicating a branch of the parent institution, the location is in a different
country. The Google Place type 'point_of_interest' is also not specific enough to confirm the entity type.
timestamp: '2025-12-08T19:41:05.807957+00:00'
youtube_status: NOT_FOUND
youtube_search_query: Internet Archive official
youtube_search_timestamp: '2025-12-08T19:41:06.153916+00:00'
location:
city: San Francisco
region: California
region_code: CA
country: US
normalization_timestamp: '2025-12-09T19:32:31.442626+00:00'
latitude: 37.77493
longitude: -122.41942
coordinate_provenance:
source_type: GEONAMES_LOCAL
source_path: data/reference/geonames.db
entity_id: 5391959
original_timestamp: '2025-12-09T19:32:31.441756+00:00'
geonames_id: 5391959
geonames_name: San Francisco
feature_code: PPLA2
wikidata_enrichment:
wikidata_id: Q461
wikidata_label: Internet Archive
wikidata_description: American non-profit organization
wikidata_url: https://www.wikidata.org/wiki/Q461
enrichment_timestamp: '2025-01-21T00:00:00Z'
enrichment_method: manual_wikidata_lookup
enrichment_note: Major digital archive and web preservation organization based in San Francisco
_provenance:
content_hash:
algorithm: sha256
value: sha256-iJpCINJzNww4NgO+W0v2vKj7/ydvX2ZIRGM+PIQpaMo=
scope: enrichment_section
computed_at: '2025-12-28T20:09:38.052242+00:00'
prov:
wasGeneratedBy:
'@type': prov:Activity
name: wikidata_api_fetch
used: https://www.wikidata.org/w/rest.php/wikibase/v1
wasDerivedFrom: https://www.wikidata.org/wiki/Q461
generatedAtTime: '2025-01-21T00:00:00Z'
verification:
status: verified
last_verified: '2025-12-28T20:09:38.052250+00:00'
wikidata_entity_id: Q461