Batch enrichment of 3,728 person profiles with additional data:
- Birth decade inference from education/career history
- Location resolution for inferred birth settlements
- Web claims with full provenance (source_url, retrieved_on)
- Organizational subdivision extraction
- Heritage relevance scoring

Also includes:
- 14 profile renames for PPID format corrections
- Updated _manifest.json with extraction statistics
- New _extraction_log.txt and _extraction_summary.json

Enrichment follows AGENTS.md rules:
- Rule 44: EDTF unknown date notation (XXXX, 196X, etc.)
- Rule 45: Inferred data with explicit provenance
- Rule 30: Confidence scoring (0.50-0.95)
- Rule 31: Organizational subdivision extraction

35,052 files changed, +4,507,411 insertions, -63,118 deletions
22 lines
No EOL
645 B
JSON
22 lines
No EOL
645 B
JSON
{
  "extraction_timestamp": "2026-01-09T22:46:31.741623+00:00",
  "script": "extract_persons_with_provenance.py",
  "schema_version": "1.0.0",
  "dry_run": false,
  "statistics": {
    "total_files": 6670,
    "processed": 6670,
    "errors": 0,
    "skipped": 0,
    "total_profiles": 108398,
    "total_entities": 108398,
    "heritage_relevant": 46409,
    "total_web_claims": 392463,
    "errors_list": []
  },
  "compliance": {
    "rule_6": "WebObservation Claims MUST Have XPath Provenance",
    "rule_26": "Person Data Provenance - Web Claims for Staff Information",
    "rule_35": "Provenance Statements MUST Have Dual Timestamps"
  }
}