# glam/scripts/quick_wikidata_search_batch14.py

#!/usr/bin/env python3
"""
Chilean Batch 14: Quick Wikidata Check for Rodulfo Philippi Museum
Focus on the most promising candidate: museum named after famous scientist
"""

import json
import requests
import time

def search_wikidata_simple(search_term: str, language: str = 'es', limit: int = 10) -> list:
    """Search Wikidata items with the ``wbsearchentities`` API action.

    Args:
        search_term: Free-text query sent as the ``search`` parameter.
        language: Language code sent as the ``language`` parameter
            (defaults to Spanish).
        limit: Maximum number of hits requested from the API.

    Returns:
        The list stored under the ``search`` key of the JSON response.
        An empty list is returned when that key is absent, when the HTTP
        request or JSON decoding fails, or when the API answers with an
        error payload; failures are reported on stdout.
    """
    url = 'https://www.wikidata.org/w/api.php'
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': language,
        'type': 'item',
        'search': search_term,
        'limit': limit
    }
    # Wikimedia's User-Agent policy asks clients to identify themselves;
    # generic library defaults may be throttled or blocked.
    # TODO: append a contact URL or e-mail address for the operator.
    headers = {
        'User-Agent': 'glam-quick-wikidata-search/1.0 (quick_wikidata_search_batch14.py)'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older ``requests``
        # releases, where the decode error is not a RequestException.
        print(f"Error: {e}")
        return []

    if not isinstance(data, dict):
        print(f"Error: unexpected response type {type(data).__name__}")
        return []

    # The API can answer HTTP 200 with an ``error`` object instead of hits;
    # report it rather than presenting it as "no results".
    if 'error' in data:
        error = data['error']
        info = error.get('info', error) if isinstance(error, dict) else error
        print(f"Error: {info}")
        return []

    return data.get('search', [])

def _localized_value(entity, field, fallback):
    """Return the Spanish text of ``entity[field]``, else English, else ``fallback``."""
    by_language = entity.get(field)
    # A missing or empty container (of any type) means there is nothing to read.
    if not isinstance(by_language, dict):
        return fallback
    for lang in ('es', 'en'):
        text = (by_language.get(lang) or {}).get('value')
        if text:
            return text
    return fallback


def _first_item_id(entity, prop):
    """Return the item ID of the first ``prop`` statement that has a value, else 'Unknown'."""
    claims = entity.get('claims')
    if not isinstance(claims, dict):
        return 'Unknown'
    for statement in claims.get(prop, []):
        mainsnak = statement.get('mainsnak') or {}
        # "No value" / "unknown value" statements carry no ``datavalue``;
        # skip them instead of failing the whole lookup.
        value = (mainsnak.get('datavalue') or {}).get('value')
        if isinstance(value, dict) and 'id' in value:
            return value['id']
    return 'Unknown'


def get_entity_details(qid: str):
    """Fetch one Wikidata entity and summarise it.

    Args:
        qid: Entity identifier used to build the Special:EntityData URL.

    Returns:
        A dict with the keys ``qid`` (the identifier that was requested),
        ``label`` and ``description`` (Spanish preferred, English as
        fallback, otherwise 'No label' / 'No description'),
        ``location_qid`` (first P131 value) and ``instance_of_qid`` (first
        P31 value), the last two being 'Unknown' when no statement with a
        value exists. Returns ``None`` when the request fails, the body is
        not valid JSON, or the response contains no entity; the failure is
        reported on stdout.
    """
    url = f'https://www.wikidata.org/wiki/Special:EntityData/{qid}.json'
    # Wikimedia's User-Agent policy asks clients to identify themselves;
    # generic library defaults may be throttled or blocked.
    # TODO: append a contact URL or e-mail address for the operator.
    headers = {
        'User-Agent': 'glam-quick-wikidata-search/1.0 (quick_wikidata_search_batch14.py)'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older ``requests``
        # releases, where the decode error is not a RequestException.
        print(f"Error getting entity {qid}: {e}")
        return None

    entities = data.get('entities') if isinstance(data, dict) else None
    if not isinstance(entities, dict) or not entities:
        print(f"Error getting entity {qid}: no entity data in response")
        return None

    # The response may be keyed by a different ID than the one requested
    # (presumably when ``qid`` is a redirect); fall back to the first entity
    # returned instead of raising KeyError.
    entity = entities.get(qid)
    if entity is None:
        entity = next(iter(entities.values()))
    if not isinstance(entity, dict):
        print(f"Error getting entity {qid}: unexpected entity format")
        return None

    return {
        'qid': qid,
        'label': _localized_value(entity, 'labels', 'No label'),
        'description': _localized_value(entity, 'descriptions', 'No description'),
        'location_qid': _first_item_id(entity, 'P131'),
        'instance_of_qid': _first_item_id(entity, 'P31')
    }

def main():
    """Run the batch-14 searches and save the collected entity details.

    For every hard-coded search term this prints the hits returned by
    ``search_wikidata_simple``, fetches details for at most the first five
    of them with ``get_entity_details``, and finally writes a mapping of
    search term -> list of detail dicts to
    ``scripts/batch14_quick_search_results.json`` (relative to the current
    working directory).
    """
    # Local import keeps this fix self-contained within the function.
    from pathlib import Path

    search_terms = [
        "Museo Rodolfo Philippi Chile",
        "Museo Rudolph Philippi Chile",
        "Museo Philippi Chañaral",
        "Museo Philippi Valdivia",
        "Instituto Alemán Puerto Montt"
    ]

    all_results = {}

    print("=" * 80)
    print("Chilean Batch 14: Quick Wikidata Search")
    print("=" * 80)
    print()

    for term in search_terms:
        print(f"Searching: {term}")
        results = search_wikidata_simple(term)

        if results:
            print(f"  Found {len(results)} results:")
            term_results = []

            # Only the top five hits are inspected in detail.
            for result in results[:5]:
                qid = result['id']
                label = result.get('label', 'No label')
                description = result.get('description', 'No description')

                print(f"    {qid}: {label}")
                print(f"      {description}")

                # Get more details; entities whose lookup failed are skipped.
                details = get_entity_details(qid)
                if details:
                    term_results.append(details)

                time.sleep(0.5)  # Be nice to Wikidata

            all_results[term] = term_results
        else:
            print("  No results found")
            all_results[term] = []

        print()
        time.sleep(1)  # Rate limiting

    # Save results
    output_file = 'scripts/batch14_quick_search_results.json'
    # Create the target directory if needed so that the results gathered
    # above are not lost to a FileNotFoundError when the script is started
    # from a directory that has no ``scripts/`` sub-directory.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)

    print(f"Results saved to: {output_file}")
    print()

# Script entry point: run the search only when executed directly, not on import.
if __name__ == "__main__":
    main()