296 lines
9.6 KiB
Python
296 lines
9.6 KiB
Python
#!/usr/bin/env python3
"""
Z39.50 Client for Biblioteca Nacional de la República Argentina

Extracts ISIL codes and institution records from the Biblioteca Nacional's
Z39.50 server. ISIL codes are stored in MARC field 024 (Standard Identifier).

NOTE: Only the connection test and the MARC-to-ISIL extraction helper are
implemented so far; SimpleZ3950Client.search() raises NotImplementedError.

Server: 200.123.191.9:9991
Database: BNA01 (bibliographic) or BNA10 (authority records)
Credentials: Username 'Z39.50' / Password 'Z39.50'

Based on the investigation documented in:
data/isil/AR/ARGENTINA_ISIL_INVESTIGATION.md
"""
|
|
|
|
import json
|
|
import socket
|
|
import struct
|
|
from datetime import datetime, timezone
|
|
from typing import List, Dict, Optional
|
|
from dataclasses import dataclass, asdict
|
|
from pathlib import Path
|
|
|
|
|
|
@dataclass
class Z3950Config:
    """Connection settings for a Z39.50 server.

    Every field has a default, so ``Z3950Config()`` yields a complete
    configuration; individual values can be overridden positionally (in the
    order declared below) or by keyword.
    """

    # Network endpoint of the server.
    host: str = "200.123.191.9"
    port: int = 9991

    # Database name; the default selects the authority records.
    database: str = "BNA10"

    # Login credentials for the server.
    username: str = "Z39.50"
    password: str = "Z39.50"

    # Character set name.
    charset: str = "UTF-8"

    # Socket timeout in seconds (passed to socket.settimeout on connect).
    timeout: int = 30
|
|
|
|
|
|
@dataclass
class ISILRecord:
    """One institution and its ISIL code, as extracted from Z39.50 data.

    Only ``isil_code`` and ``institution_name`` are required. ``country``
    defaults to ``"AR"`` and every other descriptive field to ``None``.
    """

    isil_code: str
    institution_name: str
    institution_type: Optional[str] = None
    address: Optional[str] = None
    city: Optional[str] = None
    province: Optional[str] = None
    country: str = "AR"
    # MARC record the values were taken from, when the caller supplies it.
    marc_record: Optional[Dict] = None
    # ISO 8601 timestamp; filled in by __post_init__ when left as None.
    extraction_date: Optional[str] = None

    def __post_init__(self):
        """Stamp the record with the current UTC time unless a date was given."""
        # Only None triggers the default; any explicit value (even "") is kept.
        if self.extraction_date is not None:
            return
        now_utc = datetime.now(timezone.utc)
        self.extraction_date = now_utc.isoformat()
|
|
|
|
|
|
class SimpleZ3950Client:
    """
    Minimal Z39.50 client for extracting ISIL codes.

    NOTE: This is a simplified implementation. For production use,
    consider using PyZ3950 or YAZ Python bindings.

    Only TCP connection handling and the MARC-to-ISILRecord extraction are
    implemented; ``search`` is a placeholder that always raises
    ``NotImplementedError``.
    """

    def __init__(self, config: "Z3950Config"):
        """Remember the connection settings; no network activity happens here."""
        self.config = config
        # The open socket while connected, otherwise None.
        self.socket = None

    @staticmethod
    def _close_quietly(sock) -> None:
        """Close ``sock``, ignoring errors raised by ``close()`` itself."""
        try:
            sock.close()
        except OSError:
            pass

    def connect(self) -> bool:
        """Establish a TCP connection to the configured Z39.50 server.

        Returns:
            True if the connection was opened, False on any failure. The
            method never raises; the error is printed instead. After a
            failure ``self.socket`` is None and no socket is left open.
        """
        # Release a connection left over from an earlier connect() so its
        # descriptor is not orphaned when the attribute is overwritten.
        if self.socket is not None:
            self._close_quietly(self.socket)
            self.socket = None

        sock = None
        try:
            print(f"Connecting to Z39.50 server: {self.config.host}:{self.config.port}")
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(self.config.timeout)
            sock.connect((self.config.host, self.config.port))
        except Exception as e:
            # Deliberately broad: callers rely on a bool result, never on an
            # exception. Close the half-open socket so it does not leak.
            if sock is not None:
                self._close_quietly(sock)
            print(f"❌ Connection failed: {e}")
            return False

        # Publish the socket only once it is actually connected.
        self.socket = sock
        print("✅ Connected to Z39.50 server")
        return True

    def disconnect(self):
        """Close the connection if one is open; calling it again is a no-op."""
        if not self.socket:
            return
        # Clear the attribute first so the client never holds a reference to
        # a closed socket, even if close() raises.
        sock, self.socket = self.socket, None
        sock.close()
        print("Disconnected from Z39.50 server")

    def search(self, query: str, max_records: int = 100) -> List[Dict]:
        """
        Execute Z39.50 search query (not implemented).

        Args:
            query: Search query (e.g., "@attr 1=1003 'biblioteca'")
            max_records: Maximum number of records to retrieve

        Returns:
            List of MARC records

        Raises:
            NotImplementedError: always; encoding the Z39.50 protocol
                messages is left to a dedicated library.
        """
        # This is a placeholder implementation
        # Full implementation requires encoding Z39.50 protocol messages
        raise NotImplementedError(
            "Z39.50 protocol implementation requires PyZ3950 or YAZ library. "
            "See installation instructions below."
        )

    @staticmethod
    def _first_subfield(marc_record, tag, code, source=None):
        """Return the first non-empty ``code`` value among fields tagged ``tag``.

        Fields are scanned in record order and the first usable value wins.
        When ``source`` is given, a field only qualifies if its ``$2``
        subfield equals ``source`` case-insensitively.

        Two subfield layouts are accepted:
          * one mapping carrying several codes, e.g.
            ``[{'$a': 'AR-X', '$2': 'isil'}]``
          * one mapping per code, e.g. ``[{'$a': 'AR-X'}, {'$2': 'isil'}]``
        """
        def qualifies(codes):
            if source is None:
                return True
            declared = codes.get('$2')
            return isinstance(declared, str) and declared.lower() == source

        for field in marc_record.get('fields', []):
            if tag not in field:
                continue
            subfields = field[tag].get('subfields', [])

            # Layout 1: the value and its qualifier share a mapping.
            for subfield in subfields:
                if qualifies(subfield) and subfield.get(code):
                    return subfield[code]

            # Layout 2: codes are spread over several mappings; combine them
            # (first occurrence of each code wins) and test the field as a whole.
            combined = {}
            for subfield in subfields:
                for key, value in subfield.items():
                    combined.setdefault(key, value)
            if qualifies(combined) and combined.get(code):
                return combined[code]

        return None

    def extract_isil_from_marc(self, marc_record: Dict) -> "Optional[ISILRecord]":
        """
        Extract ISIL code and institution info from MARC record.

        MARC field 024: Standard Identifier
        - $a: Standard number or code
        - $2: Source of number (should be 'isil' or 'ISIL')

        MARC field 110: Corporate Name (Main Entry)
        - $a: Corporate name

        MARC field 370: Place Associated (for archives/libraries)
        - $e: Place of corporate headquarters

        Args:
            marc_record: Mapping with a 'fields' list; each entry maps a tag
                to a mapping whose 'subfields' value is a list of mappings
                keyed by '$'-prefixed subfield codes.

        Returns:
            An ISILRecord built from the first 024 field whose source is
            'isil', or None when the record has no ISIL code or is malformed
            (in which case a warning is printed). The institution name falls
            back to "Unknown" when no 110 $a is present.
        """
        try:
            isil_code = self._first_subfield(marc_record, '024', '$a', source='isil')
            if not isil_code:
                return None

            institution_name = self._first_subfield(marc_record, '110', '$a')
            city = self._first_subfield(marc_record, '370', '$e')

            return ISILRecord(
                isil_code=isil_code,
                institution_name=institution_name or "Unknown",
                city=city,
                marc_record=marc_record,
            )
        except Exception as e:
            # Deliberately broad: one malformed record must not abort a batch;
            # the problem is reported and the record skipped.
            print(f"⚠️ Error extracting ISIL from MARC: {e}")
            return None
|
|
|
|
|
|
def install_instructions():
    """Print the guide for installing a Z39.50 library or using alternatives.

    The text is written to standard output, framed by one blank line before
    and after; nothing is returned.
    """
    rule = "================================================================================"
    guide = (
        "",
        rule,
        "Z39.50 LIBRARY INSTALLATION REQUIRED",
        rule,
        "",
        "To use this script, you need a Z39.50 library. We recommend PyZ3950:",
        "",
        "OPTION 1: PyZ3950 (Python-native, easier)",
        "------------------------------------------",
        "pip install PyZ3950",
        "",
        "OPTION 2: YAZ + Python bindings (more robust)",
        "----------------------------------------------",
        "# macOS (via Homebrew)",
        "brew install yaz",
        "pip install pymarc yaz",
        "",
        "# Ubuntu/Debian",
        "sudo apt-get install yaz libyaz-dev",
        "pip install pymarc yaz",
        "",
        "OPTION 3: Use existing tools (command-line)",
        "--------------------------------------------",
        "# Install YAZ command-line tools",
        "brew install yaz # macOS",
        "",
        "# Query Biblioteca Nacional",
        "yaz-client 200.123.191.9:9991/BNA10",
        "",
        "# In yaz-client interactive shell:",
        "open 200.123.191.9:9991/BNA10",
        "find @attr 1=1003 biblioteca",
        "show 1",
        "",
        rule,
        "ALTERNATIVE: Manual Web Interface",
        rule,
        "",
        "The Biblioteca Nacional may have a web-based catalog (OPAC) that can be",
        "scraped as an alternative to Z39.50. Check:",
        "",
        "https://catalogo.bn.gov.ar/",
        "",
        "If available, we can create a web scraper instead of using Z39.50.",
        "",
        rule,
        "",
    )
    # The empty first and last entries reproduce the newline that follows the
    # opening and precedes the closing quotes of a triple-quoted literal.
    print("\n".join(guide))
|
|
|
|
|
|
def test_connection():
    """Probe the Biblioteca Nacional Z39.50 server and report the outcome.

    Prints a banner with the default connection settings, attempts to
    connect, and then prints either a success note or troubleshooting hints.

    Returns:
        True if the connection could be opened (it is closed again before
        returning), False otherwise.
    """
    settings = Z3950Config()
    probe = SimpleZ3950Client(settings)

    rule = "================================================================================"
    print(rule)
    print("TESTING: Biblioteca Nacional Z39.50 Server")
    print(rule)
    print(f"Host: {settings.host}")
    print(f"Port: {settings.port}")
    print(f"Database: {settings.database}")
    print()

    # Failure path first: explain the likely causes and bail out.
    if not probe.connect():
        failure_report = (
            "❌ Connection failed.",
            "",
            "Possible issues:",
            " 1. Server may be temporarily down",
            " 2. Firewall blocking outbound connections",
            " 3. Server IP/port changed",
            "",
            "Alternative: Check if Biblioteca Nacional has a web catalog (OPAC)",
        )
        for line in failure_report:
            print(line)
        return False

    print("✅ Connection successful!")
    print()
    print("⚠️ However, full Z39.50 protocol implementation requires PyZ3950 library.")
    print(" Run this script with --install-help for installation instructions.")
    # The probe only checks reachability, so release the socket right away.
    probe.disconnect()
    return True
|
|
|
|
|
|
def main():
    """Command-line entry point.

    ``--install-help`` prints the installation guide and ``--test`` runs the
    connection test; each returns right after. With neither flag, the usage
    overview is printed and the connection test runs afterwards.
    """
    import sys

    # Checked in order, so --install-help wins when both flags are present.
    flag_actions = (
        ("--install-help", install_instructions),
        ("--test", test_connection),
    )
    for flag, action in flag_actions:
        if flag in sys.argv:
            action()
            return

    rule = "================================================================================"
    overview = (
        "",
        rule,
        "BIBLIOTECA NACIONAL Z39.50 ISIL EXTRACTOR",
        rule,
        "",
        "This script extracts ISIL codes from Biblioteca Nacional's Z39.50 server.",
        "",
        "USAGE:",
        "python3 query_biblioteca_nacional_z3950.py --test # Test connection",
        "python3 query_biblioteca_nacional_z3950.py --install-help # Installation guide",
        "",
        "STATUS:",
        "⚠️ Z39.50 library (PyZ3950) not yet installed.",
        "",
        "This script provides the framework. To use it:",
        "1. Install PyZ3950: pip install PyZ3950",
        "2. Implement full Z39.50 search/retrieve protocol",
        "3. Parse MARC records to extract ISIL codes",
        "",
        "ALTERNATIVE APPROACHES:",
        "1. Use YAZ command-line tools (yaz-client) to query manually",
        "2. Check if Biblioteca Nacional has a web catalog (OPAC) to scrape",
        "3. Contact Biblioteca Nacional directly for ISIL registry export",
        "",
        "For investigation details, see:",
        "data/isil/AR/ARGENTINA_ISIL_INVESTIGATION.md",
        "",
        rule,
        "",
    )
    # The empty first and last entries reproduce the newline that follows the
    # opening and precedes the closing quotes of a triple-quoted literal.
    print("\n".join(overview))

    # Default action: check that the server is reachable.
    test_connection()
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|