fix(ppid): fix unidecode import reference typo
This commit is contained in:
parent
c45367c60f
commit
04791a7a91
1 changed files with 47 additions and 8 deletions
|
|
@ -77,9 +77,9 @@ def normalize_name(name: str) -> str:
|
|||
for c in name
|
||||
)
|
||||
|
||||
if has_non_latin and HAS_UNIDECODE:
|
||||
if has_non_latin and HAS_UNIDECODE and _unidecode is not None:
|
||||
# Use unidecode for Hebrew, Arabic, Chinese, etc.
|
||||
ascii_name = unidecode(name)
|
||||
ascii_name = _unidecode(name)
|
||||
else:
|
||||
# Use NFD decomposition for Latin scripts with diacritics
|
||||
normalized = unicodedata.normalize('NFD', name)
|
||||
|
|
@ -217,9 +217,46 @@ def load_person_entity(filepath: Path) -> Optional[dict]:
|
|||
return None
|
||||
|
||||
|
||||
def get_person_name_both(data: dict) -> tuple[str, str]:
    """Extract a person's name from entity data in both forms.

    The original name is the first non-empty ``name`` found in
    ``profile_data``, ``source_staff_info`` and ``fallback_data``, checked
    in that order. The display name is ``profile_data['name_romanized']``
    when it holds a non-blank value; otherwise the original name is
    returned for both positions.

    Args:
        data: Entity data dictionary. Sections that are missing or set to
            ``None`` are treated as empty.

    Returns:
        tuple: (display_name, original_name)
            - display_name: romanized/ASCII name for PPID
            - original_name: original name (may be non-Latin script)
    """
    def _section(key: str) -> dict:
        # A section may be absent or explicitly null in the input; in both
        # cases fall back to an empty mapping so the chained .get() is safe.
        return data.get(key) or {}

    profile = _section('profile_data')

    # Get original name (sections are only consulted until one yields a name)
    original_name = (
        profile.get('name')
        or _section('source_staff_info').get('name')
        or _section('fallback_data').get('name')
        or ''
    ).strip()

    # Strip before testing so a whitespace-only romanization does not
    # produce an empty display name when an original name is available.
    name_romanized = (profile.get('name_romanized') or '').strip()
    if name_romanized:
        return name_romanized, original_name

    # Return original name for both if no romanization
    return original_name, original_name
|
||||
|
||||
|
||||
def get_person_name(data: dict) -> str:
|
||||
"""Extract person name from entity data."""
|
||||
# Try multiple locations
|
||||
"""Extract person name from entity data.
|
||||
|
||||
Priority:
|
||||
1. name_romanized (already transliterated)
|
||||
2. name from profile_data
|
||||
3. name from source_staff_info
|
||||
4. name from fallback_data
|
||||
"""
|
||||
# First try romanized name (for Hebrew, Arabic, etc.)
|
||||
name_romanized = data.get('profile_data', {}).get('name_romanized')
|
||||
if name_romanized:
|
||||
return name_romanized.strip()
|
||||
|
||||
# Try regular name fields
|
||||
name = (
|
||||
data.get('profile_data', {}).get('name') or
|
||||
data.get('source_staff_info', {}).get('name') or
|
||||
|
|
@ -255,7 +292,7 @@ def get_current_location(data: dict) -> Optional[str]:
|
|||
|
||||
def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
|
||||
"""Create a new PPID entity structure from source data."""
|
||||
name = get_person_name(data)
|
||||
display_name, original_name = get_person_name_both(data)
|
||||
|
||||
entity = {
|
||||
"ppid": ppid,
|
||||
|
|
@ -266,11 +303,13 @@ def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
|
|||
"first_date": "XXXX",
|
||||
"last_location": "XX-XX-XXX",
|
||||
"last_date": "XXXX",
|
||||
"name_tokens": extract_name_tokens(name)
|
||||
"name_tokens": extract_name_tokens(display_name)
|
||||
},
|
||||
"name": {
|
||||
"full_name": name,
|
||||
"name_tokens": extract_name_tokens(name),
|
||||
"full_name": original_name,
|
||||
"display_name": display_name,
|
||||
"name_romanized": display_name if display_name != original_name else None,
|
||||
"name_tokens": extract_name_tokens(display_name),
|
||||
"source": "linkedin_profile"
|
||||
},
|
||||
"birth_date": {
|
||||
|
|
|
|||
Loading…
Reference in a new issue