# (File-viewer banner from the original listing: 127 lines, 3.9 KiB, YAML)
# GoogleMapsPlaywrightEnrichment - Google Maps data extracted via Playwright browser automation
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08

# Schema identity: the schema IRI, its machine-readable name, and its title.
id: https://nde.nl/ontology/hc/classes/GoogleMapsPlaywrightEnrichment
name: GoogleMapsPlaywrightEnrichment
title: GoogleMapsPlaywrightEnrichment

# CURIE prefix map. The trailing '#' in the prov and xsd IRIs is part of the
# value (it is not preceded by whitespace, so YAML does not read a comment).
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#

# Schema modules pulled in by this file. Each relative import shares its name
# with a class used as an attribute range in GoogleMapsPlaywrightEnrichment.
imports:
  - linkml:types
  - ./GoogleReview
  - ./OpeningHours
  - ./PhotoMetadata
  - ./RelatedPlace
  - ./ReviewBreakdown
  - ./ReviewTopics
# Range applied to any slot that does not declare one explicitly.
default_range: string

classes:
  # Enrichment record for Google Maps data captured through Playwright
  # browser automation; mapped to prov:Entity (rationale in the description).
  GoogleMapsPlaywrightEnrichment:
    # Literal block scalar (|-) keeps each rationale bullet on its own line;
    # a folded scalar (>-) would merge same-indent bullet lines into one
    # paragraph.
    description: |-
      Google Maps data extracted via Playwright browser automation including
      place details, ratings, reviews, opening hours, photos, and popular times.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents enrichment data
        (an entity) derived from Google Maps via automated extraction
      - close_mappings includes schema:Place for location/place semantics
      - related_mappings includes schema:LocalBusiness for business attributes
    class_uri: prov:Entity
    close_mappings:
      - schema:Place
    related_mappings:
      - schema:LocalBusiness
    attributes:
      # --- Scrape provenance ---
      scrape_timestamp:
        range: datetime
        description: When the data was scraped
      scrape_method:
        range: string
        description: Method used (playwright)
      source_url:
        range: uri
        description: Google Maps URL that was scraped

      # --- Place identity and contact details ---
      name:
        range: string
        description: Place name
      address:
        range: string
        description: Full address
      plus_code:
        range: string
        description: Google Plus Code
      phone:
        range: string
        description: Phone number
      website:
        slot_uri: hc:hasWebsite
        range: uri  # FIXED 2026-01-16: was string, changed to uri for consistency
        # NOTE(review): the description allows a bare domain while the range
        # is uri; confirm that domain-only values validate as intended.
        description: Website URL (may be domain only)
        close_mappings:
          - schema:url

      # --- Ratings and reviews summary ---
      rating:
        range: float
        description: Average rating (0-5)
      total_reviews:
        range: integer
        description: Total number of reviews
      review_breakdown:
        range: ReviewBreakdown
        description: Review count by star rating

      # --- Opening status ---
      hours_text:
        range: string
        description: Human-readable hours text
      is_open_now:
        range: boolean
        description: Whether currently open

      review_topics:
        range: ReviewTopics
        description: Review topic mentions (dict of topic -> count)

      # --- Related places (inlined lists of RelatedPlace objects) ---
      related_places:
        range: RelatedPlace
        multivalued: true
        inlined_as_list: true
        description: Nearby related places
      at_this_place:
        range: RelatedPlace
        multivalued: true
        inlined_as_list: true
        description: Places located at this venue

      # --- Busyness ---
      live_busyness_percent:
        range: integer
        description: Current busyness percentage
      typical_busyness_percent:
        range: integer
        description: Typical busyness at this time

      place_description:
        range: string
        description: Place description from Google

      # --- Attributes declared without a description ---
      # NOTE(review): presumably overlaps with scrape_timestamp; confirm
      # whether both are needed.
      fetch_timestamp:
        range: datetime
      # NOTE(review): presumably the Google place identifier; confirm.
      place_id:
        range: string
      # NOTE(review): may overlap with total_reviews; confirm the distinction.
      total_ratings:
        range: integer
      reviews:
        range: GoogleReview
        multivalued: true
        inlined_as_list: true
      photos:
        range: PhotoMetadata
        multivalued: true
        inlined_as_list: true
      # NOTE(review): single-valued, unlike reviews/photos; confirm intended.
      opening_hour:
        range: OpeningHours

      popular_times:
        range: string
        description: Popular times data as JSON string