143 lines
5.8 KiB
Python
143 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test the Google Maps snapshot parser with real data from Rijksmuseum.
|
|
|
|
This script demonstrates parsing the Playwright accessibility snapshot
|
|
to extract structured Google Maps data.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add scripts to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from scrape_google_maps_reviews import (
|
|
parse_snapshot_for_reviews,
|
|
to_json_serializable,
|
|
GoogleMapsScrapedData,
|
|
ReviewBreakdown,
|
|
RelatedPlace,
|
|
ReviewData,
|
|
)
|
|
|
|
# Sample snapshot data extracted from the Rijksmuseum Google Maps page
|
|
# This is a subset of the real accessibility tree for testing
|
|
SAMPLE_SNAPSHOT = """
|
|
- Page URL: https://www.google.com/maps/place/Rijksmuseum/@52.3599976,4.8852188,17z/data=!3m1!4b1!4m6!3m5!1s0x47c609eec1bb16e5:0xd54373ae6a408585!8m2!3d52.3599976!4d4.8852188!16zL20vMDZqc2Y?entry=ttu
|
|
- Page Title: Rijksmuseum - Google Maps
|
|
- heading "Rijksmuseum" [level=1] [ref=e131]: Rijksmuseum
|
|
- img "4.7 stars" [ref=e137]
|
|
- img "108,805 reviews" [ref=e145]: (108,805)
|
|
- button "Art museum" [ref=e159]
|
|
- img "5 stars, 81,221 reviews" [ref=e668]: "5"
|
|
- img "4 stars, 21,263 reviews" [ref=e671]: "4"
|
|
- img "3 stars, 4,356 reviews" [ref=e674]: "3"
|
|
- img "2 stars, 907 reviews" [ref=e677]: "2"
|
|
- img "1 stars, 1,058 reviews" [ref=e680]: "1"
|
|
- button "Address: Museumstraat 1, 1071 XX Amsterdam" [ref=e290]
|
|
- button "Open · Closes 5 pm · See more hours" [ref=e302]
|
|
- link "Website: rijksmuseum.nl" [ref=e313]
|
|
- button "Phone: 020 674 7000" [ref=e329]
|
|
- button "Plus code: 9V5P+X3 Amsterdam" [ref=e345]
|
|
- img "Currently 28% busy, usually 41% busy." [ref=e487]
|
|
- radio "rembrandt, mentioned in 2,834 reviews" [ref=e752]
|
|
- radio "the night watch, mentioned in 1,846 reviews" [ref=e757]
|
|
- radio "johannes vermeer, mentioned in 1,359 reviews" [ref=e762]
|
|
- radio "history of the netherlands, mentioned in 527 reviews" [ref=e767]
|
|
- link "Van Gogh Museum · 4.6 stars · 101,806 reviews · Art museum" [ref=e923]
|
|
- link "Rembrandt House Museum · 4.5 stars · 14,408 reviews · Art museum" [ref=e935]
|
|
- link "Anne Frank House · 4.5 stars · 73,577 reviews · Museum" [ref=e947]
|
|
- link "Stedelijk Museum Amsterdam · 4.4 stars · 14,860 reviews · Art museum" [ref=e959]
|
|
- generic "Ariel Jewel" [ref=e775]:
|
|
- button "Ariel Jewel Local Guide · 199 reviews · 682 photos" [ref=e782]
|
|
- img "5 stars" [ref=e790]
|
|
- generic [ref=e796]: a week ago
|
|
- text: The only way I can describe this museum aptly is by comparing it the Louvre. It was so much better than the Louvre in every sense of the word.
|
|
- generic "Kevin Bourke" [ref=e822]:
|
|
- button "Kevin Bourke Local Guide · 16 reviews · 15 photos" [ref=e829]
|
|
- img "4 stars" [ref=e837]
|
|
- generic [ref=e843]: a month ago
|
|
- text: Stunning setting on par with the Louvre; goldmine of Dutch Masters but also extensive collections of porcelain, china, silverware, jewelry, firearms, nautical models and other items.
|
|
- generic "Vidya H" [ref=e866]:
|
|
- button "Vidya H Local Guide · 258 reviews · 1,274 photos" [ref=e873]
|
|
- img "5 stars" [ref=e881]
|
|
- generic [ref=e887]: a month ago
|
|
- text: The Rijksmuseum is truly massive — a treasure trove filled with everything from exquisite porcelain and jewelry to world-class paintings and sculptures.
|
|
"""
|
|
|
|
|
|
def _preview_text(text, limit=100):
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def main():
    """Parse the bundled sample snapshot, print a report, and save it as JSON.

    Runs ``parse_snapshot_for_reviews`` on ``SAMPLE_SNAPSHOT``, prints the
    parsed fields section by section, prints the JSON form produced by
    ``to_json_serializable``, and writes that same JSON text to
    ``data/test/rijksmuseum_google_maps.json`` located one directory above
    the directory containing this script (created if missing).

    Returns:
        int: Always 0.
    """
    banner = "=" * 70
    print(banner)
    print("Testing Google Maps Snapshot Parser")
    print(banner)

    # Parse the sample snapshot
    data = parse_snapshot_for_reviews(SAMPLE_SNAPSHOT)

    # Convert to JSON
    result = to_json_serializable(data)

    # Print results
    print("\n📍 Place Information:")
    print(f" Name: {data.name}")
    print(f" Rating: {data.rating} ⭐")
    print(f" Total Reviews: {data.total_reviews:,}")
    print(f" Address: {data.address}")
    print(f" Phone: {data.phone}")
    print(f" Website: {data.website}")
    print(f" Plus Code: {data.plus_code}")
    print(f" Hours: {data.hours_text}")
    print(f" Open Now: {data.is_open_now}")

    # Compare against None instead of relying on truthiness so that a
    # legitimate 0.0 coordinate is still reported.
    # NOTE(review): assumes the parser leaves unknown coordinates as None --
    # confirm against the GoogleMapsScrapedData defaults.
    if data.latitude is not None and data.longitude is not None:
        print(f" Coordinates: {data.latitude}, {data.longitude}")

    print("\n📊 Review Breakdown:")
    breakdown = data.review_breakdown
    if breakdown:
        print(f" 5 stars: {breakdown.five_star:,}")
        print(f" 4 stars: {breakdown.four_star:,}")
        print(f" 3 stars: {breakdown.three_star:,}")
        print(f" 2 stars: {breakdown.two_star:,}")
        print(f" 1 star: {breakdown.one_star:,}")

    print("\n💬 Review Topics:")
    # Most-mentioned topics first.
    for topic, count in sorted(data.review_topics.items(), key=lambda x: -x[1]):
        print(f" {topic}: {count:,}")

    print("\n📈 Live Busyness:")
    # Same reasoning as for the coordinates: 0% busy is a real reading.
    # NOTE(review): assumes None marks "no busyness data" -- confirm.
    if data.live_busyness is not None:
        print(f" Currently {data.live_busyness}% busy")

    print("\n🔗 Related Places:")
    for place in data.related_places:
        print(f" {place.name}: {place.rating}⭐ ({place.review_count:,} reviews) - {place.place_type}")

    print("\n📝 Sample Reviews:")
    for index, review in enumerate(data.reviews[:3], start=1):
        print(f"\n Review {index}:")
        print(f" Author: {review.author_name}")
        print(f" Rating: {review.rating}⭐")
        print(f" Time: {review.relative_time}")
        print(f" Local Guide: {review.local_guide}")
        if review.text:
            # Only show an ellipsis when the text was actually shortened.
            print(f" Text: {_preview_text(review.text)}")

    print("\n" + banner)
    print("Full JSON Output:")
    print(banner)

    # Serialise once and reuse the text for both the console and the file.
    json_text = json.dumps(result, indent=2, ensure_ascii=False)
    print(json_text)

    # Save to file
    output_path = Path(__file__).parent.parent / "data" / "test" / "rijksmuseum_google_maps.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json_text, encoding="utf-8")
    print(f"\n✅ Saved to: {output_path}")

    return 0
|
|
|
|
|
|
# Script entry point: run the demo and use its return value as the
# process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
|