Files
hotels/check_graphiti_data.py
Фёдор 0cf3297290 Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
2025-10-16 10:52:09 +03:00

78 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
from neo4j import GraphDatabase
# Diagnostic script: reports how much data Neo4j holds for one group_id
# (episodes, entities, relationships). When no episodes are found it lists the
# group_ids of episodes created during the last 10 minutes instead, to help
# spot data that was loaded under a different group_id.

# NOTE(review): connection settings, including the password, are hard-coded;
# consider loading them from the environment or a config file.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "supersecret"

# Group inspected by the queries below. It is sent as a Cypher parameter
# ($group_id) so the value lives in exactly one place.
GROUP_ID = "hotel_spb"
QUERY_PARAMS = {"group_id": GROUP_ID}

SEPARATOR = "=" * 70
PREVIEW_LENGTH = 120  # number of content characters shown per sample episode

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
try:
    with driver.session() as session:
        print(SEPARATOR)
        print(f"🔍 ПРОВЕРКА ДАННЫХ В NEO4J (group_id='{GROUP_ID}')")
        print(SEPARATOR)

        # Count the episodes stored for the group.
        result = session.run(
            """
            MATCH (e:Episode)
            WHERE e.group_id = $group_id
            RETURN count(e) AS episode_count
            """,
            QUERY_PARAMS,
        )
        episode_count = result.single()["episode_count"]
        print(f"\n📄 Эпизодов в {GROUP_ID}: {episode_count}")

        if episode_count > 0:
            # Show a few sample episodes.
            result = session.run(
                """
                MATCH (e:Episode)
                WHERE e.group_id = $group_id
                RETURN e.name AS name, e.content AS content,
                       size(e.embedding) AS emb_size
                LIMIT 3
                """,
                QUERY_PARAMS,
            )
            print("\n🔍 Примеры эпизодов:")
            for record in result:
                # A missing `content` property comes back as None; fall back
                # to an empty string so the slice below cannot raise.
                content = record["content"] or ""
                print(f" Name: {record['name']}")
                print(f" Embedding: {record['emb_size']} размерность")
                print(f" Content: {content[:PREVIEW_LENGTH]}...")
                print()

            # Count the entities of the group.
            result = session.run(
                """
                MATCH (e:Entity)
                WHERE e.group_id = $group_id
                RETURN count(e) AS count
                """,
                QUERY_PARAMS,
            )
            entities = result.single()["count"]
            print(f"🏷️ Сущностей: {entities}")

            # Count the relationships (edges) tagged with the group.
            result = session.run(
                """
                MATCH ()-[r]->()
                WHERE r.group_id = $group_id
                RETURN count(r) AS count
                """,
                QUERY_PARAMS,
            )
            edges = result.single()["count"]
            print(f"🔗 Рёбер: {edges}")
        else:
            print("\n❌ Данных НЕТ!")
            print(" Возможно данные загружались с другим group_id")

            # Look for recently created episodes under any group_id.
            result = session.run(
                """
                MATCH (e:Episode)
                WHERE e.created_at > datetime() - duration('PT10M')
                RETURN e.group_id AS group_id, count(e) AS count
                """
            )
            print("\n Эпизоды созданные за последние 10 минут:")
            for record in result:
                print(f" group_id='{record['group_id']}': {record['count']} эпизодов")

        print("\n" + SEPARATOR)
finally:
    # Release the driver's connections even when a query above fails.
    driver.close()