Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
This commit is contained in:
50
check_progress.sh
Executable file
50
check_progress.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
#
# check_progress.sh - print a status summary of the background jobs:
#   1. running smart_crawler / process_all_hotels_embeddings processes,
#   2. the last few matching lines of the crawler and embeddings logs,
#   3. crawl / chunk counters read from the PostgreSQL database.
#
# Database connection settings can be overridden through the environment
# variables HOTELS_DB_HOST, HOTELS_DB_PORT, HOTELS_DB_NAME, HOTELS_DB_USER
# and HOTELS_DB_PASSWORD; when unset, the built-in defaults are used.

echo "📊 СТАТУС ФОНОВЫХ ПРОЦЕССОВ"
echo "============================================================"
echo ""

# Check the processes: list PID plus the first three command-line words.
# `grep -v grep` drops the grep process itself from the listing.
echo "🔍 Активные процессы:"
ps aux | grep -E "smart_crawler|process_all_hotels_embeddings" | grep -v grep | awk '{print " PID: "$2" - "$11" "$12" "$13}'

echo ""
echo "📝 Последние логи краулера:"
# Errors from tail (e.g. no log file matches the glob) are discarded.
tail -5 smart_crawler_output_*.log 2>/dev/null | grep -E "INFO|ERROR" | tail -3

echo ""
echo "📝 Последние логи чанкинизации:"
tail -5 embeddings_processing_*.log 2>/dev/null | grep -E "INFO|ERROR|отелей|chunks" | tail -3

echo ""
echo "📈 Статистика из БД:"
# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python program; it reads the environment itself via os.environ.
python3 << 'PYEOF'
import os
import sys
from contextlib import closing
from urllib.parse import unquote

import psycopg2
from psycopg2.extras import RealDictCursor

# NOTE(review): the fallback values below are credentials committed to the
# repository. They are kept only so the script behaves as before when the
# HOTELS_DB_* variables are not set; the password should be rotated and the
# fallbacks removed once the environment variables are provisioned.
DB_CONFIG = {
    'host': os.environ.get('HOTELS_DB_HOST', '147.45.189.234'),
    'port': int(os.environ.get('HOTELS_DB_PORT', '5432')),
    'database': os.environ.get('HOTELS_DB_NAME', 'default_db'),
    'user': os.environ.get('HOTELS_DB_USER', 'gen_user'),
    # The fallback literal is percent-encoded and decoded with unquote();
    # a password supplied through the environment is used verbatim.
    'password': os.environ.get('HOTELS_DB_PASSWORD')
    or unquote('2~~9_%5EkVsU%3F2%5CS'),
    # Do not let a status check hang indefinitely on an unreachable host.
    'connect_timeout': 10,
}


def fetch_count(cur, query):
    """Run a single-row query and return its `count` column."""
    cur.execute(query)
    return cur.fetchone()['count']


try:
    # closing() guarantees the cursor and connection are released even when
    # one of the queries raises.
    with closing(
        psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)
    ) as conn, closing(conn.cursor()) as cur:
        crawled = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT hotel_id) as count FROM hotel_website_raw",
        )
        chunked = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT metadata->>'hotel_id') as count "
            "FROM hotel_website_chunks "
            "WHERE metadata->>'hotel_id' IS NOT NULL",
        )
        total_chunks = fetch_count(
            cur,
            "SELECT COUNT(*) as count FROM hotel_website_chunks",
        )
except psycopg2.Error as exc:
    # Report the failure briefly instead of dumping a traceback; keep a
    # non-zero exit status so callers can still detect the error.
    print(f" ❌ Ошибка БД: {exc}", file=sys.stderr)
    sys.exit(1)

print(f" 🕷️ Краулинг: {crawled:,} отелей")
print(f" 📦 Chunks: {chunked:,} отелей ({total_chunks:,} chunks)")
PYEOF
|
||||
Reference in New Issue
Block a user