Files
hotels/test_parser_api.py
Фёдор 684fada337 🚀 Full project sync: Hotels RAG & Audit System
 Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
2025-10-27 22:49:42 +03:00

113 lines
3.2 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Test client for the Universal Parser API.
"""
import json
import os

import requests
# Configuration.
# Both values may be overridden through the environment so the client can be
# pointed at another deployment without editing this file.
API_URL = os.environ.get("PARSER_API_URL", "http://localhost:8003")
# NOTE(review): the fallback below is a credential committed to source control.
# It is kept only so existing local runs behave as before; prefer setting
# PARSER_API_KEY and rotating this key.
API_KEY = os.environ.get("PARSER_API_KEY", "parser_2025_secret_key_a8f3d9c1b4e7")
def _print_parse_result(data, extract_links):
    """Print the summary, text preview and optional link list of a /parse reply.

    Args:
        data: Decoded JSON body of the reply; must be a dict.
        extract_links: When true, also list up to ten entries of ``data["links"]``.

    Raises:
        ValueError: If ``data`` is not a JSON object.
    """
    if not isinstance(data, dict):
        raise ValueError(f"unexpected JSON payload type: {type(data).__name__}")

    print("✅ Успех!")
    print()
    print("📊 РЕЗУЛЬТАТЫ:")
    # .get() keeps one missing field from aborting the whole report.
    print(f" Status Code: {data.get('status_code')}")
    print(f" Title: {data.get('title')}")
    # The ',' format spec needs a number, so fall back to 0 for a missing/null length.
    print(f" Текст: {data.get('text_length') or 0:,} символов")
    print(f" Время: {data.get('parsing_time')}с")
    print()

    if not data.get('success'):
        print(f"❌ Ошибка: {data.get('error')}")
        return

    print("📄 ПРЕВЬЮ КОНТЕНТА:")
    print("-" * 80)
    print((data.get('text') or "")[:1000])
    print("-" * 80)

    links = data.get('links') if extract_links else None
    if links:
        print()
        print(f"🔗 Найдено ссылок: {len(links)}")
        for i, link in enumerate(links[:10], 1):
            print(f" {i}. {link}")
        if len(links) > 10:
            print(f" ... и ещё {len(links) - 10}")


def test_parse(url: str, extract_links: bool = False):
    """Send one URL to the API's ``/parse`` endpoint and print the outcome.

    POSTs a JSON payload (5 s wait, JavaScript enabled, no screenshot) with the
    ``X-API-Key`` header and a 60 s timeout. On HTTP 200 the decoded reply is
    printed; on any other status the status code and raw body are printed.

    Args:
        url: Page address to hand to the parser.
        extract_links: Forwarded as ``extract_links`` in the payload; when true
            the returned links are listed as well.

    Returns:
        None. Results are written to stdout only.
    """
    # The original expression was `"" * 80`, which is always an empty string,
    # so the banner rules never showed; use a visible rule instead.
    rule = "=" * 80
    print(rule)
    print(f"🔍 ТЕСТИРУЕМ ПАРСИНГ: {url}")
    print(rule)
    print()
    headers = {
        "X-API-Key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "url": url,
        "wait_seconds": 5,
        "extract_links": extract_links,
        "screenshot": False,
        "javascript_enabled": True,
    }
    try:
        print("📤 Отправляем запрос...")
        response = requests.post(
            f"{API_URL}/parse",
            headers=headers,
            json=payload,
            timeout=60,
        )
        if response.status_code == 200:
            _print_parse_result(response.json(), extract_links)
        else:
            print(f"❌ HTTP {response.status_code}")
            print(response.text)
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout problems.
        # ValueError: body is not valid JSON or not a JSON object.
        print(f"❌ Ошибка: {e}")
    print()
    print(rule)
def test_health():
    """Query the API's ``/health`` endpoint and print whether it is up.

    Prints the reported ``status`` and ``version`` on HTTP 200, otherwise a
    failure line. Network errors and undecodable replies are reported on
    stdout instead of propagating, so the remaining checks in the script
    still run.

    Returns:
        None. Results are written to stdout only.
    """
    print("🏥 Проверка здоровья API...")
    try:
        # Without a timeout requests waits indefinitely on an unresponsive host.
        response = requests.get(f"{API_URL}/health", timeout=10)
    except requests.RequestException as e:
        print(f"❌ API недоступен: {e}")
        return

    if response.status_code != 200:
        print("❌ API недоступен")
        return

    try:
        data = response.json()
    except ValueError as e:
        # A 200 reply whose body is not JSON is still treated as a failed check.
        print(f"❌ API недоступен: {e}")
        return
    if not isinstance(data, dict):
        print("❌ API недоступен")
        return

    print(f"✅ API работает: {data.get('status')}")
    print(f" Версия: {data.get('version')}")
if __name__ == "__main__":
    # Check 1: health endpoint.
    test_health()

    # Checks 2 and 3: a court website (with protection), then an ordinary site.
    # Each entry is (page address, whether to request link extraction).
    scenarios = (
        (
            "https://mos-sud.ru/312/cases/civil/details/7b8a110a-162d-4493-88b0-e505523c9935?uid=77MS0312-01-2025-002929-35&formType=fullForm",
            False,
        ),
        ("https://example.com", True),
    )
    for page_url, want_links in scenarios:
        # Blank line between consecutive checks.
        print()
        test_parse(page_url, extract_links=want_links)