#!/usr/bin/env python3
"""
Vectorize ONLY the Kamchatka Krai hotels (7 hotels remaining).

Selects every Kamchatka Krai hotel that has processed website text but no
rows in ``hotel_website_chunks`` yet, and runs each one through
``EmbeddingProcessor.process_hotel``.
"""

import logging
import sys

# The processor module lives outside the import path, so its directory has to
# be registered before the import below can succeed.
sys.path.insert(0, '/root/engine/public_oversight/hotels')

from process_all_hotels_embeddings import EmbeddingProcessor  # noqa: E402

# Logging setup: every record goes both to a log file and to the console.
# The file handler is opened as UTF-8 explicitly because the messages contain
# Cyrillic text and emoji, which a non-UTF-8 locale default cannot encode.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('kamchatka_embeddings.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Hotels of Kamchatka Krai whose cleaned website text is longer than 50
# characters and for which the LEFT JOIN finds no chunk row (c.id IS NULL).
_PENDING_KAMCHATKA_HOTELS_SQL = """
    SELECT DISTINCT p.hotel_id, m.full_name
    FROM hotel_website_processed p
    INNER JOIN hotel_main m ON p.hotel_id = m.id
    LEFT JOIN hotel_website_chunks c ON p.hotel_id::text = c.metadata->>'hotel_id'
    WHERE m.region_name = 'Камчатский край'
      AND p.cleaned_text IS NOT NULL
      AND LENGTH(p.cleaned_text) > 50
      AND c.id IS NULL
    ORDER BY m.full_name
"""


def main() -> None:
    """Process all Kamchatka Krai hotels that do not have chunks yet.

    Fetches the pending hotels, calls ``processor.process_hotel`` for each
    one, and logs a success/failure summary. Any exception raised during the
    run is logged together with its traceback and is not re-raised; the
    processor is always closed at the end.
    """
    logger.info("🚀 Векторизация Камчатского края")

    processor = EmbeddingProcessor()

    try:
        # Fetch Kamchatka hotels that have no chunks yet.
        processor.cur.execute(_PENDING_KAMCHATKA_HOTELS_SQL)
        hotels = processor.cur.fetchall()
        total = len(hotels)

        logger.info(f"📊 Найдено отелей Камчатки без chunks: {total}")

        if not hotels:
            logger.info("✅ Все отели Камчатки уже обработаны!")
            return

        # Process the hotels one by one, counting outcomes.
        successful = 0
        failed = 0

        for i, (hotel_id, hotel_name) in enumerate(hotels, 1):
            logger.info(f"\n[{i}/{total}] 🏨 {hotel_name}")
            logger.info(f" ID: {hotel_id}")

            # A truthy result from process_hotel is counted as a success.
            if processor.process_hotel(hotel_id):
                successful += 1
                logger.info(" ✅ Успешно")
            else:
                failed += 1
                logger.error(" ❌ Ошибка")

        logger.info("\n🎉 ЗАВЕРШЕНО!")
        logger.info(f" ✅ Успешно: {successful}")
        logger.info(f" ❌ Ошибок: {failed}")

    except Exception as e:
        # Top-level boundary: logger.exception keeps the traceback, which a
        # plain logger.error with only str(e) would discard.
        logger.exception("❌ Критическая ошибка: %s", e)
    finally:
        processor.close()


if __name__ == "__main__":
    main()