"""
OCR Service - Распознавание документов + AI проверка
"""
import httpx
import logging
from typing import Optional, Dict, Any
from ..config import settings
import json
from .s3_service import s3_service

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class OCRService:
    """Run OCR on an uploaded document and classify the text with an LLM.

    The pipeline implemented by :meth:`process_document` is:

    1. upload the raw file through ``s3_service`` to obtain a URL;
    2. ask the OCR API (``settings.ocr_api_url``) to analyse that URL;
    3. send the recognised text to the OpenRouter chat-completions endpoint
       to classify the document and extract policy fields.
    """

    def __init__(self):
        """Read the OCR endpoint and OpenRouter credentials from ``settings``."""
        self.ocr_url = settings.ocr_api_url
        self.ai_api_key = settings.openrouter_api_key
        self.ai_model = settings.openrouter_model

    @staticmethod
    def _extract_ocr_text(ocr_result: Any) -> str:
        """Pull the recognised text out of a decoded OCR API response.

        Two response shapes are handled:

        * a list whose first element is a dict: text is the non-empty
          ``ocr_text`` of every entry in ``pages_data`` joined by blank lines,
          falling back to ``text`` and then ``full_text``;
        * a bare dict (older format): ``text`` and then ``full_text``.

        Malformed pieces (``null`` values, non-dict entries, non-string text)
        are skipped instead of raising, so a partially broken response still
        yields whatever text is usable.

        Returns:
            The extracted text, or ``""`` when nothing usable is found.
        """
        if isinstance(ocr_result, list):
            data = ocr_result[0] if ocr_result else None
            pages = data.get("pages_data") if isinstance(data, dict) else None
        elif isinstance(ocr_result, dict):
            # The older dict format is only checked for text/full_text.
            data, pages = ocr_result, None
        else:
            data, pages = None, None

        if not isinstance(data, dict):
            return ""

        if isinstance(pages, list):
            page_texts = [
                page["ocr_text"]
                for page in pages
                if isinstance(page, dict)
                and isinstance(page.get("ocr_text"), str)
                and page["ocr_text"]
            ]
            if page_texts:
                return "\n\n".join(page_texts)

        for key in ("text", "full_text"):
            value = data.get(key)
            if isinstance(value, str) and value:
                return value
        return ""

    @staticmethod
    def _parse_ai_json(content: Any) -> Optional[Dict[str, Any]]:
        """Decode the JSON object contained in a model reply.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON on its own (e.g. the model added prose or a fence language
        tag), the span from the first ``{`` to the last ``}`` is tried.

        Returns:
            The decoded dict, or ``None`` when the reply is not a string,
            cannot be decoded, or decodes to something other than an object.
        """
        if not isinstance(content, str):
            return None

        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        content = content.strip()

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            start, end = content.find("{"), content.rfind("}")
            if start == -1 or end <= start:
                return None
            try:
                parsed = json.loads(content[start:end + 1])
            except json.JSONDecodeError:
                return None

        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _to_confidence(value: Any) -> float:
        """Coerce a model-supplied confidence to ``float``; ``0.0`` if impossible."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    async def process_document(self, file_content: bytes, filename: str) -> Dict[str, Any]:
        """Upload a document, OCR it and classify the recognised text.

        Args:
            file_content: Raw bytes of the uploaded file.
            filename: Original file name; its extension selects the upload
                content type and the ``file_type`` sent to the OCR API.

        Returns:
            A dict with keys ``ocr_text``, ``ai_analysis``, ``document_type``,
            ``is_valid``, ``confidence`` and ``extracted_data``. The method
            does not raise on processing errors: on any failure the error is
            logged and the dict is returned with whatever was filled in so
            far (defaults otherwise).
        """
        result = {
            "ocr_text": "",
            "ai_analysis": None,
            "document_type": "unknown",  # policy, passport, ticket, other, garbage
            "is_valid": False,
            "confidence": 0.0,
            "extracted_data": {}
        }

        try:
            # Step 0: upload the file so the OCR API can fetch it by URL.
            logger.info(f"📤 Uploading file to S3: {filename}")

            lower_name = filename.lower()

            # NOTE(review): .doc/.docx are accepted by the OCR type map below
            # but fall through to "image/jpeg" here — confirm whether the
            # storage layer or OCR API cares about the stored content type.
            content_type = "image/jpeg"
            if lower_name.endswith('.pdf'):
                content_type = "application/pdf"
            elif lower_name.endswith('.png'):
                content_type = "image/png"
            elif lower_name.endswith(('.heic', '.heif')):
                content_type = "image/heic"

            s3_url = await s3_service.upload_file(
                file_content=file_content,
                filename=filename,
                content_type=content_type,
                folder="ocr_temp"
            )

            if not s3_url:
                logger.error("❌ Failed to upload file to S3")
                return result

            # The upload URL is handed to the OCR API as-is (no presigning).
            # NOTE(review): the original author's comment states that objects
            # under ocr_temp/ are uploaded with ACL=public-read; nothing in
            # this method removes them afterwards — confirm that something
            # else cleans up these potentially sensitive documents.
            ocr_file_url = s3_url

            logger.info("✅ File uploaded to S3, using public URL for OCR")

            # Step 1: OCR via the uploaded URL.
            logger.info(f"🔍 Starting OCR for: {filename}")

            # The OCR API requires file_type to be a string, so unknown
            # extensions fall back to 'pdf' rather than None.
            file_ext = lower_name.split('.')[-1]
            file_type_map = {
                'pdf': 'pdf',
                'jpg': 'jpeg',
                'jpeg': 'jpeg',
                'png': 'png',
                'heic': 'heic',
                'heif': 'heic',
                'docx': 'docx',
                'doc': 'doc'
            }
            file_type = file_type_map.get(file_ext, 'pdf')

            logger.info(f"📄 File type detected: {file_type}")

            async with httpx.AsyncClient(timeout=90.0) as client:
                response = await client.post(
                    f"{self.ocr_url}/analyze-file",
                    json={
                        "file_url": ocr_file_url,
                        "file_name": filename,
                        "file_type": file_type
                    }
                )

            if response.status_code != 200:
                logger.error(f"❌ OCR failed: {response.status_code}")
                logger.error(f"Response: {response.text[:500]}")
                return result

            ocr_result = response.json()
            ocr_text = self._extract_ocr_text(ocr_result)
            result["ocr_text"] = ocr_text

            logger.info(f"📄 OCR completed: {len(ocr_text)} chars")
            if ocr_text:
                logger.info(f"OCR Text preview: {ocr_text[:200]}...")
            else:
                logger.warning("⚠️ OCR returned empty text!")
                logger.debug(f"OCR response structure: {list(ocr_result.keys()) if isinstance(ocr_result, dict) else type(ocr_result)}")

            # Step 2: ask the model what kind of document this is.
            logger.info(f"🤖 Starting AI analysis with {self.ai_model}")

            ai_analysis = await self._analyze_with_vision(ocr_text)
            result["ai_analysis"] = ai_analysis

            if isinstance(ai_analysis, dict) and ai_analysis:
                extracted = ai_analysis.get("extracted_data")

                result["document_type"] = ai_analysis.get("document_type") or "unknown"
                result["is_valid"] = ai_analysis.get("is_valid_policy", False)
                # Model output is untrusted: keep the documented value types
                # even if the model returns null or a string.
                result["confidence"] = self._to_confidence(ai_analysis.get("confidence", 0.0))
                result["extracted_data"] = extracted if isinstance(extracted, dict) else {}

                logger.info("✅ AI Analysis complete:")
                logger.info(f"   Document type: {result['document_type']}")
                logger.info(f"   Valid policy: {result['is_valid']}")
                logger.info(f"   Confidence: {result['confidence']}")

                if result['document_type'] == 'garbage':
                    logger.warning(f"⚠️ GARBAGE DETECTED: {filename} - not a policy document!")
                elif result['document_type'] == 'policy':
                    logger.info(f"✅ VALID POLICY: {filename}")
                    if result['extracted_data']:
                        logger.info(f"   Extracted: {json.dumps(result['extracted_data'], ensure_ascii=False)}")

        except Exception as e:
            # Best-effort boundary: callers always get a result dict back.
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception(f"❌ OCR/AI processing error: {e}")

        return result

    async def _analyze_with_vision(self, ocr_text: str) -> Optional[Dict[str, Any]]:
        """Classify OCR text with the configured OpenRouter model.

        Despite the method name, only the recognised text is sent (no image).
        The prompt asks the model to decide whether the document is an
        insurance policy and, if so, to extract the policy fields as JSON.

        Args:
            ocr_text: Text produced by the OCR step.

        Returns:
            The decoded JSON object from the model reply, or ``None`` when the
            request fails, the API returns a non-200 status, or the reply
            does not contain a JSON object.
        """
        try:
            prompt = f"""Проанализируй этот текст из OCR документа.

Текст: {ocr_text}

Задачи:
1. Определи тип документа: policy (страховой полис), passport, ticket, other, garbage (не документ)
2. Если это полис - извлеки данные:
   - voucher (номер полиса вида E1000-302538524)
   - holder_name (ФИО держателя)
   - insured_from (дата начала)
   - insured_to (дата окончания)
   - destination (страна/регион)
3. Оцени confidence (0.0-1.0) насколько уверен
4. is_valid_policy: true если это реальный страховой полис

Ответь ТОЛЬКО в формате JSON:
{{
  "document_type": "policy|passport|ticket|other|garbage",
  "is_valid_policy": true/false,
  "confidence": 0.95,
  "extracted_data": {{
    "voucher": "E1000-302538524",
    "holder_name": "...",
    "insured_from": "DD.MM.YYYY",
    "insured_to": "DD.MM.YYYY",
    "destination": "..."
  }}
}}"""

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {self.ai_api_key}",
                        "HTTP-Referer": settings.backend_url,
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.ai_model,
                        "messages": [
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ],
                        "temperature": 0.1,
                        "max_tokens": 500
                    }
                )

            if response.status_code != 200:
                logger.error(f"❌ AI API error: {response.status_code}")
                return None

            ai_response = response.json()
            content = ai_response["choices"][0]["message"]["content"]

            analysis = self._parse_ai_json(content)
            if analysis is None:
                logger.error("❌ AI analysis error: model reply is not a JSON object")
            return analysis

        except Exception as e:
            logger.exception(f"❌ AI analysis error: {e}")
            return None


# Shared module-level instance, created at import time.
ocr_service = OCRService()