Files
crm.clientright.ru/aiassist/function.php

181 lines
6.9 KiB
PHP
Executable File

<?php
// aiassist/search.php
// Configuration and log locations for the Elasticsearch-backed case search.

// Base URL of the Elasticsearch node. Only defined here when the including
// script has not already defined it.
if (!defined('ELASTIC_URL')) {
    define('ELASTIC_URL', 'http://localhost:9200');
}

// Append-only diagnostic log written by the search functions in this file.
$logFile = __DIR__ . "/logs/search1.log";
// Directory that receives the per-search result dumps.
$resultsDir = __DIR__ . "/logs/results/";

// Create the results directory if it does not exist yet. The trailing
// is_dir() re-check means a concurrent request that created the directory
// between our check and our mkdir() is not treated as a failure; a genuine
// failure is reported instead of being silently ignored.
if (!is_dir($resultsDir) && !mkdir($resultsDir, 0777, true) && !is_dir($resultsDir)) {
    error_log("aiassist: не удалось создать каталог результатов: $resultsDir");
}
/**
 * Scales an embedding vector to unit Euclidean length.
 *
 * Array keys are preserved. When the computed length is not greater than
 * zero (for example an empty or all-zero vector) the input is returned
 * unchanged.
 *
 * @param array $vector Numeric components of the embedding.
 * @return array The scaled vector, or the original vector when it cannot be scaled.
 */
function normalizeEmbedding($vector) {
    // Accumulate the squared components left to right, starting from 0.
    $sumOfSquares = array_reduce(
        array_map(function ($component) {
            return $component * $component;
        }, $vector),
        function ($carry, $square) {
            return $carry + $square;
        },
        0
    );
    $length = sqrt($sumOfSquares);

    if ($length > 0) {
        $divideByLength = function ($component) use ($length) {
            return $component / $length;
        };
        return array_map($divideByLength, $vector);
    }

    return $vector;
}
/**
 * Searches the "legal_cases" index for court cases similar to the given one.
 *
 * The query combines a fuzzy multi_match on the case facts with optional
 * cosine-similarity script_score clauses (one per supplied embedding) and
 * filters by category, presence of decision/law-article fields, minimum year
 * and, when a positive amount is given, a claim-amount range.
 *
 * Tuning values are read from the query string:
 *  - fuzziness      multi_match fuzziness; anything other than 0, 1, 2, AUTO or
 *                   AUTO:low,high falls back to "AUTO"
 *  - year_from      lower bound for case_year (default 2010)
 *  - amount_factor  +/- tolerance in percent around the amount (default 30,
 *                   negative values are treated as 0)
 *
 * Side effects: appends progress/error lines to the global $logFile and, when
 * hits are found, writes a text summary to a new file inside the global
 * $resultsDir.
 *
 * @param array $queryParams Keys read: 'facts' and 'category' (both required),
 *                           'amount', 'embedding_2048', 'embedding_1024' (optional).
 * @param int   $size        Maximum number of hits requested from Elasticsearch.
 * @return array List of ['case_id', 'court', 'court_decision', 'case_url'] rows;
 *               empty when input is insufficient, nothing matched, or the
 *               search raised an Exception.
 */
function searchSimilarCases($queryParams, $size = 5) {
    global $logFile, $resultsDir;

    // One place for the timestamped log-line format used throughout.
    $log = function ($message) use ($logFile) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] " . $message . "\n", FILE_APPEND);
    };

    if (empty($queryParams['facts']) || empty($queryParams['category'])) {
        $log("❌ Ошибка: Недостаточно данных для поиска!");
        return [];
    }

    // Query-string tuning parameters. They are untrusted, so fuzziness is
    // whitelisted (a PHP array or arbitrary text must not be forwarded into
    // the request body) and the tolerance is kept non-negative (a negative
    // value would produce an inverted range that can never match).
    $fuzzinessLevel = $_GET['fuzziness'] ?? "AUTO";
    if (!is_string($fuzzinessLevel) || !preg_match('/\A(?:AUTO(?::\d+,\d+)?|[0-2])\z/i', $fuzzinessLevel)) {
        $fuzzinessLevel = "AUTO";
    }
    $yearFrom = (int) ($_GET['year_from'] ?? 2010);
    $amountFactor = max(0, (int) ($_GET['amount_factor'] ?? 30));

    // Claim amount used for the optional range filter.
    $amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null;

    // Base Elasticsearch query: text match plus mandatory filters.
    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => [
                    [
                        "multi_match" => [
                            "query" => $queryParams['facts'],
                            "fields" => ["court_decision", "law_articles"],
                            "fuzziness" => $fuzzinessLevel
                        ]
                    ]
                ],
                "filter" => [
                    ["match" => ["case_category_text" => ["query" => $queryParams['category'], "fuzziness" => "AUTO"]]],
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    ["range" => ["case_year" => ["gte" => $yearFrom]]]
                ]
            ]
        ]
    ];

    // Vector search: one script_score clause per embedding that was supplied.
    // The parameter key and the indexed field share the same name.
    foreach (['embedding_2048', 'embedding_1024'] as $embeddingField) {
        $vector = $queryParams[$embeddingField] ?? [];
        if (!is_array($vector) || empty($vector)) {
            continue;
        }
        $query["query"]["bool"]["should"][] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    // +1.0 shifts the cosine result so the score is never negative.
                    "source" => "cosineSimilarity(params.query_vector, '" . $embeddingField . "') + 1.0",
                    "params" => ["query_vector" => normalizeEmbedding($vector)]
                ]
            ]
        ];
    }

    // Claim-amount filter: amount +/- amount_factor percent.
    if ($amount !== null && $amount > 0) {
        $query["query"]["bool"]["filter"][] = [
            "range" => [
                "requested_amount" => [
                    "gte" => $amount * (1 - ($amountFactor / 100)),
                    "lte" => $amount * (1 + ($amountFactor / 100))
                ]
            ]
        ];
    }

    $log("🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE));

    try {
        $response = searchIndex("legal_cases", $query);
        if (!isset($response['hits']['hits']) || empty($response['hits']['hits'])) {
            $log("❌ Elasticsearch вернул 0 результатов!");
            return [];
        }

        $results = [];
        $output = "🔍 **Результаты поиска**\nДата: " . date("Y-m-d H:i:s") . "\n\n";
        foreach ($response['hits']['hits'] as $doc) {
            // A hit without _source falls through to the placeholder values below.
            $source = $doc['_source'] ?? [];
            $caseId = $source['case_id'] ?? 'Неизвестный ID';
            $court = $source['court'] ?? 'Неизвестный суд';
            $courtDecision = $source['court_decision'] ?? 'Текст решения отсутствует';
            $caseUrl = $source['case_url'] ?? 'Нет ссылки';

            // The text summary only carries the first 500 characters of the decision.
            $output .= "🔹 **Дело ID:** $caseId\n📜 **Решение суда:**\n" . mb_substr($courtDecision, 0, 500) . "...\n🔗 URL: $caseUrl\n\n---------------------\n";
            $results[] = [
                'case_id' => $caseId,
                'court' => $court,
                'court_decision' => $courtDecision,
                'case_url' => $caseUrl
            ];
        }

        // One result file per request. The timestamp alone has one-second
        // resolution, so a random suffix keeps two searches in the same
        // second from overwriting each other's file.
        $fileName = $resultsDir . "search_results_" . date("Ymd_His") . "_" . bin2hex(random_bytes(4)) . ".txt";
        file_put_contents($fileName, $output);
        $log("✅ Найдено " . count($results) . " документов. Итог сохранен в `$fileName`.");
        return $results;
    } catch (Exception $e) {
        $log("❌ Ошибка Elasticsearch: " . $e->getMessage());
        return [];
    }
}
/**
 * Sends a search request to Elasticsearch and returns the decoded response.
 *
 * The request is a POST to "<base>/<index>/_search" with the query as a JSON
 * body. The base URL is the ELASTIC_URL constant when it is defined, otherwise
 * http://localhost:9200.
 *
 * @param string $index Index name; placed into the request path as given.
 * @param array  $query Request body, JSON-encoded before sending.
 * @return array Decoded JSON response body.
 * @throws Exception When the query cannot be JSON-encoded, the HTTP transfer
 *                   fails, the status code is not 200, or the response body
 *                   is not decodable JSON.
 */
function searchIndex($index, $query) {
    // Encode first: an unencodable query (e.g. invalid UTF-8) must fail
    // loudly here rather than be sent as an empty request body.
    $payload = json_encode($query);
    if ($payload === false) {
        throw new Exception("Ошибка Elasticsearch. Не удалось сериализовать запрос в JSON: " . json_last_error_msg());
    }

    // Use the configurable base URL instead of a second hard-coded copy.
    $baseUrl = defined('ELASTIC_URL') ? rtrim(ELASTIC_URL, '/') : 'http://localhost:9200';
    $elasticsearchUrl = "$baseUrl/$index/_search";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $elasticsearchUrl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json'
    ]);
    $response = curl_exec($ch);
    // Read the transport error before the handle is closed.
    $transportError = $response === false ? curl_error($ch) : '';
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false when no HTTP response was received at all;
    // report the cURL reason instead of a bare "HTTP 0".
    if ($response === false) {
        throw new Exception("Ошибка Elasticsearch. Сбой запроса cURL: $transportError");
    }
    if ($httpCode !== 200) {
        throw new Exception("Ошибка Elasticsearch. Код HTTP: $httpCode, Ответ: $response");
    }

    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        throw new Exception("Ошибка Elasticsearch. Некорректный JSON в ответе: " . json_last_error_msg());
    }
    return $decoded;
}
?>