181 lines
6.9 KiB
PHP
Executable File
181 lines
6.9 KiB
PHP
Executable File
<?php
|
|
// aiassist/search.php

// Base URL of the Elasticsearch node; a value defined before this file is loaded wins.
defined('ELASTIC_URL') || define('ELASTIC_URL', 'http://localhost:9200');

// Paths (relative to this script) of the search log and of the directory for result files.
$logFile = __DIR__ . '/logs/search1.log';
$resultsDir = __DIR__ . '/logs/results/';

// Create the results directory, including missing parents, when it does not exist yet.
is_dir($resultsDir) || mkdir($resultsDir, 0777, true);
|
|
|
|
/**
 * Scales an embedding vector to unit (Euclidean) length.
 *
 * Array keys are preserved. A vector whose norm is not greater than zero
 * (empty or all zeros) is returned unchanged.
 *
 * @param array $vector Numeric components of the embedding.
 * @return array The components divided by the vector's Euclidean norm.
 */
function normalizeEmbedding($vector) {
    $sumOfSquares = array_reduce(
        $vector,
        function ($carry, $component) {
            return $carry + $component * $component;
        },
        0
    );
    $norm = sqrt($sumOfSquares);

    // Nothing to scale by: hand the input back untouched.
    if (!($norm > 0)) {
        return $vector;
    }

    $unit = [];
    foreach ($vector as $key => $component) {
        $unit[$key] = $component / $norm;
    }
    return $unit;
}
|
|
|
|
/**
 * Searches the "legal_cases" index for cases similar to the described one.
 *
 * The request is a bool query:
 *  - should: a fuzzy multi_match of the facts against court_decision and
 *    law_articles, plus one script_score clause per supplied embedding
 *    (the script adds 1.0 to the cosine similarity);
 *  - filter: fuzzy match on case_category_text, existence of court_decision
 *    and law_articles, case_year >= year_from and, when a positive amount is
 *    given, requested_amount within +/- amount_factor percent of it.
 *
 * Tuning values are read from $_GET: 'fuzziness' (default "AUTO"),
 * 'year_from' (default 2010) and 'amount_factor' (default 30). An invalid
 * fuzziness falls back to "AUTO" and a negative amount_factor is treated as 0,
 * so a bad tuning value can no longer break or empty the search.
 *
 * Side effects: appends to the global $logFile and, when hits are found,
 * writes a human-readable summary file into the global $resultsDir.
 *
 * @param array $queryParams Keys read: 'facts' and 'category' (both required,
 *                           non-empty), 'amount' (optional, numeric),
 *                           'embedding_2048' and 'embedding_1024' (optional arrays).
 * @param int   $size        Maximum number of hits requested from Elasticsearch.
 * @return array List of arrays with keys 'case_id', 'court', 'court_decision'
 *               and 'case_url'; empty when required input is missing, nothing
 *               was found, or the search raised an Exception.
 */
function searchSimilarCases($queryParams, $size = 5) {
    global $logFile, $resultsDir;

    // Single place that formats a timestamped line for the shared log.
    $log = function ($message) use ($logFile) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] " . $message . "\n", FILE_APPEND);
    };

    if (empty($queryParams['facts']) || empty($queryParams['category'])) {
        $log("❌ Ошибка: Недостаточно данных для поиска!");
        return [];
    }

    // Tuning parameters come from the request. Only the fuzziness forms that
    // Elasticsearch understands (0, 1, 2, AUTO, AUTO:low,high) are let through.
    $fuzzinessLevel = "AUTO";
    $requestedFuzziness = $_GET['fuzziness'] ?? null;
    if ((is_string($requestedFuzziness) || is_int($requestedFuzziness))
        && preg_match('/^(?:[0-2]|AUTO(?::\d+,\d+)?)$/iD', (string) $requestedFuzziness) === 1
    ) {
        $fuzzinessLevel = $requestedFuzziness;
    }
    $yearFrom = (int) ($_GET['year_from'] ?? 2010);
    // A negative factor would invert the range below (gte > lte) and match nothing.
    $amountFactor = max(0, (int) ($_GET['amount_factor'] ?? 30));

    $amount = isset($queryParams['amount']) && is_numeric($queryParams['amount'])
        ? floatval($queryParams['amount'])
        : null;

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => [
                    [
                        "multi_match" => [
                            "query" => $queryParams['facts'],
                            "fields" => ["court_decision", "law_articles"],
                            "fuzziness" => $fuzzinessLevel
                        ]
                    ]
                ],
                "filter" => [
                    ["match" => ["case_category_text" => ["query" => $queryParams['category'], "fuzziness" => "AUTO"]]],
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    ["range" => ["case_year" => ["gte" => $yearFrom]]]
                ]
            ]
        ]
    ];

    // Vector similarity: one clause per embedding that was actually supplied.
    // The request key and the index field share the same name.
    foreach (['embedding_2048', 'embedding_1024'] as $field) {
        $vector = $queryParams[$field] ?? [];
        if (!is_array($vector) || empty($vector)) {
            continue;
        }
        $query["query"]["bool"]["should"][] = [
            "script_score" => [
                // stdClass serialises to {} as match_all expects (an empty PHP array would become []).
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, '" . $field . "') + 1.0",
                    "params" => ["query_vector" => normalizeEmbedding($vector)]
                ]
            ]
        ];
    }

    // Claim amount filter: keep cases within +/- $amountFactor percent of the amount.
    if ($amount !== null && $amount > 0) {
        $query["query"]["bool"]["filter"][] = [
            "range" => [
                "requested_amount" => [
                    "gte" => $amount * (1 - ($amountFactor / 100)),
                    "lte" => $amount * (1 + ($amountFactor / 100))
                ]
            ]
        ];
    }

    $log("🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE));

    try {
        $response = searchIndex("legal_cases", $query);

        if (empty($response['hits']['hits'])) {
            $log("❌ Elasticsearch вернул 0 результатов!");
            return [];
        }

        $results = [];
        $output = "🔍 **Результаты поиска**\nДата: " . date("Y-m-d H:i:s") . "\n\n";

        foreach ($response['hits']['hits'] as $doc) {
            // A hit without _source falls back to the placeholder values below.
            $source = isset($doc['_source']) && is_array($doc['_source']) ? $doc['_source'] : [];
            $caseId = $source['case_id'] ?? 'Неизвестный ID';
            $court = $source['court'] ?? 'Неизвестный суд';
            $courtDecision = $source['court_decision'] ?? 'Текст решения отсутствует';
            $caseUrl = $source['case_url'] ?? 'Нет ссылки';

            $output .= "🔹 **Дело ID:** $caseId\n📜 **Решение суда:**\n" . mb_substr($courtDecision, 0, 500) . "...\n🔗 URL: $caseUrl\n\n---------------------\n";

            $results[] = [
                'case_id' => $caseId,
                'court' => $court,
                'court_decision' => $courtDecision,
                'case_url' => $caseUrl
            ];
        }

        // One summary file per request. The timestamp alone has one-second
        // resolution, so a uniqid suffix keeps simultaneous requests from
        // overwriting each other's file.
        $fileName = $resultsDir . "search_results_" . date("Ymd_His") . "_" . uniqid('', true) . ".txt";
        file_put_contents($fileName, $output);

        $log("✅ Найдено " . count($results) . " документов. Итог сохранен в `$fileName`.");
        return $results;
    } catch (Exception $e) {
        $log("❌ Ошибка Elasticsearch: " . $e->getMessage());
        return [];
    }
}
|
|
|
|
|
|
/**
 * Sends a search request to one Elasticsearch index and returns the decoded response.
 *
 * The node address is taken from the ELASTIC_URL constant when it is defined
 * and falls back to http://localhost:9200 otherwise. The query is JSON-encoded
 * and POSTed to /{index}/_search.
 *
 * @param string $index Index name; placed into the request path as given.
 * @param array  $query Search body to encode as JSON.
 * @return array Decoded JSON response as an associative array.
 * @throws Exception When the query cannot be JSON-encoded, the HTTP transfer
 *                   fails, the status code is not 200, or the response body
 *                   is not a JSON object/array.
 */
function searchIndex($index, $query) {
    $baseUrl = defined('ELASTIC_URL') ? rtrim(ELASTIC_URL, '/') : 'http://localhost:9200';
    $elasticsearchUrl = "$baseUrl/$index/_search";

    // Encode first: invalid UTF-8 or NaN/INF values make json_encode return
    // false, and sending that would silently post an empty body.
    $payload = json_encode($query);
    if ($payload === false) {
        throw new Exception("Ошибка Elasticsearch: не удалось закодировать запрос в JSON: " . json_last_error_msg());
    }

    $ch = curl_init();
    if ($ch === false) {
        throw new Exception("Ошибка Elasticsearch: не удалось инициализировать cURL");
    }

    curl_setopt_array($ch, [
        CURLOPT_URL => $elasticsearchUrl,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        // Bound the wait so an unreachable or hung node cannot block the request indefinitely.
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_TIMEOUT => 30,
    ]);

    $response = curl_exec($ch);
    $transportError = $response === false ? curl_error($ch) : '';
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // Transport-level failure (refused connection, timeout, DNS): no HTTP status to report.
    if ($response === false) {
        throw new Exception("Ошибка Elasticsearch: запрос не выполнен: $transportError");
    }

    if ($httpCode !== 200) {
        throw new Exception("Ошибка Elasticsearch. Код HTTP: $httpCode, Ответ: $response");
    }

    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        throw new Exception("Ошибка Elasticsearch: ответ не является корректным JSON: " . json_last_error_msg());
    }

    return $decoded;
}
|
|
|
|
?>
|