Files
crm.clientright.ru/court_decisions_example.php
Fedor 01c4fe80b5 chore: snapshot current working tree changes
Save all currently accumulated repository changes as a backup snapshot for Gitea so no local work is lost.
2026-03-26 14:19:01 +03:00

257 lines
7.5 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
* Пример использования таблицы sprf_court_decisions
* Для работы с судебными решениями из Telegram
*/
/**
 * Computes the SHA-256 digest of a file's contents.
 *
 * @param string $filePath Path of the file to hash.
 *
 * @return string|false Hexadecimal digest as produced by hash_file(), or false
 *                      when hash_file() fails (e.g. the file cannot be read).
 */
function calculateFileHash($filePath)
{
    $algorithm = 'sha256';
    $digest = hash_file($algorithm, $filePath);

    return $digest;
}
/**
 * Inserts a newly uploaded file into sprf_court_decisions.
 *
 * The row is created with processing_status 'pending'; its uuid is generated
 * by the database via gen_random_uuid().
 *
 * Keys read from $fileData without a fallback: file_name, file_size,
 * mime_type, file_hash, s3_url, telegram_user_id.
 * Keys that fall back to null when absent: telegram_message_id,
 * telegram_chat_id, telegram_username, telegram_full_name.
 *
 * @param PDO   $pdo      Database connection used to prepare the statement.
 * @param array $fileData File metadata keyed as described above.
 *
 * @return array|false Result of fetch(PDO::FETCH_ASSOC) on the RETURNING
 *                     clause (id, uuid, uploaded_at).
 */
function saveCourtDecisionFile($pdo, $fileData)
{
    // Nowdoc content stays at column 0 so the SQL text sent to the server is unchanged.
    $insertSql = <<<'SQL'

INSERT INTO sprf_court_decisions (
uuid,
file_name,
file_size,
mime_type,
file_hash,
s3_url,
telegram_message_id,
telegram_chat_id,
telegram_user_id,
telegram_username,
telegram_full_name,
processing_status
) VALUES (
gen_random_uuid()::text,
:file_name,
:file_size,
:mime_type,
:file_hash,
:s3_url,
:telegram_message_id,
:telegram_chat_id,
:telegram_user_id,
:telegram_username,
:telegram_full_name,
'pending'
) RETURNING id, uuid, uploaded_at

SQL;

    // field name => whether it is read directly (true) or defaults to null (false)
    $fieldMap = [
        'file_name' => true,
        'file_size' => true,
        'mime_type' => true,
        'file_hash' => true,
        's3_url' => true,
        'telegram_message_id' => false,
        'telegram_chat_id' => false,
        'telegram_user_id' => true,
        'telegram_username' => false,
        'telegram_full_name' => false,
    ];

    $bindings = [];
    foreach ($fieldMap as $field => $readDirectly) {
        if ($readDirectly) {
            $bindings[':' . $field] = $fileData[$field];
        } else {
            $bindings[':' . $field] = $fileData[$field] ?? null;
        }
    }

    $insert = $pdo->prepare($insertSql);
    $insert->execute($bindings);

    return $insert->fetch(PDO::FETCH_ASSOC);
}
/**
 * Looks up an existing record by file hash.
 *
 * @param PDO    $pdo      Database connection used to prepare the statement.
 * @param string $fileHash Hash value compared against the file_hash column.
 *
 * @return array|false Result of fetch(PDO::FETCH_ASSOC): at most one row with
 *                     id, uuid, file_name, uploaded_at and processing_status.
 */
function checkDuplicate($pdo, $fileHash)
{
    // Nowdoc content stays at column 0 so the SQL text sent to the server is unchanged.
    $query = <<<'SQL'

SELECT id, uuid, file_name, uploaded_at, processing_status
FROM sprf_court_decisions
WHERE file_hash = :file_hash
LIMIT 1

SQL;

    $lookup = $pdo->prepare($query);
    $lookup->execute([':file_hash' => $fileHash]);
    $existing = $lookup->fetch(PDO::FETCH_ASSOC);

    return $existing;
}
/**
 * Stores OCR results for a record and advances its processing status.
 *
 * The UPDATE marks the row as OCR-processed, writes the concatenated page
 * text, the per-page data as jsonb and the page count, and sets
 * processing_status to 'completed' when vector_processed is already true,
 * otherwise to 'processing'.
 *
 * $ocrResult['pages_data'] is read as a list of pages; each page's text is
 * taken from its 'ocr_text' key. When 'pages_data' is missing or is not an
 * array it is treated as an empty list: empty text, '[]' as page data and a
 * page count of 0.
 *
 * @param PDO    $pdo       Database connection used to prepare the statement.
 * @param string $uuid      Value matched against the uuid column.
 * @param array  $ocrResult OCR output; only the 'pages_data' key is read.
 *
 * @return array|false Result of fetch(PDO::FETCH_ASSOC) on the RETURNING
 *                     clause (id, uuid, ocr_pages_count).
 */
function updateOCRStatus($pdo, $uuid, $ocrResult)
{
    $sql = "
UPDATE sprf_court_decisions
SET
ocr_processed = TRUE,
ocr_processed_at = CURRENT_TIMESTAMP,
ocr_text = :ocr_text,
ocr_pages_data = :ocr_pages_data::jsonb,
ocr_pages_count = :ocr_pages_count,
processing_status = CASE
WHEN vector_processed THEN 'completed'
ELSE 'processing'
END,
updated_at = CURRENT_TIMESTAMP
WHERE uuid = :uuid
RETURNING id, uuid, ocr_pages_count
";

    // Normalise the page list once so the count, the text and the JSON all
    // agree. Calling count() on a non-array value throws a TypeError on PHP 8.
    $pages = [];
    if (isset($ocrResult['pages_data']) && is_array($ocrResult['pages_data'])) {
        $pages = $ocrResult['pages_data'];
    }

    // Collect the text of every page, skipping only truly empty strings.
    // A truthiness filter would also drop a page whose text is exactly "0".
    $pageTexts = [];
    foreach ($pages as $page) {
        $rawText = $page['ocr_text'] ?? '';
        // Non-scalar text (e.g. a nested array) cannot be joined meaningfully.
        $text = is_scalar($rawText) ? (string) $rawText : '';
        if ($text !== '') {
            $pageTexts[] = $text;
        }
    }
    $fullText = implode("\n\n", $pageTexts);

    $stmt = $pdo->prepare($sql);
    $stmt->execute([
        ':uuid' => $uuid,
        ':ocr_text' => $fullText,
        ':ocr_pages_data' => json_encode($pages, JSON_UNESCAPED_UNICODE),
        ':ocr_pages_count' => count($pages),
    ]);

    return $stmt->fetch(PDO::FETCH_ASSOC);
}
/**
 * Records the outcome of vectorisation for a record.
 *
 * The UPDATE marks the row as vector-processed, stores the vector store id
 * and the JSON-encoded file ids, and sets processing_status to 'completed'.
 *
 * @param PDO    $pdo           Database connection used to prepare the statement.
 * @param string $uuid          Value matched against the uuid column.
 * @param string $vectorStoreId Value written to vector_store_id.
 * @param mixed  $fileIds       Value passed through json_encode() and written
 *                              to vector_file_ids as jsonb.
 *
 * @return array|false Result of fetch(PDO::FETCH_ASSOC) on the RETURNING
 *                     clause (id, uuid, processing_status).
 */
function updateVectorStatus($pdo, $uuid, $vectorStoreId, $fileIds)
{
    // Nowdoc content stays at column 0 so the SQL text sent to the server is unchanged.
    $updateSql = <<<'SQL'

UPDATE sprf_court_decisions
SET
vector_processed = TRUE,
vector_processed_at = CURRENT_TIMESTAMP,
vector_store_id = :vector_store_id,
vector_file_ids = :vector_file_ids::jsonb,
processing_status = 'completed',
updated_at = CURRENT_TIMESTAMP
WHERE uuid = :uuid
RETURNING id, uuid, processing_status

SQL;

    $encodedFileIds = json_encode($fileIds);
    $bindings = [
        ':uuid' => $uuid,
        ':vector_store_id' => $vectorStoreId,
        ':vector_file_ids' => $encodedFileIds,
    ];

    $update = $pdo->prepare($updateSql);
    $update->execute($bindings);

    return $update->fetch(PDO::FETCH_ASSOC);
}
/**
 * Fetches records that still need OCR processing.
 *
 * Selects rows where ocr_processed is FALSE and processing_status is
 * 'pending' or 'processing', ordered by uploaded_at ascending.
 *
 * @param PDO $pdo   Database connection used to prepare the statement.
 * @param int $limit Maximum number of rows, bound as PDO::PARAM_INT.
 *
 * @return array Result of fetchAll(PDO::FETCH_ASSOC): rows with id, uuid,
 *               file_name, s3_url and uploaded_at.
 */
function getFilesForOCR($pdo, $limit = 10)
{
    // Nowdoc content stays at column 0 so the SQL text sent to the server is unchanged.
    $query = <<<'SQL'

SELECT
id,
uuid,
file_name,
s3_url,
uploaded_at
FROM sprf_court_decisions
WHERE
ocr_processed = FALSE
AND processing_status IN ('pending', 'processing')
ORDER BY uploaded_at ASC
LIMIT :limit

SQL;

    $select = $pdo->prepare($query);
    $select->bindValue(':limit', $limit, PDO::PARAM_INT);
    $select->execute();
    $pendingFiles = $select->fetchAll(PDO::FETCH_ASSOC);

    return $pendingFiles;
}
/**
 * Fetches records that have OCR text but still need vectorisation.
 *
 * Selects rows where ocr_processed is TRUE, vector_processed is FALSE,
 * processing_status is 'processing' or 'completed' and ocr_text is not NULL,
 * ordered by ocr_processed_at ascending.
 *
 * @param PDO $pdo   Database connection used to prepare the statement.
 * @param int $limit Maximum number of rows, bound as PDO::PARAM_INT.
 *
 * @return array Result of fetchAll(PDO::FETCH_ASSOC): rows with id, uuid,
 *               file_name, ocr_text, ocr_pages_data and s3_url.
 */
function getFilesForVector($pdo, $limit = 10)
{
    // Nowdoc content stays at column 0 so the SQL text sent to the server is unchanged.
    $query = <<<'SQL'

SELECT
id,
uuid,
file_name,
ocr_text,
ocr_pages_data,
s3_url
FROM sprf_court_decisions
WHERE
ocr_processed = TRUE
AND vector_processed = FALSE
AND processing_status IN ('processing', 'completed')
AND ocr_text IS NOT NULL
ORDER BY ocr_processed_at ASC
LIMIT :limit

SQL;

    $select = $pdo->prepare($query);
    $select->bindValue(':limit', $limit, PDO::PARAM_INT);
    $select->execute();
    $readyFiles = $select->fetchAll(PDO::FETCH_ASSOC);

    return $readyFiles;
}
// Пример использования:
//
// 1. При получении файла из Telegram:
/*
$filePath = '/tmp/downloaded_file.pdf';
$fileHash = calculateFileHash($filePath);
// Проверяем дубликат
$duplicate = checkDuplicate($pdo, $fileHash);
if ($duplicate) {
echo "Файл уже существует: {$duplicate['uuid']}";
exit;
}
// Загружаем в S3 и получаем URL
$s3Url = uploadToS3($filePath);
// Сохраняем в БД
$fileData = [
'file_name' => 'file_0.pdf',
'file_size' => filesize($filePath),
'mime_type' => 'application/pdf',
'file_hash' => $fileHash,
's3_url' => $s3Url,
'telegram_message_id' => $messageId,
'telegram_chat_id' => $chatId,
'telegram_user_id' => $userId,
'telegram_username' => $username,
'telegram_full_name' => $fullName,
];
$result = saveCourtDecisionFile($pdo, $fileData);
echo "Файл сохранен с UUID: {$result['uuid']}";
*/
// 2. После OCR обработки:
/*
$ocrResult = [
'pages_data' => [
[
'page' => 1,
'ocr_text' => 'Текст страницы 1...',
'image_url' => '/static/vision_input/file1.png'
],
// ...
]
];
$result = updateOCRStatus($pdo, $uuid, $ocrResult);
echo "OCR обработан: {$result['ocr_pages_count']} страниц";
*/
// 3. После векторизации:
/*
$result = updateVectorStatus(
$pdo,
$uuid,
'vs_abc123xyz',
['file-id-1', 'file-id-2']
);
echo "Векторизация завершена";
*/
?>