Files
hotels/db_schema.sql
Фёдор 684fada337 🚀 Full project sync: Hotels RAG & Audit System
 Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
2025-10-27 22:49:42 +03:00

154 lines
4.8 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Схема базы данных для хранения информации об отелях
-- Префикс: hotel_
-- Основная информация об отелях
CREATE TABLE IF NOT EXISTS hotel_main (
id UUID PRIMARY KEY,
full_name TEXT,
short_name TEXT,
status_id INTEGER,
status_name TEXT,
category_id INTEGER,
category_name TEXT,
region_id INTEGER,
region_name TEXT,
hotel_type_id INTEGER,
hotel_type_name TEXT,
register_record TEXT,
register_record_date DATE,
owner_full_name TEXT,
owner_ogrn TEXT,
owner_inn TEXT,
phone TEXT,
email TEXT,
website_address TEXT,
addresses JSONB,
photo_ids TEXT[],
has_seasonal BOOLEAN,
activation_datetime TIMESTAMP,
updated TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Дополнительная информация о владельце
CREATE TABLE IF NOT EXISTS hotel_additional_info (
hotel_id UUID PRIMARY KEY REFERENCES hotel_main(id),
owner_ogrn TEXT,
owner_inn TEXT,
owner_kpp TEXT,
owner_short_name TEXT,
owner_phone TEXT,
owner_email TEXT,
resort_full_name TEXT,
owner_address_name TEXT,
owner_legal_type_id INTEGER,
phone TEXT,
email TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Санаторная информация (для санаториев)
CREATE TABLE IF NOT EXISTS hotel_sanatorium (
hotel_id UUID PRIMARY KEY REFERENCES hotel_main(id),
oid TEXT,
full_name TEXT,
short_name TEXT,
ogrn TEXT,
inn TEXT,
legal_address TEXT,
actual_address TEXT,
phone TEXT,
email TEXT,
web_site TEXT,
medical_license TEXT,
farm_license TEXT,
terrenkur BOOLEAN,
resort_name TEXT,
has_water_supply BOOLEAN,
has_heating BOOLEAN,
has_sewage BOOLEAN,
has_air_conditioning BOOLEAN,
has_elevator BOOLEAN,
has_telephone BOOLEAN,
has_internet BOOLEAN,
has_mobility_lift BOOLEAN,
has_gym BOOLEAN,
has_conference_room BOOLEAN,
swimming_pool_info JSONB,
plage_info JSONB,
land_document_info JSONB,
rooms_info JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Услуги отелей (из drawer)
CREATE TABLE IF NOT EXISTS hotel_services (
id SERIAL PRIMARY KEY,
hotel_id UUID REFERENCES hotel_main(id),
service_category_id INTEGER,
service_category_name TEXT,
service_id INTEGER,
service_name TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(hotel_id, service_id)
);
-- Информация о номерах
CREATE TABLE IF NOT EXISTS hotel_rooms (
id SERIAL PRIMARY KEY,
hotel_id UUID REFERENCES hotel_main(id),
room_category_id INTEGER,
room_category_name TEXT,
apartment_count INTEGER,
number_seats INTEGER,
equipment_list JSONB,
family_room_count INTEGER,
disability_room_count INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Сырые JSON для backup (опционально)
CREATE TABLE IF NOT EXISTS hotel_raw_json (
hotel_id UUID PRIMARY KEY REFERENCES hotel_main(id),
main_data JSONB,
additional_info JSONB,
sanatorium_data JSONB,
drawer_data JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Прогресс парсинга
CREATE TABLE IF NOT EXISTS hotel_parsing_progress (
id SERIAL PRIMARY KEY,
page_number INTEGER,
total_pages INTEGER,
processed_count INTEGER,
total_count INTEGER,
status TEXT, -- 'in_progress', 'completed', 'failed'
error_message TEXT,
started_at TIMESTAMP,
completed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Индексы для быстрого поиска
CREATE INDEX IF NOT EXISTS idx_hotel_main_region ON hotel_main(region_id);
CREATE INDEX IF NOT EXISTS idx_hotel_main_status ON hotel_main(status_id);
CREATE INDEX IF NOT EXISTS idx_hotel_main_category ON hotel_main(category_id);
CREATE INDEX IF NOT EXISTS idx_hotel_main_type ON hotel_main(hotel_type_id);
CREATE INDEX IF NOT EXISTS idx_hotel_main_full_name ON hotel_main(full_name);
CREATE INDEX IF NOT EXISTS idx_hotel_services_hotel_id ON hotel_services(hotel_id);
CREATE INDEX IF NOT EXISTS idx_hotel_rooms_hotel_id ON hotel_rooms(hotel_id);
-- Полнотекстовый поиск по названию и адресу
CREATE INDEX IF NOT EXISTS idx_hotel_main_fulltext ON hotel_main
USING gin(to_tsvector('russian', coalesce(full_name, '') || ' ' || coalesce(short_name, '')));
COMMENT ON TABLE hotel_main IS 'Основная информация об отелях из tourism.fsa.gov.ru';
COMMENT ON TABLE hotel_parsing_progress IS 'Контрольные точки для возобновления парсинга';