You've already forked RekomenciBackend
fix(): better resolution of relevant vacancies
This commit is contained in:
@@ -3,6 +3,9 @@ config.toml
|
|||||||
docker-compose.yml
|
docker-compose.yml
|
||||||
.idea
|
.idea
|
||||||
firebase.json
|
firebase.json
|
||||||
|
dumps
|
||||||
|
full_skills_unique.json
|
||||||
|
filtered_vacancies.csv
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
Executable
+43
@@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from template_project.web_api.configuration import load_configuration
|
||||||
|
|
||||||
|
|
||||||
|
def _to_libpq_url(sqlalchemy_url: str) -> str:
    """Return *sqlalchemy_url* with any ``+driver`` suffix dropped from its scheme.

    SQLAlchemy URLs may name a driver in the scheme
    (``postgresql+psycopg://...``); pg_dump only understands the plain
    ``postgresql://`` form. Strings without a ``://`` separator are returned
    unchanged.
    """
    scheme, separator, remainder = sqlalchemy_url.partition("://")
    if not separator:
        return sqlalchemy_url
    dialect = scheme.split("+", 1)[0]
    return f"{dialect}{separator}{remainder}"


def main() -> None:
    """Dump the data of the vacancy tables into ``dumps/data_dump.sql``.

    Reads the database URL from ``config.toml`` (resolved relative to the
    current working directory), runs ``pg_dump`` in data-only mode with
    column inserts for the ``vacancy``, ``vacancy_embedding`` and
    ``key_skills`` tables, then prints the dump location, its size and an
    import hint.

    Raises:
        subprocess.CalledProcessError: if ``pg_dump`` exits with a non-zero
            status.
    """
    config_path = Path("config.toml")
    configuration = load_configuration(config_path)

    # Strip whatever driver suffix the configured URL carries, not just
    # "+psycopg", so the URL is always acceptable to pg_dump.
    db_url = _to_libpq_url(str(configuration.database.url.get_value()))

    output_dir = Path("dumps")
    output_dir.mkdir(exist_ok=True)

    output_file = output_dir / "data_dump.sql"

    print("Создание дампа таблиц vacancy, vacancy_embedding, key_skills...")

    subprocess.run(
        [
            "pg_dump",
            db_url,
            "--table=vacancy",
            "--table=vacancy_embedding",
            "--table=key_skills",
            "--data-only",
            "--column-inserts",
            f"--file={output_file}",
        ],
        check=True,  # abort before printing the success summary if pg_dump fails
    )

    print(f"\nДамп создан: {output_file}")
    print(f"Размер файла: {output_file.stat().st_size / 1024 / 1024:.2f} MB")
    print("\nДля импорта на прод сервере выполните:")
    print(f" psql <PROD_DB_URL> -f {output_file}")


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
Executable
+17
@@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
# Dump the data of the vacancy, vacancy_embedding and key_skills tables
# into dump_data.sql in the current directory.
#
# The connection string is taken from $DATABASE_URL; a local placeholder
# URL is used when the variable is unset or empty.

# Stop at the first failing command so a failed pg_dump is never followed
# by the "dump created" message and a zero exit status.
set -euo pipefail

DB_URL="${DATABASE_URL:-postgresql://user:password@localhost:5432/dbname}"

echo "Создание дампа таблиц vacancy, vacancy_embedding, key_skills..."

pg_dump "$DB_URL" \
    --table=vacancy \
    --table=vacancy_embedding \
    --table=key_skills \
    --data-only \
    --column-inserts \
    --file=dump_data.sql

echo "Дамп создан: dump_data.sql"
echo "Размер файла: $(du -h dump_data.sql | cut -f1)"
|
||||||
|
|
||||||
Executable
+16
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Import an SQL dump into the database.
#
# Usage: <this script> [DUMP_FILE]
#   DUMP_FILE     path to the dump to load (default: dump_data.sql)
#   DATABASE_URL  connection string (a local placeholder URL is used when
#                 the variable is unset or empty)

# Stop at the first failing command so a failed import is never reported
# as completed.
set -euo pipefail

DB_URL="${DATABASE_URL:-postgresql://user:password@localhost:5432/dbname}"
DUMP_FILE="${1:-dump_data.sql}"

if [ ! -f "$DUMP_FILE" ]; then
    echo "Ошибка: файл $DUMP_FILE не найден" >&2
    exit 1
fi

echo "Импорт дампа из $DUMP_FILE в БД..."

# Without ON_ERROR_STOP psql keeps going after an SQL error and still exits
# with status 0, which would let the success message below be printed.
psql "$DB_URL" -v ON_ERROR_STOP=1 -f "$DUMP_FILE"

echo "Импорт завершен!"
|
||||||
|
|
||||||
@@ -41,7 +41,7 @@ async def main() -> None:
|
|||||||
ml_container = make_ml_ioc(ml_configuration)
|
ml_container = make_ml_ioc(ml_configuration)
|
||||||
|
|
||||||
csv_path = Path("filtered_vacancies.csv")
|
csv_path = Path("filtered_vacancies.csv")
|
||||||
max_records = 1000
|
max_records = 100_000
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with backend_container() as backend_request_container, ml_container() as ml_request_container:
|
async with backend_container() as backend_request_container, ml_container() as ml_request_container:
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from sqlalchemy.orm import registry
|
|||||||
|
|
||||||
from template_project.application.access_token.entity import AccessToken
|
from template_project.application.access_token.entity import AccessToken
|
||||||
from template_project.application.auth_identity.entity import AuthIdentity, AuthMethod
|
from template_project.application.auth_identity.entity import AuthIdentity, AuthMethod
|
||||||
from template_project.application.common.enums import EducationGrade
|
from template_project.application.common.enums import EducationGrade, ExperienceType
|
||||||
from template_project.application.notification_device.entity import NotificationDevice
|
from template_project.application.notification_device.entity import NotificationDevice
|
||||||
from template_project.application.resume.entity import (
|
from template_project.application.resume.entity import (
|
||||||
Resume,
|
Resume,
|
||||||
|
|||||||
@@ -16,11 +16,13 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
|
async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]:
|
||||||
|
distance_expr = vacancy_embedding_table.c.vector.cosine_distance(vector)
|
||||||
|
similarity_expr = 1 - distance_expr
|
||||||
statement = (
|
statement = (
|
||||||
select(Vacancy, label("resume_similarity", vacancy_embedding_table.c.vector.cosine_distance(vector)))
|
select(Vacancy, label("resume_similarity", similarity_expr))
|
||||||
.join(VacancyEmbedding, vacancy_embedding_table.c.vacancy_id == vacancy_table.c.id)
|
.join(VacancyEmbedding, vacancy_embedding_table.c.vacancy_id == vacancy_table.c.id)
|
||||||
.where(vacancy_embedding_table.c.vector.cosine_distance(vector) > 0.5)
|
.where(similarity_expr >= 0.5)
|
||||||
.order_by(vacancy_embedding_table.c.vector.cosine_distance(vector).asc())
|
.order_by(distance_expr.asc())
|
||||||
.limit(100)
|
.limit(100)
|
||||||
)
|
)
|
||||||
result = await self._session.execute(statement)
|
result = await self._session.execute(statement)
|
||||||
|
|||||||
@@ -22,11 +22,11 @@ class DefaultResumePredictionGenerator(ResumePredictionGenerator):
|
|||||||
key_skills=resume.key_skills,
|
key_skills=resume.key_skills,
|
||||||
suitable_vacancies=[
|
suitable_vacancies=[
|
||||||
SuitableVacancyDs(
|
SuitableVacancyDs(
|
||||||
vacancy_id=str(suituble_vacancy.vacancy.id),
|
vacancy_id=str(suitable_vacancy.vacancy.id),
|
||||||
from_salary=suituble_vacancy.vacancy.from_salary,
|
from_salary=suitable_vacancy.vacancy.from_salary,
|
||||||
to_salary=suituble_vacancy.vacancy.to_salary,
|
to_salary=suitable_vacancy.vacancy.to_salary,
|
||||||
key_skills=suituble_vacancy.vacancy.key_skills,
|
key_skills=suitable_vacancy.vacancy.key_skills,
|
||||||
resume_similarity=suituble_vacancy.resume_similarity,
|
resume_similarity=suitable_vacancy.resume_similarity,
|
||||||
)
|
)
|
||||||
for suitable_vacancy in suitable_vacancies
|
for suitable_vacancy in suitable_vacancies
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -2425,15 +2425,28 @@ backend = [
|
|||||||
{ name = "sqlalchemy" },
|
{ name = "sqlalchemy" },
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
|
{ name = "aioboto3" },
|
||||||
{ name = "alembic" },
|
{ name = "alembic" },
|
||||||
|
{ name = "argon2-cffi" },
|
||||||
{ name = "bandit" },
|
{ name = "bandit" },
|
||||||
{ name = "codespell" },
|
{ name = "codespell" },
|
||||||
{ name = "coverage" },
|
{ name = "coverage" },
|
||||||
|
{ name = "cryptography" },
|
||||||
{ name = "dirty-equals" },
|
{ name = "dirty-equals" },
|
||||||
|
{ name = "firebase-admin" },
|
||||||
|
{ name = "httpx" },
|
||||||
{ name = "mypy" },
|
{ name = "mypy" },
|
||||||
|
{ name = "pgvector" },
|
||||||
|
{ name = "prometheus-fastapi-instrumentator" },
|
||||||
|
{ name = "psycopg", extra = ["binary"] },
|
||||||
{ name = "pytest" },
|
{ name = "pytest" },
|
||||||
{ name = "pytest-asyncio" },
|
{ name = "pytest-asyncio" },
|
||||||
|
{ name = "python-multipart" },
|
||||||
{ name = "ruff" },
|
{ name = "ruff" },
|
||||||
|
{ name = "sentence-transformers" },
|
||||||
|
{ name = "sqlalchemy" },
|
||||||
|
{ name = "torch", version = "2.2.2", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version < '3.13' and sys_platform == 'darwin')" },
|
||||||
|
{ name = "torch", version = "2.2.2+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(python_full_version >= '3.13' and sys_platform == 'darwin') or (python_full_version >= '3.13' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||||
{ name = "types-cachetools" },
|
{ name = "types-cachetools" },
|
||||||
]
|
]
|
||||||
linters = [
|
linters = [
|
||||||
@@ -2487,15 +2500,27 @@ backend = [
|
|||||||
{ name = "sqlalchemy", specifier = "==2.0.44" },
|
{ name = "sqlalchemy", specifier = "==2.0.44" },
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
|
{ name = "aioboto3", specifier = "==15.5.0" },
|
||||||
{ name = "alembic", specifier = "==1.17.0" },
|
{ name = "alembic", specifier = "==1.17.0" },
|
||||||
|
{ name = "argon2-cffi", specifier = "==23.1.0" },
|
||||||
{ name = "bandit", specifier = "==1.8.6" },
|
{ name = "bandit", specifier = "==1.8.6" },
|
||||||
{ name = "codespell", specifier = "==2.4.1" },
|
{ name = "codespell", specifier = "==2.4.1" },
|
||||||
{ name = "coverage", specifier = "==7.11.0" },
|
{ name = "coverage", specifier = "==7.11.0" },
|
||||||
|
{ name = "cryptography", specifier = "==46.0.3" },
|
||||||
{ name = "dirty-equals", specifier = ">=0.11" },
|
{ name = "dirty-equals", specifier = ">=0.11" },
|
||||||
|
{ name = "firebase-admin", specifier = ">=7.1.0" },
|
||||||
|
{ name = "httpx", specifier = "==0.28.1" },
|
||||||
{ name = "mypy", specifier = "==1.18.1" },
|
{ name = "mypy", specifier = "==1.18.1" },
|
||||||
|
{ name = "pgvector", specifier = ">=0.4.1" },
|
||||||
|
{ name = "prometheus-fastapi-instrumentator", specifier = ">=7.1.0" },
|
||||||
|
{ name = "psycopg", extras = ["binary"], specifier = ">=3.2.12" },
|
||||||
{ name = "pytest", specifier = "==8.4.0" },
|
{ name = "pytest", specifier = "==8.4.0" },
|
||||||
{ name = "pytest-asyncio", specifier = "==1.2.0" },
|
{ name = "pytest-asyncio", specifier = "==1.2.0" },
|
||||||
|
{ name = "python-multipart", specifier = ">=0.0.20" },
|
||||||
{ name = "ruff", specifier = "==0.12.11" },
|
{ name = "ruff", specifier = "==0.12.11" },
|
||||||
|
{ name = "sentence-transformers", specifier = ">=5.1.2" },
|
||||||
|
{ name = "sqlalchemy", specifier = "==2.0.44" },
|
||||||
|
{ name = "torch", index = "https://download.pytorch.org/whl/cpu" },
|
||||||
{ name = "types-cachetools", specifier = "==6.2.0.20250827" },
|
{ name = "types-cachetools", specifier = "==6.2.0.20250827" },
|
||||||
]
|
]
|
||||||
linters = [
|
linters = [
|
||||||
|
|||||||
Reference in New Issue
Block a user