Merge branch 'main' of gitlab.prodcontest.com:team-39/backend

This commit is contained in:
ivankirpichnikov
2025-11-23 01:36:36 +03:00
9 changed files with 267 additions and 91 deletions
+41 -23
View File
@@ -6,6 +6,9 @@ stages:
- tag - tag
- deploy - deploy
default:
retry: 2
variables: variables:
BASE_IMAGE_NAME: $CI_REGISTRY_IMAGE BASE_IMAGE_NAME: $CI_REGISTRY_IMAGE
TRIVY_CACHE_DIR: .cache/trivy TRIVY_CACHE_DIR: .cache/trivy
@@ -18,6 +21,8 @@ variables:
UV_CACHE_DIR: .cache/uv UV_CACHE_DIR: .cache/uv
BUILDAH_ISOLATION: oci BUILDAH_ISOLATION: oci
STORAGE_DRIVER: vfs STORAGE_DRIVER: vfs
DOCKER_HOST: "tcp://docker:2375"
DOCKER_TLS_CERTDIR: ""
cache: cache:
key: "${CI_COMMIT_REF_SLUG}" key: "${CI_COMMIT_REF_SLUG}"
@@ -27,6 +32,13 @@ cache:
- $UV_PROJECT_ENVIRONMENT - $UV_PROJECT_ENVIRONMENT
policy: pull-push policy: pull-push
# Hidden job template for jobs that need a Docker daemon: runs in the Docker
# CLI image with a matching docker-in-docker service and logs in to the
# project's container registry before the job's own script runs.
.docker-job: &docker-job
  image: docker:28.5
  services:
    - docker:28.5-dind
  before_script:
    # Feed the password on stdin rather than with -p, so the secret never
    # appears in the process argument list and Docker does not print its
    # "--password via the CLI is insecure" warning.
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
.buildah-job: &buildah-job .buildah-job: &buildah-job
image: quay.io/containers/buildah:latest image: quay.io/containers/buildah:latest
variables: variables:
@@ -223,13 +235,13 @@ build-migrations:
CONTAINERFILE: Containerfile CONTAINERFILE: Containerfile
BUILDTARGET: migrations BUILDTARGET: migrations
# build-ml: build-ml:
# <<: *build-config <<: *build-config
# when: manual when: manual
# variables: variables:
# IMAGE_NAME: $BASE_IMAGE_NAME/ml IMAGE_NAME: $BASE_IMAGE_NAME/ml
# CONTAINERFILE: Containerfile CONTAINERFILE: Containerfile
# BUILDTARGET: ml BUILDTARGET: ml
lint: lint:
<<: *uv-job <<: *uv-job
@@ -246,19 +258,18 @@ lint:
- if: $CI_COMMIT_TAG - if: $CI_COMMIT_TAG
test: test:
<<: *docker-job
stage: test stage: test
tags: tags:
- self-hosted - beta
variables: variables:
COMPOSE_PROFILES: | COMPOSE_PROFILES: |
--profile migrations --profile migrations
--profile tests --profile tests
before_script:
- docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" "$CI_REGISTRY"
script: script:
- apk add --no-cache docker-compose
- export PROFILES="$(printf '%s ' $COMPOSE_PROFILES)" - export PROFILES="$(printf '%s ' $COMPOSE_PROFILES)"
- cp "$TEST_STAGE_FIREBASE_CONF" ./infrastructure/configs/backend/firebase.json - cp "$TEST_STAGE_FIREBASE_CONF" ./infrastructure/configs/backend/firebase.json
- | - |
( (
while true; do while true; do
@@ -267,24 +278,23 @@ test:
done done
) | tee -a compose.log & ) | tee -a compose.log &
- LOGS_PID=$! - LOGS_PID=$!
- | - |
REGISTRY_PREFIX=$CI_REGISTRY_IMAGE IMAGE_TAG=$CI_COMMIT_SHA \ REGISTRY_PREFIX=$CI_REGISTRY_IMAGE IMAGE_TAG=$CI_COMMIT_SHA \
docker compose -f compose.yaml -f compose.prod.yaml \ docker compose -f compose.yaml -f compose.prod.yaml \
$PROFILES up -d --quiet-pull --quiet-build 2>&1 | tee compose.log $PROFILES up -d --quiet-pull --quiet-build 2>&1 | tee compose.log
- | - |
TEST_CONTAINER_ID=$(docker compose -f compose.yaml $PROFILES ps -q tests -a) TEST_CONTAINER_ID=$(docker compose -f compose.yaml $PROFILES ps -q tests -a)
timeout 600 docker wait "$TEST_CONTAINER_ID" timeout 600 docker wait $TEST_CONTAINER_ID
TEST_EXIT_CODE=$(docker inspect --format "{{.State.ExitCode}}" "$TEST_CONTAINER_ID") TEST_EXIT_CODE=$(docker inspect --format "{{.State.ExitCode}}" $TEST_CONTAINER_ID)
if [ "$TEST_EXIT_CODE" -eq 0 ]; then if [ $TEST_EXIT_CODE -eq 0 ]; then
echo "Tests passed." echo "Tests passed."
else else
echo "Tests failed with exit code $TEST_EXIT_CODE." echo "Tests failed with exit code $TEST_EXIT_CODE."
exit 1 exit 1
fi fi
- docker compose -f compose.yaml $PROFILES down - |
docker compose -f compose.yaml $PROFILES down
- cat .cov/coverage.txt - cat .cov/coverage.txt
artifacts: artifacts:
paths: paths:
@@ -334,7 +344,6 @@ sast-image-migrations:
# sast-image-ml: # sast-image-ml:
# <<: *trivy-image-scan # <<: *trivy-image-scan
# when: manual
# variables: # variables:
# IMAGE_NAME: $BASE_IMAGE_NAME/ml # IMAGE_NAME: $BASE_IMAGE_NAME/ml
# IMAGE_TYPE: ml # IMAGE_TYPE: ml
@@ -356,11 +365,10 @@ tag-migrations:
variables: variables:
IMAGE_NAME: $BASE_IMAGE_NAME/backend-migrations IMAGE_NAME: $BASE_IMAGE_NAME/backend-migrations
# tag-ml: tag-ml:
# <<: *tag-config <<: *tag-config
# when: manual variables:
# variables: IMAGE_NAME: $BASE_IMAGE_NAME/ml
# IMAGE_NAME: $BASE_IMAGE_NAME/ml
webhook-migrations-deploy: webhook-migrations-deploy:
<<: *webhook-config <<: *webhook-config
@@ -385,6 +393,16 @@ webhook-backend-deploy:
- build-runtime - build-runtime
- sast-image-runtime - sast-image-runtime
# Deploy-stage job for the ML image. Everything not set here comes from the
# shared webhook-config template; this job only supplies the ML webhook URL.
webhook-ml-deploy:
  <<: *webhook-config
  stage: deploy
  # Shares the "staging" resource group, so GitLab runs at most one job of
  # that group at a time.
  resource_group: staging
  variables:
    WEBHOOK_URL: $WEBHOOK_URL_ML
  dependencies:
    - build-ml
    # - sast-image-ml
workflow: workflow:
rules: rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+1 -1
View File
@@ -64,7 +64,7 @@ services:
ml: ml:
build: build:
context: . context: .
dockerfile: Containerfile.ml dockerfile: Containerfile
target: ml target: ml
tags: tags:
- template-project-ml:latest - template-project-ml:latest
@@ -0,0 +1,208 @@
from collections import defaultdict
from decimal import Decimal
from operator import itemgetter
from Levenshtein import ratio
from template_project.application.common.data_structure import to_data_structure
from template_project.application.common.interactor import to_interactor
from template_project.application.resume.entity import ResumeId
@to_data_structure
class VacancyInput:
    """A single vacancy passed to the salary predictor as comparison data."""

    # Identifier of the vacancy; carried along but not read by the predictor.
    vacancy_id: str
    # Salary bounds of the vacancy; each bound is averaged separately across
    # vacancies, and their midpoint is used for per-skill salary statistics.
    from_salary: Decimal
    to_salary: Decimal
    # Skills listed by the vacancy; compared case-insensitively and trimmed.
    key_skills: list[str]
    # Precomputed similarity between the resume and this vacancy; contributes
    # 80% of the vacancy's weight in the salary average.
    resume_similarity: float
@to_data_structure
class PredictSalaryRequest:
    """Input of ``PredictSalaryInteractor.execute``."""

    # Resume the prediction is requested for; not read by the interactor itself.
    resume_id: ResumeId
    # Skills taken from the resume; used for vacancy weighting and to exclude
    # skills the candidate already has from the recommendations.
    key_skills: list[str]
    # Vacancies the prediction is derived from; may be empty.
    vacancies: list[VacancyInput]
@to_data_structure
class PredictSalaryResponse:
    """Output of ``PredictSalaryInteractor.execute``."""

    # Predicted salary range (weighted average of the vacancies' bounds, or a
    # fixed fallback range when no usable vacancies were supplied).
    salary_from: Decimal
    salary_to: Decimal
    # Up to three skill names (lower-cased, trimmed) that are frequent among
    # the vacancies and not already covered by the resume.
    recommended_skills: list[str]
# Salary range returned when there are no vacancies (or no usable weights).
_DEFAULT_SALARY_RANGE = (Decimal(50000), Decimal(80000))
# Contribution of each similarity signal to a vacancy's weight.
_RESUME_SIMILARITY_WEIGHT = 0.8
_SKILLS_SIMILARITY_WEIGHT = 0.2
# Minimum Levenshtein ratio for two skill names to count as the same skill.
_SKILL_MATCH_THRESHOLD = 0.7
# Predicted salaries are rounded to two decimal places.
_SALARY_PRECISION = Decimal("0.01")


@to_interactor
class PredictSalaryInteractor:
    """Predicts a salary range and recommends skills from a set of vacancies.

    The salary range is a weighted average of the vacancies' salary bounds;
    the recommendations are frequent, well-paid vacancy skills that the resume
    does not already contain (compared fuzzily via the Levenshtein ratio).
    """

    async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
        """Build the prediction for ``request``.

        Args:
            request: Resume skills plus the vacancies to derive the prediction from.

        Returns:
            The predicted salary range and up to three recommended skills.
        """
        salary_from, salary_to = self._predict_salary(request.vacancies, request.key_skills)
        recommended_skills = self._recommend_skills(request.vacancies, request.key_skills)

        return PredictSalaryResponse(
            salary_from=salary_from,
            salary_to=salary_to,
            recommended_skills=recommended_skills,
        )

    def _predict_salary(self, vacancies: list[VacancyInput], resume_skills: list[str]) -> tuple[Decimal, Decimal]:
        """Return the weighted-average ``(from, to)`` salary over ``vacancies``.

        Each vacancy is weighted by 0.8 * resume similarity + 0.2 * skills
        similarity. The fallback range is returned when there are no vacancies
        or no vacancy carries a positive weight.
        """
        if not vacancies:
            return _DEFAULT_SALARY_RANGE

        # Negative weights would drag the average outside the observed salary
        # range (even below zero), so such vacancies are simply ignored.
        vacancy_weights: list[float] = [
            max(
                0.0,
                _RESUME_SIMILARITY_WEIGHT * vacancy.resume_similarity
                + _SKILLS_SIMILARITY_WEIGHT
                * self._calculate_skills_similarity(resume_skills, vacancy.key_skills),
            )
            for vacancy in vacancies
        ]

        total_weight = sum(vacancy_weights)
        if total_weight <= 0:
            return _DEFAULT_SALARY_RANGE

        weighted_from_sum = Decimal(0)
        weighted_to_sum = Decimal(0)
        # Both sequences are built from the same list, so a length mismatch
        # would be a programming error worth surfacing.
        for vacancy, weight in zip(vacancies, vacancy_weights, strict=True):
            # Go through str() so the Decimal gets the float's short repr
            # rather than its full binary expansion.
            decimal_weight = Decimal(str(weight))
            weighted_from_sum += vacancy.from_salary * decimal_weight
            weighted_to_sum += vacancy.to_salary * decimal_weight

        decimal_total = Decimal(str(total_weight))
        predicted_from = weighted_from_sum / decimal_total
        predicted_to = weighted_to_sum / decimal_total

        return predicted_from.quantize(_SALARY_PRECISION), predicted_to.quantize(_SALARY_PRECISION)

    def _recommend_skills(
        self,
        vacancies: list[VacancyInput],
        resume_skills: list[str],
    ) -> list[str]:
        """Return up to three vacancy skills worth adding to the resume.

        Only skills that occur at least three times across ``vacancies`` and
        are not similar to any resume skill are considered.
        """
        if not vacancies:
            return []

        skill_salaries, skill_frequencies = self._collect_skill_statistics(vacancies)
        filtered_skills = self._filter_skills_by_frequency(skill_frequencies, min_frequency=3)
        candidate_skills = self._filter_skills_by_resume_similarity(filtered_skills, resume_skills)

        if not candidate_skills:
            return []

        skill_scores = self._calculate_skill_scores(candidate_skills, skill_salaries, skill_frequencies)
        return self._get_top_skills(skill_scores, top_n=3)

    def _collect_skill_statistics(
        self, vacancies: list[VacancyInput]
    ) -> tuple[dict[str, list[Decimal]], dict[str, int]]:
        """Collect, per normalized skill, the vacancy midpoint salaries and the occurrence count.

        Skills are lower-cased and stripped; skills that are blank after
        normalization are skipped so that ``""`` can never be recommended.
        """
        skill_salaries: dict[str, list[Decimal]] = defaultdict(list)
        skill_frequencies: dict[str, int] = defaultdict(int)

        for vacancy in vacancies:
            avg_salary = (vacancy.from_salary + vacancy.to_salary) / Decimal(2)

            for skill in vacancy.key_skills:
                normalized_skill = skill.lower().strip()
                if not normalized_skill:
                    continue
                skill_salaries[normalized_skill].append(avg_salary)
                skill_frequencies[normalized_skill] += 1

        return skill_salaries, skill_frequencies

    def _filter_skills_by_frequency(
        self,
        skill_frequencies: dict[str, int],
        min_frequency: int = 3,
    ) -> set[str]:
        """Return the skills that occur at least ``min_frequency`` times."""
        return {skill for skill, frequency in skill_frequencies.items() if frequency >= min_frequency}

    def _filter_skills_by_resume_similarity(
        self,
        skills: set[str],
        resume_skills: list[str],
    ) -> list[str]:
        """Return the ``skills`` that are not similar to any resume skill.

        The result is sorted by name so the output does not depend on set
        iteration order.
        """
        resume_skills_normalized = {skill.lower().strip() for skill in resume_skills}

        return [
            skill
            for skill in sorted(skills)
            if not any(self._is_skill_similar(skill, resume_skill) for resume_skill in resume_skills_normalized)
        ]

    def _calculate_skill_scores(
        self,
        candidate_skills: list[str],
        skill_salaries: dict[str, list[Decimal]],
        skill_frequencies: dict[str, int],
    ) -> list[tuple[str, float]]:
        """Score each candidate as min-max normalized frequency + min-max normalized mean salary.

        Returns ``(skill, score)`` pairs in the order of ``candidate_skills``;
        an empty candidate list yields an empty result.
        """
        if not candidate_skills:
            return []

        skill_avg_salaries: dict[str, float] = {
            skill: float(sum(skill_salaries[skill], Decimal(0)) / len(skill_salaries[skill]))
            for skill in candidate_skills
        }

        frequencies = [skill_frequencies[skill] for skill in candidate_skills]
        min_freq, max_freq = min(frequencies), max(frequencies)
        min_salary = min(skill_avg_salaries.values())
        max_salary = max(skill_avg_salaries.values())

        skill_scores: list[tuple[str, float]] = []
        for skill in candidate_skills:
            normalized_freq = self._normalize(float(skill_frequencies[skill]), min_freq, max_freq)
            normalized_salary = self._normalize(skill_avg_salaries[skill], min_salary, max_salary)
            skill_scores.append((skill, normalized_freq + normalized_salary))

        return skill_scores

    def _get_top_skills(self, skill_scores: list[tuple[str, float]], top_n: int = 3) -> list[str]:
        """Return the names of the ``top_n`` highest-scoring skills.

        Ties are broken alphabetically, and the caller's list is left untouched.
        """
        # Sort by name first; the second sort is stable (also with
        # reverse=True), so equal scores keep the alphabetical order.
        ranked = sorted(skill_scores, key=itemgetter(0))
        ranked.sort(key=itemgetter(1), reverse=True)
        return [skill for skill, _ in ranked[:top_n]]

    def _normalize(self, value: float, min_val: float, max_val: float) -> float:
        """Min-max normalize ``value`` into [0, 1]; a degenerate range maps to 0.0."""
        if max_val == min_val:
            return 0.0
        return (value - min_val) / (max_val - min_val)

    def _is_skill_similar(self, skill1: str, skill2: str, threshold: float = _SKILL_MATCH_THRESHOLD) -> bool:
        """Tell whether two skill names match, ignoring case and surrounding whitespace."""
        return ratio(skill1.lower().strip(), skill2.lower().strip()) >= threshold

    def _calculate_skills_similarity(self, resume_skills: list[str], vacancy_skills: list[str]) -> float:
        """Return a fuzzy Jaccard similarity between the two skill lists.

        Every resume skill is greedily paired with its closest still-unpaired
        vacancy skill; a pair counts as a match when its Levenshtein ratio
        reaches the match threshold. The result is
        ``matches / (|resume| + |vacancy| - matches)``, so two lists whose
        skills all match pairwise score 1.0 even if the spellings differ.
        Returns 0.0 when either list is empty.
        """
        if not resume_skills or not vacancy_skills:
            return 0.0

        resume_skills_normalized = {skill.lower().strip() for skill in resume_skills}
        # Sorted so the greedy pairing is reproducible regardless of hash order.
        vacancy_skills_sorted = sorted({skill.lower().strip() for skill in vacancy_skills})

        matched_vacancy_skills: set[str] = set()
        match_count = 0

        for resume_skill in sorted(resume_skills_normalized):
            best_match_ratio = 0.0
            best_match_skill = None

            for vacancy_skill in vacancy_skills_sorted:
                if vacancy_skill in matched_vacancy_skills:
                    continue

                similarity_ratio = ratio(resume_skill, vacancy_skill)
                if similarity_ratio > best_match_ratio:
                    best_match_ratio = similarity_ratio
                    best_match_skill = vacancy_skill

            if best_match_skill is not None and best_match_ratio >= _SKILL_MATCH_THRESHOLD:
                matched_vacancy_skills.add(best_match_skill)
                match_count += 1

        # Each matched pair is one shared element of the union. Both sets are
        # non-empty here and match_count <= min(len), so the union is >= 1.
        union_size = len(resume_skills_normalized) + len(vacancy_skills_sorted) - match_count
        return match_count / union_size
+2 -2
View File
@@ -14,7 +14,7 @@ from fastapi.middleware.cors import CORSMiddleware
from template_project.ml.configuration import load_configuration from template_project.ml.configuration import load_configuration
from template_project.ml.ioc.make import make_ioc from template_project.ml.ioc.make import make_ioc
from template_project.ml.routes import embedding, healthcheck, predict from template_project.ml.routes import embed, healthcheck, predict
LOG_CONFIG: Final = { LOG_CONFIG: Final = {
"version": 1, "version": 1,
@@ -55,7 +55,7 @@ def make_asgi_application(
allow_headers=["*"], allow_headers=["*"],
) )
app.include_router(healthcheck.router) app.include_router(healthcheck.router)
app.include_router(embedding.router) app.include_router(embed.router)
app.include_router(predict.router) app.include_router(predict.router)
setup_dishka(container=ioc, app=app) setup_dishka(container=ioc, app=app)
@@ -1,38 +0,0 @@
from decimal import Decimal
from template_project.application.common.data_structure import to_data_structure
from template_project.application.common.interactor import to_interactor
from template_project.application.resume.entity import ResumeId
@to_data_structure
class VacancyInput:
vacancy_id: str
from_salary: Decimal
to_salary: Decimal
key_skills: list[str]
resume_similarity: float
@to_data_structure
class PredictSalaryRequest:
resume_id: ResumeId
key_skills: list[str]
vacancies: list[VacancyInput]
@to_data_structure
class PredictSalaryResponse:
salary_from: Decimal
salary_to: Decimal
recommended_skills: list[str]
@to_interactor
class PredictSalaryInteractor:
async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
return PredictSalaryResponse(
salary_from=Decimal(50000),
salary_to=Decimal(80000),
recommended_skills=["python", "django", "postgresql"],
)
+1 -1
View File
@@ -1,6 +1,6 @@
from dishka import BaseScope, Provider, Scope, provide_all from dishka import BaseScope, Provider, Scope, provide_all
from template_project.ml.interactors.predict_salary import PredictSalaryInteractor from template_project.application.resume.interactors.predict_salary import PredictSalaryInteractor
class InteractorProvider(Provider): class InteractorProvider(Provider):
+8 -5
View File
@@ -6,7 +6,7 @@ from fastapi import APIRouter
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from template_project.application.resume.entity import ResumeId from template_project.application.resume.entity import ResumeId
from template_project.ml.interactors.predict_salary import ( from template_project.application.resume.interactors.predict_salary import (
PredictSalaryInteractor, PredictSalaryInteractor,
PredictSalaryRequest, PredictSalaryRequest,
VacancyInput, VacancyInput,
@@ -82,14 +82,17 @@ class PredictSalaryResponseModel(BaseModel):
@router.post( @router.post(
"/predict_salary", "/predict",
summary="Predict salary", summary="Predict salary and recommend skills",
description="Predict salary range and recommend skills based on resume and relevant vacancies", description="Predict salary range and recommend skills based on resume and relevant vacancies",
responses={ responses={
200: {"description": "Salary prediction generated successfully", "model": PredictSalaryResponseModel}, 200: {
"description": "Salary prediction and skills recommendation generated successfully",
"model": PredictSalaryResponseModel,
},
}, },
) )
async def predict_salary( async def predict(
request: PredictSalaryRequestModel, request: PredictSalaryRequestModel,
interactor: FromDishka[PredictSalaryInteractor], interactor: FromDishka[PredictSalaryInteractor],
) -> PredictSalaryResponseModel: ) -> PredictSalaryResponseModel:
Generated
+6 -21
View File
@@ -1438,29 +1438,14 @@ wheels = [
[[package]] [[package]]
name = "numpy" name = "numpy"
version = "2.1.2" version = "1.26.3"
source = { registry = "https://download.pytorch.org/whl/cpu" } source = { registry = "https://download.pytorch.org/whl/cpu" }
wheels = [ wheels = [
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b" }, { url = "https://download.pytorch.org/whl/numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db" }, { url = "https://download.pytorch.org/whl/numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1" }, { url = "https://download.pytorch.org/whl/numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426" }, { url = "https://download.pytorch.org/whl/numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0" }, { url = "https://download.pytorch.org/whl/numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8" },
{ url = "https://download.pytorch.org/whl/numpy-2.1.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35" },
] ]
[[package]] [[package]]