feat(): prediction pipeline

This commit is contained in:
gitgernit
2025-11-23 04:11:52 +03:00
parent 2e6214a5ec
commit d1c7641698
25 changed files with 224 additions and 244 deletions
@@ -15,7 +15,7 @@ from template_project.application.user.entity import UserId
class ResumeDataGateway(Protocol):
@abstractmethod
async def load(self, resume_id: ResumeId) -> Resume:
async def load_by_resume_id(self, resume_id: ResumeId) -> Resume | None:
raise NotImplementedError
@abstractmethod
@@ -10,7 +10,6 @@ from template_project.application.resume.entity import (
ResumeId,
ResumeProject,
)
from template_project.application.resume.interactors.resume_embedding import ResumeEmbeddingInteractor
@to_data_structure
@@ -38,8 +37,6 @@ class ProjectInput:
class AddResumeInteractor:
unit_of_work: UnitOfWork
identity_provider: IdentityProvider
# TODO: move this into a background task
resume_embedding_interactor: ResumeEmbeddingInteractor
async def execute(
self,
@@ -97,8 +94,6 @@ class AddResumeInteractor:
)
await self.unit_of_work.add(resume_project)
await self.resume_embedding_interactor.run(resume)
await self.unit_of_work.commit()
return resume.id
@@ -16,7 +16,7 @@ from template_project.application.resume.entity import (
ResumeId,
ResumeProject,
)
from template_project.application.resume.errors import ResumeDoesBelongUserError
from template_project.application.resume.errors import ResumeDoesBelongUserError, ResumeNotFoundError
@to_data_structure
@@ -96,7 +96,9 @@ class EditResumeInteractor:
projects: list[ProjectInput] | None = None,
) -> EditResumeResponse:
user = await self.identity_provider.get_current_user()
old_resume = await self.resume_data_gateway.load(resume_id)
old_resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
if old_resume is None:
raise ResumeNotFoundError(resume_id=resume_id)
if old_resume.user_id != user.id:
raise ResumeDoesBelongUserError
@@ -12,7 +12,7 @@ from template_project.application.resume.data_gateway import (
ResumeProjectDataGateway,
)
from template_project.application.resume.entity import ResumeId
from template_project.application.resume.errors import ResumeDoesBelongUserError
from template_project.application.resume.errors import ResumeDoesBelongUserError, ResumeNotFoundError
@to_data_structure
@@ -72,7 +72,9 @@ class GetResumeInteractor:
) -> GetResumeResponse:
user = await self.identity_provider.get_current_user()
resume = await self.resume_data_gateway.load(resume_id)
resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
if resume is None:
raise ResumeNotFoundError(resume_id=resume_id)
if resume.user_id != user.id:
raise ResumeDoesBelongUserError
@@ -167,7 +169,9 @@ class GetResumeHistoryInteractor:
async def execute(self, resume_id: ResumeId) -> list[ResumeListItemResponse]:
user = await self.identity_provider.get_current_user()
resume = await self.resume_data_gateway.load(resume_id)
resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
if resume is None:
raise ResumeNotFoundError(resume_id=resume_id)
if resume.user_id != user.id:
raise ResumeDoesBelongUserError
@@ -33,7 +33,7 @@ class PredictSalaryResponse:
@to_interactor
class PredictSalaryInteractor:
class PredictModelInteractor:
async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
salary_from, salary_to = self._predict_salary(request.vacancies, request.key_skills)
recommended_skills = self._recommend_skills(request.vacancies, request.key_skills)
@@ -0,0 +1,102 @@
from typing import Final
from Levenshtein import ratio
from template_project.application.common.data_structure import to_data_structure
from template_project.application.common.interactor import to_interactor
from template_project.application.common.unit_of_work import UnitOfWork
from template_project.application.resume.data_gateway import ResumeDataGateway
from template_project.application.resume.entity import Resume, ResumeEmbedding, ResumeId
from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator
from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator
from template_project.application.vacancy.data_gateway import VacancyDataGateway
from template_project.application.vacancy.data_structure import SuitableVacancy
# Text template that ResumePredictionInteractor.execute fills in with resume
# fields before passing the result to the embedding vector generator.
# The Russian labels read "Position", "Experience", "Key skills" and
# "Description"; the leading and trailing newlines are part of the literal.
EMBEDDING_TEXT_TEMPLATE: Final = """
Позиция: {position}
Опыт: {experience_type}
Ключевые навыки: {key_skills}
Описание: {about_me}
"""
def _calculate_skills_matching(resume_skills: list[str], vacancy_skills: list[str]) -> float:
count_skills = 0
ratio_skill_sum = 0.0
for resume_key_skill in resume_skills:
for vacancy_key_skill in vacancy_skills:
ratio_skill = ratio(resume_key_skill, vacancy_key_skill)
if ratio_skill != 0:
count_skills += 1
ratio_skill_sum += ratio_skill
try:
return ratio_skill_sum / count_skills
except ZeroDivisionError:
return 0.0
def _filter_and_sort_vacancies(
resume: Resume,
suitable_vacancies: list[SuitableVacancy],
limit: int = 50,
) -> list[SuitableVacancy]:
def is_suitable(vacancy: SuitableVacancy) -> bool:
experience_match = resume.experience_type == vacancy.vacancy.experience_type
skills_matching = _calculate_skills_matching(resume.key_skills, vacancy.vacancy.key_skills)
skills_match = skills_matching >= 0.5
return experience_match and skills_match
filtered = [v for v in suitable_vacancies if is_suitable(v)]
if len(filtered) >= limit:
filtered.sort(key=lambda v: v.resume_similarity, reverse=True)
return filtered[:limit]
remaining = [v for v in suitable_vacancies if v not in filtered]
remaining.sort(key=lambda v: v.resume_similarity, reverse=True)
total_needed = limit - len(filtered)
return filtered + remaining[:total_needed]
@to_data_structure
class PredictResumeRequest:
    """Input for ``ResumePredictionInteractor.execute``."""

    # Identifier of the resume that is loaded and used for the prediction.
    resume_id: ResumeId
@to_interactor
class ResumePredictionInteractor:
    """Create and store the embedding and the prediction for a saved resume."""

    unit_of_work: UnitOfWork
    resume_data_gateway: ResumeDataGateway
    vacancy_data_gateway: VacancyDataGateway
    vector_generator: ResumeEmbeddingVectorGenerator
    resume_prediction_generator: ResumePredictionGenerator

    async def execute(self, request: PredictResumeRequest) -> None:
        """Run the prediction pipeline for ``request.resume_id``.

        Loads the resume, turns its text description into an embedding
        vector, fetches vacancies for that vector, narrows them down to at
        most 50, asks the prediction generator for a prediction, then adds
        the embedding and the prediction to the unit of work and commits.
        Returns without doing anything when the resume cannot be loaded.
        """
        resume = await self.resume_data_gateway.load_by_resume_id(request.resume_id)
        if resume is None:
            return

        description = EMBEDDING_TEXT_TEMPLATE.format(
            position=resume.position,
            experience_type=resume.experience_type.value,
            key_skills=", ".join(resume.key_skills),
            about_me=resume.about_me,
        )
        embedding_vector = await self.vector_generator.generate(description)
        embedding = ResumeEmbedding.factory(
            resume_id=resume.id,
            vector=embedding_vector,
        )

        candidates = list(await self.vacancy_data_gateway.get_suitable(embedding.vector))
        prediction = await self.resume_prediction_generator.generate(
            resume=resume,
            suitable_vacancies=_filter_and_sort_vacancies(resume, candidates, limit=50),
        )

        await self.unit_of_work.add(embedding, prediction)
        await self.unit_of_work.commit()
@@ -1,76 +0,0 @@
from collections.abc import Callable
from Levenshtein import ratio
from template_project.application.common.unit_of_work import UnitOfWork
from template_project.application.resume.entity import Resume, ResumeEmbedding
from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator
from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator
from template_project.application.vacancy.data_gateway import VacancyDataGateway
from template_project.application.vacancy.data_structure import SuitableVacancy
def suitable_vacancies_key(
    resume: Resume,
) -> Callable[[SuitableVacancy], tuple[bool, bool]]:
    """Build a sort key that rates a vacancy against ``resume``.

    Args:
        resume: Resume whose experience type and key skills are compared
            with each vacancy.

    Returns:
        A callable mapping a ``SuitableVacancy`` to the pair
        ``(experience type matches, skills match)``. Skills match when the
        mean of the non-zero Levenshtein ratios over all resume-skill /
        vacancy-skill pairs is at least ``0.5``.
    """

    def wrapper(suitable_vacancy: SuitableVacancy) -> tuple[bool, bool]:
        vacancy = suitable_vacancy.vacancy

        count_skills = 0
        ratio_skill_sum = 0.0
        for resume_key_skill in resume.key_skills:
            for vacancy_key_skill in vacancy.key_skills:
                ratio_skill = ratio(resume_key_skill, vacancy_key_skill)
                if ratio_skill != 0:
                    count_skills += 1
                    ratio_skill_sum += ratio_skill

        # No contributing pair means there is nothing to average.
        matching_skills = ratio_skill_sum / count_skills if count_skills else 0.0

        # ``ratio`` is normalised to the 0..1 range, so the cut-off has to be
        # 0.5; comparing against 50 could never succeed.
        return resume.experience_type == vacancy.experience_type, matching_skills >= 0.5

    return wrapper
class ResumeEmbeddingInteractor:
    """Create and store the embedding and the prediction for a resume."""

    def __init__(
        self,
        unit_of_work: UnitOfWork,
        vacancy_data_gateway: VacancyDataGateway,
        vector_generator: ResumeEmbeddingVectorGenerator,
        resume_prediction_generator: ResumePredictionGenerator,
    ) -> None:
        """Store the collaborators used by :meth:`run`."""
        self.unit_of_work = unit_of_work
        self.vector_generator = vector_generator
        self.vacancy_data_gateway = vacancy_data_gateway
        self.resume_prediction_generator = resume_prediction_generator

    async def run(
        self,
        resume: Resume,
    ) -> None:
        """Generate the embedding and prediction for ``resume`` and commit them.

        The resume fields are turned into an embedding vector, vacancies are
        fetched for that vector, the 50 best-rated ones are passed to the
        prediction generator, and both the embedding and the prediction are
        added to the unit of work, which is then committed.
        """
        vector = await self.vector_generator.generate(
            position=resume.position,
            about_me=resume.about_me,
            key_skills=resume.key_skills,
            experience_type=resume.experience_type,
        )
        resume_embedding = ResumeEmbedding.factory(
            resume_id=resume.id,
            vector=vector,
        )

        suitable_vacancies = await self.vacancy_data_gateway.get_suitable(resume_embedding.vector)
        # The key is a pair of booleans (experience match, skills match).
        # Sort in descending order so vacancies matching on both come first;
        # an ascending sort would keep the worst matches after truncation.
        suitable_vacancies_filtered = sorted(
            suitable_vacancies,
            key=suitable_vacancies_key(resume),
            reverse=True,
        )[:50]

        resume_prediction = await self.resume_prediction_generator.generate(
            resume=resume,
            suituble_vacancies=suitable_vacancies_filtered,
        )

        await self.unit_of_work.add(resume_embedding, resume_prediction)
        await self.unit_of_work.commit()
@@ -11,6 +11,6 @@ class ResumePredictionGenerator(Protocol):
async def generate(
self,
resume: Resume,
suituble_vacancies: Sequence[SuitableVacancy],
suitable_vacancies: Sequence[SuitableVacancy],
) -> ResumePrediction:
raise NotImplementedError
@@ -1,15 +1,10 @@
from abc import abstractmethod
from template_project.application.common.enums import ExperienceType
class ResumeEmbeddingVectorGenerator:
@abstractmethod
async def generate(
self,
position: str,
about_me: str,
experience_type: ExperienceType,
key_skills: list[str],
text: str,
) -> list[float]:
raise NotImplementedError