You've already forked RekomenciBackend
feat(): prediction pipeline
This commit is contained in:
@@ -15,7 +15,7 @@ from template_project.application.user.entity import UserId
|
||||
|
||||
class ResumeDataGateway(Protocol):
|
||||
@abstractmethod
|
||||
async def load(self, resume_id: ResumeId) -> Resume:
|
||||
async def load_by_resume_id(self, resume_id: ResumeId) -> Resume | None:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -10,7 +10,6 @@ from template_project.application.resume.entity import (
|
||||
ResumeId,
|
||||
ResumeProject,
|
||||
)
|
||||
from template_project.application.resume.interactors.resume_embedding import ResumeEmbeddingInteractor
|
||||
|
||||
|
||||
@to_data_structure
|
||||
@@ -38,8 +37,6 @@ class ProjectInput:
|
||||
class AddResumeInteractor:
|
||||
unit_of_work: UnitOfWork
|
||||
identity_provider: IdentityProvider
|
||||
# TODO: move this into a background task
|
||||
resume_embedding_interactor: ResumeEmbeddingInteractor
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
@@ -97,8 +94,6 @@ class AddResumeInteractor:
|
||||
)
|
||||
await self.unit_of_work.add(resume_project)
|
||||
|
||||
await self.resume_embedding_interactor.run(resume)
|
||||
|
||||
await self.unit_of_work.commit()
|
||||
|
||||
return resume.id
|
||||
|
||||
@@ -16,7 +16,7 @@ from template_project.application.resume.entity import (
|
||||
ResumeId,
|
||||
ResumeProject,
|
||||
)
|
||||
from template_project.application.resume.errors import ResumeDoesBelongUserError
|
||||
from template_project.application.resume.errors import ResumeDoesBelongUserError, ResumeNotFoundError
|
||||
|
||||
|
||||
@to_data_structure
|
||||
@@ -96,7 +96,9 @@ class EditResumeInteractor:
|
||||
projects: list[ProjectInput] | None = None,
|
||||
) -> EditResumeResponse:
|
||||
user = await self.identity_provider.get_current_user()
|
||||
old_resume = await self.resume_data_gateway.load(resume_id)
|
||||
old_resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
|
||||
if old_resume is None:
|
||||
raise ResumeNotFoundError(resume_id=resume_id)
|
||||
if old_resume.user_id != user.id:
|
||||
raise ResumeDoesBelongUserError
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from template_project.application.resume.data_gateway import (
|
||||
ResumeProjectDataGateway,
|
||||
)
|
||||
from template_project.application.resume.entity import ResumeId
|
||||
from template_project.application.resume.errors import ResumeDoesBelongUserError
|
||||
from template_project.application.resume.errors import ResumeDoesBelongUserError, ResumeNotFoundError
|
||||
|
||||
|
||||
@to_data_structure
|
||||
@@ -72,7 +72,9 @@ class GetResumeInteractor:
|
||||
) -> GetResumeResponse:
|
||||
user = await self.identity_provider.get_current_user()
|
||||
|
||||
resume = await self.resume_data_gateway.load(resume_id)
|
||||
resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
|
||||
if resume is None:
|
||||
raise ResumeNotFoundError(resume_id=resume_id)
|
||||
|
||||
if resume.user_id != user.id:
|
||||
raise ResumeDoesBelongUserError
|
||||
@@ -167,7 +169,9 @@ class GetResumeHistoryInteractor:
|
||||
async def execute(self, resume_id: ResumeId) -> list[ResumeListItemResponse]:
|
||||
user = await self.identity_provider.get_current_user()
|
||||
|
||||
resume = await self.resume_data_gateway.load(resume_id)
|
||||
resume = await self.resume_data_gateway.load_by_resume_id(resume_id)
|
||||
if resume is None:
|
||||
raise ResumeNotFoundError(resume_id=resume_id)
|
||||
if resume.user_id != user.id:
|
||||
raise ResumeDoesBelongUserError
|
||||
|
||||
|
||||
+1
-1
@@ -33,7 +33,7 @@ class PredictSalaryResponse:
|
||||
|
||||
|
||||
@to_interactor
|
||||
class PredictSalaryInteractor:
|
||||
class PredictModelInteractor:
|
||||
async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
|
||||
salary_from, salary_to = self._predict_salary(request.vacancies, request.key_skills)
|
||||
recommended_skills = self._recommend_skills(request.vacancies, request.key_skills)
|
||||
@@ -0,0 +1,102 @@
|
||||
from typing import Final
|
||||
|
||||
from Levenshtein import ratio
|
||||
|
||||
from template_project.application.common.data_structure import to_data_structure
|
||||
from template_project.application.common.interactor import to_interactor
|
||||
from template_project.application.common.unit_of_work import UnitOfWork
|
||||
from template_project.application.resume.data_gateway import ResumeDataGateway
|
||||
from template_project.application.resume.entity import Resume, ResumeEmbedding, ResumeId
|
||||
from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator
|
||||
from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator
|
||||
from template_project.application.vacancy.data_gateway import VacancyDataGateway
|
||||
from template_project.application.vacancy.data_structure import SuitableVacancy
|
||||
|
||||
# Text fed to the embedding vector generator. The four placeholders
# (position, experience_type, key_skills, about_me) are filled in from the
# resume before the text is sent for vectorisation.
EMBEDDING_TEXT_TEMPLATE: Final = (
    "\n"
    "Позиция: {position}\n"
    "Опыт: {experience_type}\n"
    "Ключевые навыки: {key_skills}\n"
    "Описание: {about_me}\n"
)
|
||||
|
||||
|
||||
def _calculate_skills_matching(resume_skills: list[str], vacancy_skills: list[str]) -> float:
    """Return the mean Levenshtein ratio over all resume/vacancy skill pairs.

    Every resume skill is compared with every vacancy skill. Pairs whose
    ratio is exactly zero are left out of the average.

    Args:
        resume_skills: Skill names taken from the resume.
        vacancy_skills: Skill names taken from the vacancy.

    Returns:
        The average of the non-zero pair ratios, or ``0.0`` when no pair
        produced a non-zero ratio (which includes either list being empty).
    """
    similarity_total = 0.0
    scored_pairs = 0
    for own_skill in resume_skills:
        for wanted_skill in vacancy_skills:
            similarity = ratio(own_skill, wanted_skill)
            if similarity == 0:
                continue
            similarity_total += similarity
            scored_pairs += 1

    # Guard instead of dividing by zero when nothing was scored.
    if scored_pairs == 0:
        return 0.0
    return similarity_total / scored_pairs
|
||||
|
||||
|
||||
def _filter_and_sort_vacancies(
    resume: Resume,
    suitable_vacancies: list[SuitableVacancy],
    limit: int = 50,
) -> list[SuitableVacancy]:
    """Pick at most ``limit`` vacancies, preferring those that match the resume.

    A vacancy matches when its experience type equals the resume's and the
    skills-matching score between the two skill lists is at least ``0.5``.
    Matching vacancies come first, ordered by ``resume_similarity``
    (highest first). If there are fewer than ``limit`` of them, the result
    is topped up with the non-matching vacancies, also ordered by
    ``resume_similarity`` (highest first).

    Args:
        resume: Resume whose ``experience_type`` and ``key_skills`` are
            compared against each vacancy.
        suitable_vacancies: Candidate vacancies; the list is not mutated.
        limit: Maximum number of vacancies to return. A non-positive value
            yields an empty list.

    Returns:
        A new list with at most ``limit`` vacancies.
    """

    def is_suitable(candidate: SuitableVacancy) -> bool:
        # Check the cheap equality first so the pairwise skill comparison
        # is skipped for vacancies that cannot match anyway.
        if resume.experience_type != candidate.vacancy.experience_type:
            return False
        score = _calculate_skills_matching(resume.key_skills, candidate.vacancy.key_skills)
        return score >= 0.5

    def similarity(candidate: SuitableVacancy) -> float:
        return candidate.resume_similarity

    if limit <= 0:
        # A negative limit would otherwise slice from the wrong end.
        return []

    # Split in one pass instead of re-scanning the matched list with
    # ``not in`` for every candidate (quadratic and equality-dependent).
    matched: list[SuitableVacancy] = []
    fallback: list[SuitableVacancy] = []
    for candidate in suitable_vacancies:
        if is_suitable(candidate):
            matched.append(candidate)
        else:
            fallback.append(candidate)

    # Always order the matched vacancies, not only when they fill the limit,
    # so the result is ordered the same way on both paths.
    matched.sort(key=similarity, reverse=True)
    if len(matched) >= limit:
        return matched[:limit]

    fallback.sort(key=similarity, reverse=True)
    return matched + fallback[: limit - len(matched)]
|
||||
|
||||
|
||||
@to_data_structure
class PredictResumeRequest:
    """Request for the resume prediction interactor: the resume to process."""

    resume_id: ResumeId
|
||||
|
||||
|
||||
@to_interactor
class ResumePredictionInteractor:
    """Builds and stores an embedding and a prediction for a single resume."""

    unit_of_work: UnitOfWork
    resume_data_gateway: ResumeDataGateway
    vacancy_data_gateway: VacancyDataGateway
    vector_generator: ResumeEmbeddingVectorGenerator
    resume_prediction_generator: ResumePredictionGenerator

    async def execute(self, request: PredictResumeRequest) -> None:
        """Generate the embedding and prediction for ``request.resume_id``.

        Loads the resume, renders it into the embedding text template,
        asks the vector generator for a vector, fetches vacancies suitable
        for that vector, narrows them down to at most 50, asks the
        prediction generator for a prediction, and commits both the
        embedding and the prediction through the unit of work.

        Returns without doing anything when the resume cannot be loaded.
        """
        resume = await self.resume_data_gateway.load_by_resume_id(request.resume_id)
        if resume is None:
            return

        text_for_embedding = EMBEDDING_TEXT_TEMPLATE.format(
            position=resume.position,
            experience_type=resume.experience_type.value,
            key_skills=", ".join(resume.key_skills),
            about_me=resume.about_me,
        )
        embedding_vector = await self.vector_generator.generate(text_for_embedding)
        embedding = ResumeEmbedding.factory(resume_id=resume.id, vector=embedding_vector)

        found_vacancies = await self.vacancy_data_gateway.get_suitable(embedding.vector)
        # The helper expects a list, so materialise whatever the gateway returned.
        shortlisted = _filter_and_sort_vacancies(resume, list(found_vacancies), limit=50)

        prediction = await self.resume_prediction_generator.generate(
            resume=resume,
            suitable_vacancies=shortlisted,
        )

        await self.unit_of_work.add(embedding, prediction)
        await self.unit_of_work.commit()
|
||||
@@ -1,76 +0,0 @@
|
||||
from collections.abc import Callable
|
||||
|
||||
from Levenshtein import ratio
|
||||
|
||||
from template_project.application.common.unit_of_work import UnitOfWork
|
||||
from template_project.application.resume.entity import Resume, ResumeEmbedding
|
||||
from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator
|
||||
from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator
|
||||
from template_project.application.vacancy.data_gateway import VacancyDataGateway
|
||||
from template_project.application.vacancy.data_structure import SuitableVacancy
|
||||
|
||||
|
||||
def suitable_vacancies_key(
    resume: Resume,
) -> Callable[[SuitableVacancy], tuple[bool, bool]]:
    """Build a sort key that rates a vacancy against ``resume``.

    Args:
        resume: Resume whose ``experience_type`` and ``key_skills`` are
            compared with each vacancy.

    Returns:
        A function mapping a suitable vacancy to
        ``(experience_matches, skills_match)``. ``skills_match`` is true
        when the mean non-zero Levenshtein ratio over all resume/vacancy
        skill pairs is at least ``0.5``.
    """

    def wrapper(suitable_vacancy: SuitableVacancy) -> tuple[bool, bool]:
        pair_count = 0
        ratio_sum = 0.0
        for resume_skill in resume.key_skills:
            for vacancy_skill in suitable_vacancy.vacancy.key_skills:
                skill_ratio = ratio(resume_skill, vacancy_skill)
                if skill_ratio != 0:
                    pair_count += 1
                    ratio_sum += skill_ratio

        matching_skills = ratio_sum / pair_count if pair_count else 0.0

        # The ratio is a normalised similarity in [0, 1], so the threshold
        # must be on that scale; comparing against 50 could never be true.
        return (
            resume.experience_type == suitable_vacancy.vacancy.experience_type,
            matching_skills >= 0.5,
        )

    return wrapper
|
||||
|
||||
|
||||
class ResumeEmbeddingInteractor:
    """Creates and stores an embedding and a prediction for a resume."""

    def __init__(
        self,
        unit_of_work: UnitOfWork,
        vacancy_data_gateway: VacancyDataGateway,
        vector_generator: ResumeEmbeddingVectorGenerator,
        resume_prediction_generator: ResumePredictionGenerator,
    ) -> None:
        """Store the collaborators used by :meth:`run`."""
        self.unit_of_work = unit_of_work
        self.vector_generator = vector_generator
        self.vacancy_data_gateway = vacancy_data_gateway
        self.resume_prediction_generator = resume_prediction_generator

    async def run(
        self,
        resume: Resume,
    ) -> None:
        """Generate, store and commit the embedding and prediction for ``resume``.

        Builds a vector from the resume fields, fetches vacancies suitable
        for that vector, keeps the 50 best-rated ones, asks the prediction
        generator for a prediction and commits both new objects through the
        unit of work.
        """
        vector = await self.vector_generator.generate(
            position=resume.position,
            about_me=resume.about_me,
            key_skills=resume.key_skills,
            experience_type=resume.experience_type,
        )
        resume_embedding = ResumeEmbedding.factory(
            resume_id=resume.id,
            vector=vector,
        )

        suitable_vacancies = await self.vacancy_data_gateway.get_suitable(resume_embedding.vector)
        # The key is a tuple of booleans where True means "matches". An
        # ascending sort would put the non-matching vacancies first and the
        # slice would keep exactly those, so sort descending. The sort is
        # stable, so equally rated vacancies keep the gateway's order.
        suitable_vacancies_filtered = sorted(
            suitable_vacancies,
            key=suitable_vacancies_key(resume),
            reverse=True,
        )[:50]

        # NOTE(review): the keyword below is spelled the way the generator
        # protocol appears to declare it; confirm before renaming.
        resume_prediction = await self.resume_prediction_generator.generate(
            resume=resume,
            suituble_vacancies=suitable_vacancies_filtered,
        )

        await self.unit_of_work.add(resume_embedding, resume_prediction)
        await self.unit_of_work.commit()
|
||||
@@ -11,6 +11,6 @@ class ResumePredictionGenerator(Protocol):
|
||||
async def generate(
|
||||
self,
|
||||
resume: Resume,
|
||||
suituble_vacancies: Sequence[SuitableVacancy],
|
||||
suitable_vacancies: Sequence[SuitableVacancy],
|
||||
) -> ResumePrediction:
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
from abc import abstractmethod
|
||||
|
||||
from template_project.application.common.enums import ExperienceType
|
||||
|
||||
|
||||
class ResumeEmbeddingVectorGenerator:
|
||||
@abstractmethod
|
||||
async def generate(
|
||||
self,
|
||||
position: str,
|
||||
about_me: str,
|
||||
experience_type: ExperienceType,
|
||||
key_skills: list[str],
|
||||
text: str,
|
||||
) -> list[float]:
|
||||
raise NotImplementedError
|
||||
|
||||
Reference in New Issue
Block a user