You've already forked RekomenciBackend
Merge branch 'ml'
This commit is contained in:
@@ -0,0 +1,208 @@
|
||||
from collections import defaultdict
|
||||
from decimal import Decimal
|
||||
from operator import itemgetter
|
||||
|
||||
from Levenshtein import ratio
|
||||
|
||||
from template_project.application.common.data_structure import to_data_structure
|
||||
from template_project.application.common.interactor import to_interactor
|
||||
from template_project.application.resume.entity import ResumeId
|
||||
|
||||
|
||||
@to_data_structure
class VacancyInput:
    """A single vacancy used as evidence when predicting a salary and recommending skills."""

    # Identifier of the vacancy; carried through but not read by the interactor in this module.
    vacancy_id: str
    # Lower bound of the vacancy's salary range (currency/units are not visible here).
    from_salary: Decimal
    # Upper bound of the vacancy's salary range.
    to_salary: Decimal
    # Skills listed by the vacancy; compared case-insensitively and fuzzily with resume skills.
    key_skills: list[str]
    # Similarity between the vacancy and the resume, used as the dominant (0.8) part of the
    # vacancy weight. NOTE(review): presumably precomputed by the caller — confirm the value range.
    resume_similarity: float
|
||||
|
||||
|
||||
@to_data_structure
class PredictSalaryRequest:
    """Input of ``PredictSalaryInteractor.execute``: a resume's skills plus candidate vacancies."""

    # Identifier of the resume the prediction is for; not read by the prediction logic itself.
    resume_id: ResumeId
    # Skills taken from the resume.
    key_skills: list[str]
    # Vacancies the salary range and skill recommendations are derived from.
    vacancies: list[VacancyInput]
|
||||
|
||||
|
||||
@to_data_structure
class PredictSalaryResponse:
    """Output of ``PredictSalaryInteractor.execute``."""

    # Predicted lower bound of the salary range.
    salary_from: Decimal
    # Predicted upper bound of the salary range.
    salary_to: Decimal
    # Skills suggested for adding to the resume (may be empty).
    recommended_skills: list[str]
|
||||
|
||||
|
||||
@to_interactor
class PredictSalaryInteractor:
    """Predict a salary range and recommend skills from vacancies related to a resume.

    The salary range is a weighted average of the vacancy salary bounds, where
    each vacancy is weighted by its resume similarity and by the fuzzy overlap
    of its skills with the resume skills. Recommended skills are vacancy skills
    that occur often, pay well and are not already present in the resume.

    All iteration over skill sets is done in sorted order so that results do
    not depend on Python's per-process string hash randomisation.
    """

    async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
        """Compute the salary range and skill recommendations for ``request``."""
        salary_from, salary_to = self._predict_salary(request.vacancies, request.key_skills)
        recommended_skills = self._recommend_skills(request.vacancies, request.key_skills)

        return PredictSalaryResponse(
            salary_from=salary_from,
            salary_to=salary_to,
            recommended_skills=recommended_skills,
        )

    def _predict_salary(self, vacancies: list[VacancyInput], resume_skills: list[str]) -> tuple[Decimal, Decimal]:
        """Return the weighted-average ``(from, to)`` salary over ``vacancies``.

        Each vacancy weight is ``0.8 * resume_similarity + 0.2 * skills_similarity``.
        A fixed fallback range is returned when there are no vacancies or when
        the weights sum to zero. Both bounds are rounded to two decimal places.
        """
        fallback = (Decimal(50000), Decimal(80000))
        if not vacancies:
            return fallback

        # Convert every weight to Decimal exactly once (via ``str`` to keep the short
        # float repr) so that numerator and denominator are built from the same values.
        weights: list[Decimal] = []
        for vacancy in vacancies:
            skills_similarity = self._calculate_skills_similarity(resume_skills, vacancy.key_skills)
            weights.append(Decimal(str(0.8 * vacancy.resume_similarity + 0.2 * skills_similarity)))

        total_weight = sum(weights, Decimal(0))
        if total_weight == 0:
            return fallback

        # ``weights`` was built from ``vacancies``, so the lengths must agree; strict=True
        # turns any future mismatch into an error instead of silent truncation.
        weighted_from_sum = sum(
            (vacancy.from_salary * weight for vacancy, weight in zip(vacancies, weights, strict=True)),
            Decimal(0),
        )
        weighted_to_sum = sum(
            (vacancy.to_salary * weight for vacancy, weight in zip(vacancies, weights, strict=True)),
            Decimal(0),
        )

        cent = Decimal("0.01")
        return (weighted_from_sum / total_weight).quantize(cent), (weighted_to_sum / total_weight).quantize(cent)

    def _recommend_skills(
        self,
        vacancies: list[VacancyInput],
        resume_skills: list[str],
    ) -> list[str]:
        """Return up to three vacancy skills worth adding to the resume.

        Only skills seen in at least three vacancies and not fuzzily matching any
        resume skill are considered; they are ranked by frequency and salary.
        """
        if not vacancies:
            return []

        skill_salaries, skill_frequencies = self._collect_skill_statistics(vacancies)
        filtered_skills = self._filter_skills_by_frequency(skill_frequencies, min_frequency=3)
        candidate_skills = self._filter_skills_by_resume_similarity(filtered_skills, resume_skills)
        if not candidate_skills:
            return []

        skill_scores = self._calculate_skill_scores(candidate_skills, skill_salaries, skill_frequencies)
        return self._get_top_skills(skill_scores, top_n=3)

    def _collect_skill_statistics(
        self, vacancies: list[VacancyInput]
    ) -> tuple[dict[str, list[Decimal]], dict[str, int]]:
        """Collect, per normalised skill, the vacancy mid-point salaries and the vacancy count.

        Skills are lower-cased and stripped. A skill repeated inside one vacancy
        (including in different casing) is counted once for that vacancy, and
        blank skill strings are ignored.
        """
        skill_salaries: dict[str, list[Decimal]] = defaultdict(list)
        skill_frequencies: dict[str, int] = defaultdict(int)

        for vacancy in vacancies:
            avg_salary = (vacancy.from_salary + vacancy.to_salary) / Decimal(2)

            # De-duplicate per vacancy so the frequency means "number of vacancies".
            normalized_skills = {skill.lower().strip() for skill in vacancy.key_skills}
            normalized_skills.discard("")

            for normalized_skill in normalized_skills:
                skill_salaries[normalized_skill].append(avg_salary)
                skill_frequencies[normalized_skill] += 1

        return skill_salaries, skill_frequencies

    def _filter_skills_by_frequency(
        self,
        skill_frequencies: dict[str, int],
        min_frequency: int = 3,
    ) -> set[str]:
        """Return the skills whose frequency is at least ``min_frequency``."""
        return {skill for skill, frequency in skill_frequencies.items() if frequency >= min_frequency}

    def _filter_skills_by_resume_similarity(
        self,
        skills: set[str],
        resume_skills: list[str],
    ) -> list[str]:
        """Return the ``skills`` that do not fuzzily match any resume skill.

        The result is in sorted order: ``skills`` is a set, and its iteration
        order would otherwise decide how equally scored skills are ranked later.
        """
        resume_skills_normalized = {skill.lower().strip() for skill in resume_skills}

        return [
            skill
            for skill in sorted(skills)
            if not any(self._is_skill_similar(skill, resume_skill) for resume_skill in resume_skills_normalized)
        ]

    def _calculate_skill_scores(
        self,
        candidate_skills: list[str],
        skill_salaries: dict[str, list[Decimal]],
        skill_frequencies: dict[str, int],
    ) -> list[tuple[str, float]]:
        """Score each candidate as min-max normalised frequency plus min-max normalised mean salary.

        Returns ``(skill, score)`` pairs in the order of ``candidate_skills``;
        an empty candidate list yields an empty result.
        """
        if not candidate_skills:
            # min()/max() below would raise ValueError on an empty sequence.
            return []

        frequency_by_skill = {skill: float(skill_frequencies[skill]) for skill in candidate_skills}
        avg_salary_by_skill = {
            skill: float(sum(skill_salaries[skill]) / len(skill_salaries[skill])) for skill in candidate_skills
        }

        min_freq = min(frequency_by_skill.values())
        max_freq = max(frequency_by_skill.values())
        min_salary = min(avg_salary_by_skill.values())
        max_salary = max(avg_salary_by_skill.values())

        return [
            (
                skill,
                self._normalize(frequency_by_skill[skill], min_freq, max_freq)
                + self._normalize(avg_salary_by_skill[skill], min_salary, max_salary),
            )
            for skill in candidate_skills
        ]

    def _get_top_skills(self, skill_scores: list[tuple[str, float]], top_n: int = 3) -> list[str]:
        """Return the names of the ``top_n`` highest-scored skills, best first.

        The input list is left untouched; ties keep their input order because
        ``sorted`` is stable.
        """
        ranked = sorted(skill_scores, key=itemgetter(1), reverse=True)
        return [skill for skill, _ in ranked[:top_n]]

    def _normalize(self, value: float, min_val: float, max_val: float) -> float:
        """Min-max normalise ``value`` into [0, 1]; returns 0.0 when the range is empty."""
        if max_val == min_val:
            return 0.0
        return (value - min_val) / (max_val - min_val)

    def _is_skill_similar(self, skill1: str, skill2: str, threshold: float = 0.7) -> bool:
        """Tell whether two skills match: Levenshtein ratio of the normalised names >= ``threshold``."""
        return ratio(skill1.lower().strip(), skill2.lower().strip()) >= threshold

    def _calculate_skills_similarity(self, resume_skills: list[str], vacancy_skills: list[str]) -> float:
        """Return a fuzzy Jaccard similarity between resume and vacancy skills, in [0, 1].

        Skills are normalised (lower-cased, stripped) and de-duplicated. Each
        resume skill is greedily paired with its best still-unpaired vacancy
        skill when their Levenshtein ratio is at least 0.7. The result is
        ``pairs / (len(resume) + len(vacancy) - pairs)``, i.e. a matched pair
        counts as one element of the union even if the spellings differ.
        Returns 0.0 when either list is empty.
        """
        if not resume_skills or not vacancy_skills:
            return 0.0

        resume_skills_normalized = {skill.lower().strip() for skill in resume_skills}
        vacancy_skills_normalized = {skill.lower().strip() for skill in vacancy_skills}

        # Sorted order makes the greedy pairing reproducible between processes.
        unmatched_vacancy_skills = sorted(vacancy_skills_normalized)
        matched_pairs = 0

        for resume_skill in sorted(resume_skills_normalized):
            best_match_ratio = 0.0
            best_match_skill = None

            for vacancy_skill in unmatched_vacancy_skills:
                similarity_ratio = ratio(resume_skill, vacancy_skill)
                if similarity_ratio > best_match_ratio:
                    best_match_ratio = similarity_ratio
                    best_match_skill = vacancy_skill

            if best_match_skill is not None and best_match_ratio >= 0.7:
                matched_pairs += 1
                # A vacancy skill may be paired with at most one resume skill.
                unmatched_vacancy_skills.remove(best_match_skill)

        # Both sets are non-empty here and matched_pairs <= min(len, len), so the union is >= 1.
        union_size = len(resume_skills_normalized) + len(vacancy_skills_normalized) - matched_pairs
        return matched_pairs / union_size
|
||||
@@ -14,7 +14,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from template_project.ml.configuration import load_configuration
|
||||
from template_project.ml.ioc.make import make_ioc
|
||||
from template_project.ml.routes import embedding, healthcheck, predict
|
||||
from template_project.ml.routes import embed, healthcheck, predict
|
||||
|
||||
LOG_CONFIG: Final = {
|
||||
"version": 1,
|
||||
@@ -55,7 +55,7 @@ def make_asgi_application(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
app.include_router(healthcheck.router)
|
||||
app.include_router(embedding.router)
|
||||
app.include_router(embed.router)
|
||||
app.include_router(predict.router)
|
||||
|
||||
setup_dishka(container=ioc, app=app)
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
from decimal import Decimal
|
||||
|
||||
from template_project.application.common.data_structure import to_data_structure
|
||||
from template_project.application.common.interactor import to_interactor
|
||||
from template_project.application.resume.entity import ResumeId
|
||||
|
||||
|
||||
@to_data_structure
class VacancyInput:
    """A vacancy passed to the salary-prediction interactor."""

    # Identifier of the vacancy.
    vacancy_id: str
    # Lower bound of the vacancy's salary range.
    from_salary: Decimal
    # Upper bound of the vacancy's salary range.
    to_salary: Decimal
    # Skills listed by the vacancy.
    key_skills: list[str]
    # Similarity between the vacancy and the resume.
    # NOTE(review): how this value is produced is not visible here — confirm with the caller.
    resume_similarity: float
|
||||
|
||||
|
||||
@to_data_structure
class PredictSalaryRequest:
    """Input of ``PredictSalaryInteractor.execute``."""

    # Identifier of the resume the prediction is requested for.
    resume_id: ResumeId
    # Skills taken from the resume.
    key_skills: list[str]
    # Vacancies supplied alongside the resume.
    vacancies: list[VacancyInput]
|
||||
|
||||
|
||||
@to_data_structure
class PredictSalaryResponse:
    """Output of ``PredictSalaryInteractor.execute``."""

    # Predicted lower bound of the salary range.
    salary_from: Decimal
    # Predicted upper bound of the salary range.
    salary_to: Decimal
    # Skills suggested for the resume.
    recommended_skills: list[str]
|
||||
|
||||
|
||||
@to_interactor
class PredictSalaryInteractor:
    """Placeholder interactor: answers every request with the same fixed prediction."""

    async def execute(self, request: PredictSalaryRequest) -> PredictSalaryResponse:
        """Return the hard-coded salary range and skill list; ``request`` is not inspected."""
        fixed_salary_from = Decimal(50000)
        fixed_salary_to = Decimal(80000)
        fixed_skills = ["python", "django", "postgresql"]

        return PredictSalaryResponse(
            salary_from=fixed_salary_from,
            salary_to=fixed_salary_to,
            recommended_skills=fixed_skills,
        )
|
||||
@@ -1,6 +1,6 @@
|
||||
from dishka import BaseScope, Provider, Scope, provide_all
|
||||
|
||||
from template_project.ml.interactors.predict_salary import PredictSalaryInteractor
|
||||
from template_project.application.resume.interactors.predict_salary import PredictSalaryInteractor
|
||||
|
||||
|
||||
class InteractorProvider(Provider):
|
||||
|
||||
@@ -6,7 +6,7 @@ from fastapi import APIRouter
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from template_project.application.resume.entity import ResumeId
|
||||
from template_project.ml.interactors.predict_salary import (
|
||||
from template_project.application.resume.interactors.predict_salary import (
|
||||
PredictSalaryInteractor,
|
||||
PredictSalaryRequest,
|
||||
VacancyInput,
|
||||
@@ -82,14 +82,17 @@ class PredictSalaryResponseModel(BaseModel):
|
||||
|
||||
|
||||
@router.post(
|
||||
"/predict_salary",
|
||||
summary="Predict salary",
|
||||
"/predict",
|
||||
summary="Predict salary and recommend skills",
|
||||
description="Predict salary range and recommend skills based on resume and relevant vacancies",
|
||||
responses={
|
||||
200: {"description": "Salary prediction generated successfully", "model": PredictSalaryResponseModel},
|
||||
200: {
|
||||
"description": "Salary prediction and skills recommendation generated successfully",
|
||||
"model": PredictSalaryResponseModel,
|
||||
},
|
||||
},
|
||||
)
|
||||
async def predict_salary(
|
||||
async def predict(
|
||||
request: PredictSalaryRequestModel,
|
||||
interactor: FromDishka[PredictSalaryInteractor],
|
||||
) -> PredictSalaryResponseModel:
|
||||
|
||||
Reference in New Issue
Block a user