diff --git a/config.example.toml b/config.example.toml index 47fef47..09209ee 100644 --- a/config.example.toml +++ b/config.example.toml @@ -22,3 +22,6 @@ bucket_name = "" endpoint_url = "" access_key = "" secret_key = "" + +[ml_api] +url = "" diff --git a/infrastructure/configs/backend/config.toml b/infrastructure/configs/backend/config.toml index 1b9e089..e80ad1c 100644 --- a/infrastructure/configs/backend/config.toml +++ b/infrastructure/configs/backend/config.toml @@ -22,3 +22,6 @@ bucket_name = "" endpoint_url = "" access_key = "" secret_key = "" + +[ml_api] +url = "http://localhost:90" diff --git a/pyproject.toml b/pyproject.toml index bb8ccf7..df6568d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -177,8 +177,8 @@ ignore = [ "N813", "S106", "ERA", - "RUF", "PT022", + "RUF" ] external = ["WPS"] diff --git a/src/template_project/adapters/data_gateways/vacancy.py b/src/template_project/adapters/data_gateways/vacancy.py index 9e52c53..031215d 100644 --- a/src/template_project/adapters/data_gateways/vacancy.py +++ b/src/template_project/adapters/data_gateways/vacancy.py @@ -1,11 +1,12 @@ from collections.abc import Sequence from typing import override -from sqlalchemy import select +from sqlalchemy import label, select from sqlalchemy.ext.asyncio import AsyncSession from template_project.adapters.data_gateways.tables import vacancy_embedding_table, vacancy_table from template_project.application.vacancy.data_gateway import VacancyDataGateway +from template_project.application.vacancy.data_structure import SuitableVacancy from template_project.application.vacancy.entity import Vacancy, VacancyEmbedding @@ -14,11 +15,17 @@ class DefaultVacancyDataGateway(VacancyDataGateway): self._session = session @override - async def get_suitable(self, vector: list[float]) -> Sequence[Vacancy]: + async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]: statement = ( - select(Vacancy) + select(Vacancy, label("resume_similarity", vacancy_embedding_table.c.vector.cosine_distance(vector))) .join(VacancyEmbedding, vacancy_embedding_table.c.id == vacancy_table.c.id) .where(vacancy_embedding_table.c.vector.cosine_distance(vector) > 0.5) ) result = await self._session.execute(statement) - return result.scalars().all() + return [ + SuitableVacancy( + vacancy=res[0], + resume_similarity=res[1], + ) + for res in result.scalars() + ] diff --git a/src/template_project/adapters/vector_generators/resume.py b/src/template_project/adapters/generators/resume_embedding_vector.py similarity index 94% rename from src/template_project/adapters/vector_generators/resume.py rename to src/template_project/adapters/generators/resume_embedding_vector.py index a2590c9..1370502 100644 --- a/src/template_project/adapters/vector_generators/resume.py +++ b/src/template_project/adapters/generators/resume_embedding_vector.py @@ -1,6 +1,6 @@ from typing import Final, override -from template_project.adapters.ml_client import MlApiGateway +from template_project.adapters.ml_api_gateway import MlApiGateway from template_project.application.common.enums import ExperienceType from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator diff --git a/src/template_project/adapters/generators/resume_prediction.py b/src/template_project/adapters/generators/resume_prediction.py new file mode 100644 index 0000000..a3ee6ab --- /dev/null +++ b/src/template_project/adapters/generators/resume_prediction.py @@ -0,0 +1,39 @@ +from collections.abc import Sequence +from typing import override + +from template_project.adapters.ml_api_gateway import MlApiGateway, SuitableVacancyDs +from template_project.application.resume.entity import Resume, ResumePrediction +from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator +from template_project.application.vacancy.data_structure import SuitableVacancy + + +class DefaultResumePredictionGenerator(ResumePredictionGenerator): + def __init__(self, ml_api_gateway: MlApiGateway) -> None: + self._ml_api_gateway = ml_api_gateway + + @override + async def generate( + self, + resume: Resume, + suituble_vacancies: Sequence[SuitableVacancy], + ) -> ResumePrediction: + response = await self._ml_api_gateway.generate_resume_prediction( + resume_id=resume.id, + key_skills=resume.key_skills, + suituble_vacancies=[ + SuitableVacancyDs( + vacancy_id=suituble_vacancy.vacancy.id, + from_salary=suituble_vacancy.vacancy.from_salary, + to_salary=suituble_vacancy.vacancy.to_salary, + key_skills=suituble_vacancy.vacancy.key_skills, + resume_similarity=suituble_vacancy.resume_similarity, + ) + for suituble_vacancy in suituble_vacancies + ], + ) + return ResumePrediction.factory( + resume_id=resume.id, + from_salary=response.salary_from, + to_salary=response.salary_to, + recommended_skills=response.recommended_skills, + ) diff --git a/src/template_project/adapters/ml_api_gateway.py b/src/template_project/adapters/ml_api_gateway.py new file mode 100644 index 0000000..e6bcfe7 --- /dev/null +++ b/src/template_project/adapters/ml_api_gateway.py @@ -0,0 +1,63 @@ +from collections.abc import Sequence +from decimal import Decimal +from typing import cast + +from httpx import AsyncClient + +from template_project.application.common.data_structure import to_data_structure +from template_project.application.resume.entity import ResumeId + + +@to_data_structure +class SuitableVacancyDs: + vacancy_id: str + from_salary: Decimal + to_salary: Decimal + key_skills: list[str] + resume_similarity: float + + +@to_data_structure +class GenerateResumePredictionResponse: + salary_from: Decimal + salary_to: Decimal + recommended_skills: list[str] + + +class MlApiGateway: + def __init__(self, client: AsyncClient) -> None: + self._client = client + + async def generate_embedding(self, text: str) -> list[float]: + response = await self._client.post("/get_embedding", json={"text": text}) + return cast(list[float], response.json()["embedding"]) + + async def generate_resume_prediction( + self, + resume_id: ResumeId, + key_skills: list[str], + suituble_vacancies: Sequence[SuitableVacancyDs], + ) -> GenerateResumePredictionResponse: + response = await self._client.post( + "/predict_salary", + json={ + "resume_id": resume_id, + "key_skills": key_skills, + "vacancies": [ + { + "vacancy_id": suituble_vacancy.vacancy_id, + "from_salary": suituble_vacancy.from_salary, + "to_salary": suituble_vacancy.to_salary, + "key_skills": suituble_vacancy.key_skills, + "resume_similarity": suituble_vacancy.resume_similarity, + } for suituble_vacancy in suituble_vacancies + ], + }, + ) + + response_json = response.json() + return GenerateResumePredictionResponse( + salary_from=response_json["salary_from"], + salary_to=response_json["salary_to"], + recommended_skills=response_json["recommended_skills"], + ) diff --git a/src/template_project/adapters/ml_client.py b/src/template_project/adapters/ml_client.py deleted file mode 100644 index 4bfd12f..0000000 --- a/src/template_project/adapters/ml_client.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import cast - -from httpx import AsyncClient - - -class MlApiGateway: - def __init__(self, client: AsyncClient) -> None: - self._client = client - - async def generate_embedding(self, text: str) -> list[float]: - response = await self._client.post("/get_embedding", json={"text": text}) - return cast(list[float], response.json()["embedding"]) diff --git a/src/template_project/application/common/entity.py b/src/template_project/application/common/entity.py index a624adf..19df759 100644 --- a/src/template_project/application/common/entity.py +++ b/src/template_project/application/common/entity.py @@ -1,7 +1,7 @@ from collections.abc import Hashable -from dataclasses import dataclass, replace +from dataclasses import dataclass from datetime import datetime -from typing import Self, cast, dataclass_transform, override +from typing import cast, dataclass_transform, override from uuid import UUID from template_project.application.common.errors import EntityAlreadyDeletedError @@ -22,9 +22,6 @@ class Entity[EntityId: UUID](Hashable): if self.deleted_at is not None: raise EntityAlreadyDeletedError(entity_name=self.__class__.__name__) - def __copy__(self) -> Self: - return replace(self) - @override def __eq__(self, other: object) -> bool: if isinstance(other, Entity): diff --git a/src/template_project/application/resume/entity.py b/src/template_project/application/resume/entity.py index 247aa7e..cbaffca 100644 --- a/src/template_project/application/resume/entity.py +++ b/src/template_project/application/resume/entity.py @@ -80,6 +80,23 @@ class ResumePrediction(Entity[ResumePredictionId]): to_salary: Decimal recommended_skills: list[str] + @classmethod + def factory( + cls, + resume_id: ResumeId, + from_salary: Decimal, + to_salary: Decimal, + recommended_skills: list[str], + ) -> Self: + return cls( + id=ResumePredictionId(uuid7()), + created_at=datetime.now(tz=UTC), + resume_id=resume_id, + from_salary=from_salary, + to_salary=to_salary, + recommended_skills=recommended_skills, + ) + @to_entity class ResumeExperience(Entity[ResumeExperienceId]): diff --git a/src/template_project/application/resume/resume_embedding_pipeline.py b/src/template_project/application/resume/interactors/resume_embedding.py similarity index 67% rename from src/template_project/application/resume/resume_embedding_pipeline.py rename to src/template_project/application/resume/interactors/resume_embedding.py index 408e9e2..dd9d579 100644 --- a/src/template_project/application/resume/resume_embedding_pipeline.py +++ b/src/template_project/application/resume/interactors/resume_embedding.py @@ -3,20 +3,21 @@ from collections.abc import Callable from Levenshtein import ratio from template_project.application.common.unit_of_work import UnitOfWork -from template_project.application.resume.entity import Resume, ResumeEmbedding, ResumePrediction +from template_project.application.resume.entity import Resume, ResumeEmbedding +from template_project.application.resume.resume_prediction_generator import ResumePredictionGenerator from template_project.application.resume.vector_generator import ResumeEmbeddingVectorGenerator from template_project.application.vacancy.data_gateway import VacancyDataGateway -from template_project.application.vacancy.entity import Vacancy +from template_project.application.vacancy.data_structure import SuitableVacancy def suitable_vacancies_key( resume: Resume, -) -> Callable[[Vacancy], bool]: - def wrapper(vacancy: Vacancy) -> bool: +) -> Callable[[SuitableVacancy], tuple[bool, bool]]: + def wrapper(suitable_vacancy: SuitableVacancy) -> tuple[bool, bool]: count_skills = 0 ratio_skill_sum = 0.0 for resum_key_skill in resume.key_skills: - for suitable_resume_key_skill in vacancy.key_skills: + for suitable_resume_key_skill in suitable_vacancy.vacancy.key_skills: ratio_skill = ratio(resum_key_skill, suitable_resume_key_skill) if ratio_skill != 0: count_skills += 1 @@ -27,26 +28,28 @@ def suitable_vacancies_key( except ZeroDivisionError: matching_skills = 0 - return resume.experience_type == vacancy.experience_type and matching_skills >= 50 + return resume.experience_type == suitable_vacancy.vacancy.experience_type, matching_skills >= 50 return wrapper -class ResumeEmbeddingPipeline: +class ResumeEmbeddingInteractor: def __init__( self, unit_of_work: UnitOfWork, vacancy_data_gateway: VacancyDataGateway, vector_generator: ResumeEmbeddingVectorGenerator, + resume_prediction_generator: ResumePredictionGenerator, ) -> None: self.unit_of_work = unit_of_work - self.vacancy_data_gateway = vacancy_data_gateway self.vector_generator = vector_generator + self.vacancy_data_gateway = vacancy_data_gateway + self.resume_prediction_generator = resume_prediction_generator async def run( self, resume: Resume, - ) -> ResumePrediction: + ) -> None: vector = await self.vector_generator.generate( position=resume.position, about_me=resume.about_me, @@ -62,13 +65,12 @@ class ResumeEmbeddingPipeline: suitable_vacancies_filtered = sorted( suitable_vacancies, key=suitable_vacancies_key(resume), + )[:50] + + resume_prediction = await self.resume_prediction_generator.generate( + resume=resume, + suituble_vacancies=suitable_vacancies_filtered, ) - suitable_vacancies = suitable_vacancies_filtered[:50] - - # TODO: тут надо сделать отправку в ИИ - - await self.unit_of_work.add(resume_embedding) + await self.unit_of_work.add(resume_embedding, resume_prediction) await self.unit_of_work.commit() - - raise NotImplementedError diff --git a/src/template_project/application/resume/resume_prediction.py b/src/template_project/application/resume/resume_prediction.py deleted file mode 100644 index 16f79c2..0000000 --- a/src/template_project/application/resume/resume_prediction.py +++ /dev/null @@ -1 +0,0 @@ -# class ResumePredicition diff --git a/src/template_project/application/resume/resume_prediction_generator.py b/src/template_project/application/resume/resume_prediction_generator.py new file mode 100644 index 0000000..3ce63a8 --- /dev/null +++ b/src/template_project/application/resume/resume_prediction_generator.py @@ -0,0 +1,16 @@ +from abc import abstractmethod +from collections.abc import Sequence +from typing import Protocol + +from template_project.application.resume.entity import Resume, ResumePrediction +from template_project.application.vacancy.data_structure import SuitableVacancy + + +class ResumePredictionGenerator(Protocol): + @abstractmethod + async def generate( + self, + resume: Resume, + suituble_vacancies: Sequence[SuitableVacancy], + ) -> ResumePrediction: + raise NotImplementedError diff --git a/src/template_project/application/vacancy/data_gateway.py b/src/template_project/application/vacancy/data_gateway.py index b0f83f3..2f357a5 100644 --- a/src/template_project/application/vacancy/data_gateway.py +++ b/src/template_project/application/vacancy/data_gateway.py @@ -2,10 +2,10 @@ from abc import abstractmethod from collections.abc import Sequence from typing import Protocol -from template_project.application.vacancy.entity import Vacancy +from template_project.application.vacancy.data_structure import SuitableVacancy class VacancyDataGateway(Protocol): @abstractmethod - async def get_suitable(self, vector: list[float]) -> Sequence[Vacancy]: + async def get_suitable(self, vector: list[float]) -> Sequence[SuitableVacancy]: raise NotImplementedError diff --git a/src/template_project/application/vacancy/data_structure.py b/src/template_project/application/vacancy/data_structure.py new file mode 100644 index 0000000..aeed661 --- /dev/null +++ b/src/template_project/application/vacancy/data_structure.py @@ -0,0 +1,8 @@ +from template_project.application.common.data_structure import to_data_structure +from template_project.application.vacancy.entity import Vacancy + + +@to_data_structure +class SuitableVacancy: + vacancy: Vacancy + resume_similarity: float diff --git a/src/template_project/web_api/configuration.py b/src/template_project/web_api/configuration.py index 0d6ba1a..452aecb 100644 --- a/src/template_project/web_api/configuration.py +++ b/src/template_project/web_api/configuration.py @@ -28,6 +28,11 @@ class S3Config: secret_key: str +@to_configuration +class MlApiConfiguration: + url: str + + @to_configuration class AccessTokenConfiguration: crypto_key: str @@ -71,6 +76,7 @@ class Configuration: access_token: AccessTokenConfiguration yandex_oauth: YandexOAuthConfiguration firebase: FirebaseConfiguration + ml_api: MlApiConfiguration retort = Retort( diff --git a/src/template_project/web_api/ioc/connection.py b/src/template_project/web_api/ioc/connection.py index e1e4841..769c05e 100644 --- a/src/template_project/web_api/ioc/connection.py +++ b/src/template_project/web_api/ioc/connection.py @@ -2,10 +2,12 @@ from collections.abc import AsyncIterable from aioboto3.session import Session from dishka import Provider, Scope, provide +from httpx import AsyncClient from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine +from template_project.adapters.ml_api_gateway import MlApiGateway from template_project.adapters.s3_storage import AioBoto3ClientLike -from template_project.web_api.configuration import DatabaseConfiguration, S3Config +from template_project.web_api.configuration import DatabaseConfiguration, MlApiConfiguration, S3Config class ConnectionProvider(Provider): @@ -35,3 +37,8 @@ class ConnectionProvider(Provider): aws_secret_access_key=config.secret_key, ) as s3_client: yield s3_client + + @provide(scope=Scope.APP) + async def ml_api_gateway(self, config: MlApiConfiguration) -> AsyncIterable[MlApiGateway]: + async with AsyncClient(base_url=config.url) as client: + yield MlApiGateway(client) diff --git a/src/template_project/web_api/ioc/make.py b/src/template_project/web_api/ioc/make.py index b3409b3..6078f3a 100644 --- a/src/template_project/web_api/ioc/make.py +++ b/src/template_project/web_api/ioc/make.py @@ -6,6 +6,7 @@ from template_project.web_api.configuration import ( Configuration, DatabaseConfiguration, FirebaseConfiguration, + MlApiConfiguration, S3Config, ServerConfiguration, YandexOAuthConfiguration, @@ -20,6 +21,7 @@ from template_project.web_api.ioc.notifications import ( NotificationServiceProvider, ) from template_project.web_api.ioc.oauth import OAuthClientProvider +from template_project.web_api.ioc.other import OtherProvider from template_project.web_api.ioc.storage import StorageProvider @@ -35,6 +37,7 @@ def make_ioc(configuration: Configuration) -> AsyncContainer: OAuthClientProvider(), NotificationServiceProvider(), StorageProvider(), + OtherProvider(), validation_settings=STRICT_VALIDATION, context={ ServerConfiguration: configuration.server, @@ -44,5 +47,6 @@ def make_ioc(configuration: Configuration) -> AsyncContainer: FirebaseConfiguration: configuration.firebase, Configuration: configuration, S3Config: configuration.s3, + MlApiConfiguration: configuration.ml_api, }, ) diff --git a/src/template_project/web_api/ioc/other.py b/src/template_project/web_api/ioc/other.py new file mode 100644 index 0000000..6eae97c --- /dev/null +++ b/src/template_project/web_api/ioc/other.py @@ -0,0 +1,13 @@ +from dishka import BaseScope, Provider, Scope, WithParents, provide_all + +from template_project.adapters.generators.resume_embedding_vector import DefaultResumeEmbeddingVectorGenerator +from template_project.adapters.generators.resume_prediction import DefaultResumePredictionGenerator + + +class OtherProvider(Provider): + scope: BaseScope | None = Scope.REQUEST + + other_providers = provide_all( + WithParents[DefaultResumePredictionGenerator], + WithParents[DefaultResumeEmbeddingVectorGenerator], + ) diff --git a/tests/web_api/ioc.py b/tests/web_api/ioc.py index 1885969..047f75f 100644 --- a/tests/web_api/ioc.py +++ b/tests/web_api/ioc.py @@ -23,6 +23,7 @@ from template_project.web_api.configuration import ( Configuration, DatabaseConfiguration, FirebaseConfiguration, + MlApiConfiguration, S3Config, ServerConfiguration, YandexOAuthConfiguration, @@ -35,6 +36,7 @@ from template_project.web_api.ioc.idp import IdPProvider from template_project.web_api.ioc.interactor import InteractorProvider from template_project.web_api.ioc.notifications import NotificationServiceProvider from template_project.web_api.ioc.oauth import OAuthClientProvider +from template_project.web_api.ioc.other import OtherProvider from template_project.web_api.ioc.storage import StorageProvider from tests.web_api.test_api_gateway import TestApiGateway @@ -91,6 +93,7 @@ def make_ioc(configuration: Configuration, app: FastAPI) -> AsyncContainer: NotificationServiceProvider(), StorageProvider(), TestProvider(), + OtherProvider(), validation_settings=STRICT_VALIDATION, context={ ServerConfiguration: configuration.server, @@ -100,6 +103,7 @@ def make_ioc(configuration: Configuration, app: FastAPI) -> AsyncContainer: FirebaseConfiguration: configuration.firebase, Configuration: configuration, S3Config: configuration.s3, + MlApiConfiguration: configuration.ml_api, FastAPI: app, }, )