feat(): prediction pipeline

This commit is contained in:
gitgernit
2025-11-23 04:11:52 +03:00
parent 2e6214a5ec
commit d1c7641698
25 changed files with 224 additions and 244 deletions
@@ -26,7 +26,6 @@ from template_project.application.resume.entity import (
ResumePrediction,
ResumeProject,
)
from template_project.application.resume.errors import ResumeNotFoundError
from template_project.application.user.entity import UserId
@@ -35,12 +34,8 @@ class DefaultResumeDataGateway(ResumeDataGateway):
self._session = session
@override
async def load(self, resume_id: ResumeId) -> Resume:
    """Fetch the resume with the given id from the session.

    Raises:
        ResumeNotFoundError: if the session lookup yields ``None``.
    """
    found = await self._session.get(Resume, resume_id)
    if found is not None:
        return found
    raise ResumeNotFoundError(resume_id=resume_id)
async def load_by_resume_id(self, resume_id: ResumeId) -> Resume | None:
    """Look up a resume by id via the session; the result may be ``None``."""
    resume = await self._session.get(Resume, resume_id)
    return resume
@override
async def list_by_user_id(self, user_id: UserId, limit: int, offset: int) -> Sequence[Resume]:
@@ -64,11 +59,15 @@ class DefaultResumeDataGateway(ResumeDataGateway):
async def get_history(self, resume_id: ResumeId) -> Sequence[Resume]:
    """Collect the chain of resumes reachable from ``resume_id``.

    Starts at the resume identified by ``resume_id`` and repeatedly follows
    ``down_resume_id`` until it is ``None``.

    Returns:
        The resumes in traversal order, starting with the requested one.
        The list is empty when the starting resume does not exist, and is
        truncated at the first link whose target cannot be loaded.
    """
    # TODO: N+1
    history: list[Resume] = []

    # Use only the None-returning lookup: a raising lookup here would make
    # the "not found" branches below unreachable and double the queries.
    current_resume = await self.load_by_resume_id(resume_id)
    if current_resume is None:
        return history
    history.append(current_resume)

    while current_resume.down_resume_id is not None:
        current_resume = await self.load_by_resume_id(current_resume.down_resume_id)
        if current_resume is None:
            # Dangling link: return what has been collected so far.
            break
        history.append(current_resume)

    return history
@@ -70,6 +70,34 @@ class StringArrayType(TypeDecorator[list[str]]):
return []
class ExperienceTypeType(TypeDecorator[ExperienceType]):
    """String-backed column type that maps to and from ``ExperienceType``.

    On the way to the database an ``ExperienceType`` member is replaced by
    its ``value``; on the way back the stored string is converted into the
    matching ``ExperienceType`` member.
    """

    impl: Any = String
    cache_ok: bool | None = True

    @override
    def process_bind_param(self, value: Any, dialect: Any) -> Any:
        """Convert a Python-side value into the bound parameter.

        Returns:
            ``None`` for ``None``, the member's ``value`` for an
            ``ExperienceType``, and the string itself for a ``str``.

        Raises:
            TypeError: if ``value`` is of any other type. Raising here avoids
                silently binding NULL in place of an unsupported value.
        """
        if value is None:
            return None
        if isinstance(value, ExperienceType):
            return value.value
        if isinstance(value, str):
            # Strings are passed through without validation.
            return value
        raise TypeError(f"Cannot bind {type(value)} as ExperienceType")

    @override
    def process_result_value(self, value: Any, dialect: Any) -> ExperienceType:
        """Convert a database value into an ``ExperienceType`` member.

        Raises:
            ValueError: if ``value`` is ``None``, is a string that does not
                correspond to an ``ExperienceType`` member, or is of an
                unsupported type.
        """
        if value is None:
            raise ValueError("experience_type cannot be None")
        if isinstance(value, ExperienceType):
            return value
        if isinstance(value, str):
            try:
                return ExperienceType(value)
            except ValueError as exc:
                # Chain the enum's own error so the root cause stays visible.
                raise ValueError(f"Invalid experience_type value: {value}") from exc
        raise ValueError(f"Cannot convert {type(value)} to ExperienceType")
user_table: Final = Table(
"users",
meta_data,
@@ -138,7 +166,7 @@ resume_table: Final = Table(
Column("location", String, nullable=False),
Column("about_me", String, nullable=False),
Column("key_skills", StringArrayType(), nullable=False, server_default=text("'[]'::jsonb")),
Column("experience_type", String, nullable=False),
Column("experience_type", ExperienceTypeType(), nullable=False),
Column("down_resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=True, default=None),
Column("up_resume_id", UUID, ForeignKey("resume.id", ondelete="CASCADE"), nullable=True, default=None),
)
@@ -241,6 +269,7 @@ mapper_registry.map_imperatively(
resume_table,
properties={
"key_skills": resume_table.c.key_skills,
"experience_type": resume_table.c.experience_type,
},
)
mapper_registry.map_imperatively(ResumeEmbedding, resume_embedding_table)
@@ -20,6 +20,8 @@ class DefaultVacancyDataGateway(VacancyDataGateway):
select(Vacancy, label("resume_similarity", vacancy_embedding_table.c.vector.cosine_distance(vector)))
.join(VacancyEmbedding, vacancy_embedding_table.c.vacancy_id == vacancy_table.c.id)
.where(vacancy_embedding_table.c.vector.cosine_distance(vector) > 0.5)
.order_by(vacancy_embedding_table.c.vector.cosine_distance(vector).asc())
.limit(100)
)
result = await self._session.execute(statement)
return [