this will leave production in ruins

This commit is contained in:
ITQ
2025-03-02 17:04:44 +03:00
parent c139f88a20
commit 9612b67721
15 changed files with 692 additions and 8 deletions
+18
View File
@@ -54,6 +54,24 @@ build_backend-staticfiles:
DOCKERFILE_PATH: "Dockerfile.staticfiles" DOCKERFILE_PATH: "Dockerfile.staticfiles"
IMAGE_NAME: "$CI_REGISTRY_IMAGE/backend-staticfiles" IMAGE_NAME: "$CI_REGISTRY_IMAGE/backend-staticfiles"
# Image build job for the checker service. It merges in the shared
# build-template anchor and only runs for the master branch.
build_checker:
  <<: *build-template
  rules:
    - if: '$CI_COMMIT_REF_NAME == "master"'
  variables:
    # Build context is the checker service directory; the Dockerfile path
    # is given relative to it.
    CONTEXT: "${CI_PROJECT_DIR}/services/checker"
    DOCKERFILE_PATH: "Dockerfile"
    IMAGE_NAME: "$CI_REGISTRY_IMAGE/checker"
# Image build job for the "custom-python" image. It shares the checker
# service's build context but uses Dockerfile.checker; master branch only.
# NOTE(review): presumably this is the image submissions are executed in —
# confirm against DOCKER_IMAGE in services/checker/main.py.
build_custom-python:
  <<: *build-template
  rules:
    - if: '$CI_COMMIT_REF_NAME == "master"'
  variables:
    CONTEXT: "${CI_PROJECT_DIR}/services/checker"
    DOCKERFILE_PATH: "Dockerfile.checker"
    IMAGE_NAME: "$CI_REGISTRY_IMAGE/custom-python"
build_docs: build_docs:
<<: *build-template <<: *build-template
rules: rules:
+11
View File
@@ -370,6 +370,17 @@ services:
restart: unless-stopped restart: unless-stopped
shm_size: 4mb shm_size: 4mb
# Checker service: runs submitted code in throw-away Docker containers, so it
# needs access to the host's Docker daemon.
checker:
  image: gitlab.prodcontest.ru:5050/team-15/project/checker:latest
  build:
    context: ./services/checker
    dockerfile: Dockerfile
  restart: unless-stopped
  volumes:
    # The service creates its client with docker.from_env(), which talks to
    # /var/run/docker.sock unless DOCKER_HOST says otherwise. The socket was
    # previously mounted at /prometheus, where the client never looks.
    # NOTE(review): containers started through this socket resolve bind-mount
    # sources on the HOST, while the service builds its temp dir inside this
    # container — confirm the temp dir is visible to the host at the same path.
    - type: bind
      source: /var/run/docker.sock
      target: /var/run/docker.sock
proxy: proxy:
image: docker.io/nginx:1.27-alpine3.21 image: docker.io/nginx:1.27-alpine3.21
configs: configs:
+2
View File
@@ -24,4 +24,6 @@ FROM docker.io/nginx:latest
COPY --from=builder /app/static /usr/share/nginx/html COPY --from=builder /app/static /usr/share/nginx/html
# NOTE(review): instruction disabled because it breaks the image build:
#   * the source path starts with "../", i.e. outside the build context,
#     which COPY cannot read;
#   * the file name is misspelled ("checher_" instead of "checker_").
# The final nginx stage does not appear to use this file; restore the line
# with a context-relative path only if it is really needed here.
# COPY ../checker/checher_requirements.txt .
CMD ["nginx", "-g", "daemon off;"] CMD ["nginx", "-g", "daemon off;"]
+4 -8
View File
@@ -7,7 +7,9 @@ from pathlib import Path
import django_stubs_ext import django_stubs_ext
import environ import environ
from health_check.plugins import plugin_dir
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from integrations.checker.healthcheck import CheckerHealthCheck
BASE_DIR = Path(__file__).resolve().parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
@@ -30,18 +32,12 @@ ALLOWED_HOSTS = env(
# Integrations # Integrations
YANDEX_CLOUD_FOLDER_ID = env("YANDEX_CLOUD_FOLDER_ID", default=None) CHECKER_API_ENDPOINT = env("CHECKER_API_ENDPOINT", default=None)
YANDEX_CLOUD_API_KEY = env("YANDEX_CLOUD_API_KEY", default=None)
YANDEX_CLOUD_INTEGRATION_ENABLED = (
YANDEX_CLOUD_FOLDER_ID and YANDEX_CLOUD_API_KEY
)
# Register healthchecks # Register healthchecks
# plugin_dir.register(SomeHealthCheckClass) plugin_dir.register(CheckerHealthCheck)
# Caching # Caching
@@ -0,0 +1,22 @@
from http import HTTPStatus as status
import httpx
from django.conf import settings
from health_check.backends import BaseHealthCheckBackend
class CheckerHealthCheck(BaseHealthCheckBackend):
    """Health-check backend that probes the checker service over HTTP.

    The check sends ``GET <CHECKER_API_ENDPOINT>/health`` with a one-second
    timeout and records an error when the endpoint is not configured, the
    request fails, or the service answers with a 5xx status.
    """

    # NOTE(review): presumably tells django-health-check that a failure of
    # this backend is non-critical — confirm against the library docs.
    critical_service = False

    def check_status(self) -> None:
        """Probe the checker service and register an error if it is down."""
        # The settings module defines CHECKER_API_ENDPOINT (default None);
        # ANTIFRAUD_ADDRESS is not defined there and raised AttributeError.
        endpoint = settings.CHECKER_API_ENDPOINT
        if not endpoint:
            self.add_error("Checker service is unaccessible")
            return

        try:
            # The checker app exposes GET /health and has no /ping route; a
            # 404 would pass the 5xx test below and hide a misconfiguration.
            response = httpx.get(
                f"{endpoint.rstrip('/')}/health", timeout=1
            )
            if response.status_code >= status.INTERNAL_SERVER_ERROR:
                self.add_error("Checker service is unaccessible")
        except (httpx.HTTPError, httpx.InvalidURL):
            # InvalidURL is not an HTTPError subclass, so a malformed
            # endpoint would otherwise crash the health-check view.
            self.add_error("Checker service is unaccessible")

    def identifier(self) -> str:
        """Return the name under which this check is reported."""
        return self.__class__.__name__
+170
View File
@@ -0,0 +1,170 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# PyPI configuration file
.pypirc
# Ruff files
.ruff_cache
+42
View File
@@ -0,0 +1,42 @@
# Stage 1: Install dependencies
FROM docker.io/python:3.11-alpine3.20 AS builder

# uv is copied from its official image and used only at build time.
COPY --from=ghcr.io/astral-sh/uv:0.4.30 /uv /uvx /bin/

WORKDIR /app

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONOPTIMIZE=2 \
    UV_COMPILE_BYTECODE=1 \
    UV_PROJECT_ENVIRONMENT=/opt/venv

COPY pyproject.toml .

# Dependencies go into /opt/venv so the runtime stage can copy them as a unit.
RUN uv sync --no-dev --no-install-project --no-cache

# Stage 2: Start the application
FROM docker.io/python:3.11-alpine3.20

WORKDIR /app

COPY --from=builder /opt/venv /opt/venv
COPY . .

# NOTE(review): the service talks to the Docker daemon; confirm that the
# unprivileged "app" user can open the mounted Docker socket at runtime.
RUN adduser -D -g '' app && chown -R app:app ./
USER app

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONOPTIMIZE=2 \
    PATH="/opt/venv/bin:$PATH"

# uvicorn listens on 8000 (see CMD); the documented port must match it.
EXPOSE 8000

# The application serves GET /health and has no /ping route, so probing /ping
# would get a 404 and mark the container unhealthy forever.
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --start-interval=2s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://127.0.0.1:8000/health || exit 1

CMD uvicorn main:app --host 0.0.0.0 --port 8000
+19
View File
@@ -0,0 +1,19 @@
# Python image with the data-science packages from checker_requirements.txt
# pre-installed.
FROM docker.io/python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Compiler toolchain for packages that have to build native extensions.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

COPY checker_requirements.txt .
RUN pip install --no-cache-dir -r checker_requirements.txt

# WORKDIR creates /app. It has to run before the chown below: previously the
# chown came first, hit a non-existent directory and failed the build.
WORKDIR /app

RUN useradd -m appuser && chown -R appuser /app
USER appuser

CMD ["python"]
+87
View File
@@ -0,0 +1,87 @@
# DataRush Checker
## Prerequisites
Ensure you have the following installed on your system:
- [Python](https://www.python.org/) (>=3.11, as required by `pyproject.toml`)
- [uv](https://docs.astral.sh/uv/)
- [Docker](https://www.docker.com/) (for containerized setup)
## Basic setup
### Installation
#### Clone the project
```bash
git clone git@gitlab.prodcontest.ru:team-15/project.git
```
#### Go to the project directory
```bash
cd project/services/checker
```
#### Install dependencies
##### For dev environment
```bash
uv sync --all-extras
```
##### For prod environment
```bash
uv sync --no-dev
```
#### Running
The checker is a FastAPI application (`main:app`) served by uvicorn. It has no database migrations and no Celery worker, but it needs access to a Docker daemon, because every submission is executed in a separate container.
##### Start server
In dev mode:
```bash
uv run uvicorn main:app --reload
```
In prod mode:
```bash
uv run uvicorn main:app --host 0.0.0.0 --port 8000
```
## Containerized setup
### Clone the project
```bash
git clone git@gitlab.prodcontest.ru:team-15/project.git
```
### Go to the project directory
```bash
cd project/services/checker
```
### Build docker image
```bash
docker build -t datarush-checker .
```
@@ -0,0 +1,7 @@
pandas==2.2.3
numpy==2.2.3
matplotlib==3.10.1
scipy==1.15.2
scikit-learn==1.6.1
seaborn==0.13.2
statsmodels==0.14.4
+289
View File
@@ -0,0 +1,289 @@
from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel, Field, HttpUrl, constr
import aiohttp
import asyncio
import docker
import hashlib
import os
import base64
import tempfile
import logging
from urllib.parse import urlparse
import re
# ASGI application object; the route decorators further down register on it.
app = FastAPI()
# Docker client configured from the environment. It is created once, at
# import time, and shared by every request.
docker_client = docker.from_env()
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Image every sandbox container is started from.
# NOTE(review): "gitlab.python:3-slim" does not look like a pullable image
# reference — possibly meant to be the registry path of the custom-python
# image built in CI; confirm before deploying.
DOCKER_IMAGE = "gitlab.python:3-slim"
# Seconds to wait for a sandbox container to exit.
CONTAINER_TIMEOUT = 60
# Upper bound for a single downloaded input file (4 MiB).
MAX_FILE_SIZE = 4 * 1024 * 1024
# Despite the name this is a NEGATED character class: it matches the
# characters that are not allowed and get stripped from file names.
ALLOWED_FILENAME_CHARS = r"[^a-zA-Z0-9_\-.]"
class FileDetails(BaseModel):
    # One input file of an execution request: where to download it from and
    # where to mount it inside the sandbox container.
    # (Comments rather than a docstring, so the generated schema is unchanged.)
    url: HttpUrl = Field(
        default=...,
        description="URL to download the file from (supports HTTP/HTTPS)",
    )
    bind_path: str = Field(
        default=...,
        description="Container path to bind the file (absolute)",
    )
class ExecutionRequest(BaseModel):
    # Payload of POST /execute: the submission, where its answer is read
    # from, an optional expected hash and the input files to mount.
    # (Comments rather than a docstring, so the generated schema is unchanged.)
    code: str = Field(
        default=...,
        description="Base64 encoded Python code to execute",
    )
    answer_file_path: str = Field(
        default="stdout",
        description="Base64 encoded path to result file or 'stdout'",
    )
    expected_hash: str | None = Field(
        default=None,
        description="Optional SHA-256 hash of expected output",
    )
    files: list[FileDetails] = Field(
        default=[],
        description="List of files to mount in container",
    )
class ExecutionResponse(BaseModel):
    # Result of POST /execute: success flag, optional hash comparison, the
    # captured output with its SHA-256 hash, and any error text.
    # (Comments rather than a docstring, so the generated schema is unchanged.)
    success: bool = Field(
        default=...,
        description="Execution success status",
    )
    hash_match: bool | None = Field(
        default=None,
        description="Output hash matches expected (if provided)",
    )
    output: str = Field(
        default=...,
        description="Captured stdout or file contents",
    )
    result_hash: str = Field(
        default=...,
        description="SHA-256 hash of output",
    )
    error: str = Field(
        default=...,
        description="Execution errors or stderr",
    )
class HealthCheckResponse(BaseModel):
    # Body of GET /health: overall service state plus Docker daemon state.
    # (Comments rather than a docstring, so the generated schema is unchanged.)
    status: str = Field(
        default=...,
        description="Service health status",
    )
    docker: str = Field(
        default=...,
        description="Docker daemon status",
    )
def decode_base64(encoded_str: str, field_name: str) -> str:
    """Decode a Base64 string into UTF-8 text.

    Args:
        encoded_str: Base64 text taken from the request.
        field_name: Name of the request field, used in the log line and in
            the error detail.

    Returns:
        The decoded text.

    Raises:
        HTTPException: 400 when the value is not valid Base64 or the decoded
            bytes are not valid UTF-8.
    """
    try:
        return base64.b64decode(encoded_str).decode("utf-8")
    except ValueError as exc:
        # binascii.Error (bad padding/alphabet) and UnicodeDecodeError are
        # both ValueError subclasses; anything else is a programming error
        # and should not be reported to the client as "invalid Base64".
        logger.error(f"Base64 decode failed for {field_name}: {str(exc)}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid Base64 in {field_name}",
        ) from exc
def sanitize_filename(url: str) -> str:
    """Derive a safe local file name from the path component of *url*.

    The last path segment is taken, every character matched by
    ``ALLOWED_FILENAME_CHARS`` (a negated class, i.e. the disallowed
    characters) is removed, and the result is capped at 255 characters.

    Args:
        url: URL the file will be downloaded from.

    Returns:
        The cleaned name, or ``"file"`` when nothing usable is left.
    """
    base_name = os.path.basename(urlparse(url).path)
    clean = re.sub(ALLOWED_FILENAME_CHARS, "", base_name)[:255]
    # "." and ".." consist only of allowed characters but name directories;
    # joining them onto the temp dir would not yield a writable file path.
    if clean in {"", ".", ".."}:
        return "file"
    return clean
async def download_file(
    session: aiohttp.ClientSession, url: str, dest_path: str
) -> None:
    """Download *url* into *dest_path*, enforcing the size limit.

    Args:
        session: Open aiohttp session used for the request.
        url: Address to fetch; only a 200 response is accepted.
        dest_path: Local path the body is written to.

    Raises:
        HTTPException: 400 when the response status is not 200, the request
            fails or it times out; 413 when the body exceeds
            ``MAX_FILE_SIZE``.
    """
    try:
        async with session.get(
            url, timeout=aiohttp.ClientTimeout(total=30)
        ) as resp:
            if resp.status != 200:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Failed to download {url} - Status {resp.status}",
                )
            # bytearray grows in place; repeated `bytes +=` copies the whole
            # buffer on every chunk.
            content = bytearray()
            async for chunk in resp.content.iter_chunked(8192):
                content += chunk
                # Checked per chunk so an oversized body is abandoned early.
                if len(content) > MAX_FILE_SIZE:
                    raise HTTPException(
                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                        detail="File size exceeds 4MB limit",
                    )
        with open(dest_path, "wb") as f:
            f.write(content)
        logger.info(f"Downloaded {url} to {dest_path}")
    except asyncio.TimeoutError as e:
        # Expiry of the total timeout is reported as asyncio.TimeoutError,
        # which is not an aiohttp.ClientError and used to escape unhandled.
        logger.error(f"Download timed out for {url}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Download failed: timed out fetching {url}",
        ) from e
    except aiohttp.ClientError as e:
        logger.error(f"Download error for {url}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Download failed: {str(e)}",
        ) from e
def run_container_safely(
    tmp_dir: str,
    command: list[str],
    bound_files: dict[str, str],
    timeout: int = CONTAINER_TIMEOUT,
) -> dict:
    """Run *command* in a resource-limited container and collect its output.

    Args:
        tmp_dir: Host directory mounted read-write at ``/execution``, which
            is also the container's working directory.
        command: Command line executed inside the container.
        bound_files: Mapping of host file path to container path; each entry
            is mounted read-only.
        timeout: Seconds to wait for the container to exit.

    Returns:
        A dict with ``stdout``, ``stderr`` and ``status`` (the exit code).
        On a Docker error ``status`` is -1 and ``stderr`` carries the error.
    """
    sandbox = None
    try:
        # The work dir comes first so a bound file with the same host path
        # would override it, exactly as with sequential assignment.
        mounts = {
            tmp_dir: {"bind": "/execution", "mode": "rw"},
            **{
                host_path: {"bind": target, "mode": "ro"}
                for host_path, target in bound_files.items()
            },
        }

        sandbox = docker_client.containers.run(
            image=DOCKER_IMAGE,
            command=command,
            volumes=mounts,
            working_dir="/execution",
            stdout=True,
            stderr=True,
            detach=True,
            mem_limit="100m",
            network_mode="none",
            cpu_period=100000,
            cpu_quota=50000,
            user="root",
            security_opt=["no-new-privileges"],
        )

        wait_result = sandbox.wait(timeout=timeout)
        captured_out = sandbox.logs(stdout=True, stderr=False).decode().strip()
        captured_err = sandbox.logs(stdout=False, stderr=True).decode().strip()
        return {
            "stdout": captured_out,
            "stderr": captured_err,
            "status": wait_result["StatusCode"],
        }
    except docker.errors.DockerException as exc:
        logger.error(f"Docker error: {str(exc)}")
        return {
            "stdout": "",
            "stderr": f"Container error: {str(exc)}",
            "status": -1,
        }
    finally:
        # Best-effort cleanup: the container is force-removed on every path.
        if sandbox:
            try:
                sandbox.remove(force=True)
            except docker.errors.DockerException:
                pass
def _failed_execution(error: str) -> ExecutionResponse:
    """Build the uniform response used by every failure path of execute_code."""
    return ExecutionResponse(
        success=False,
        output="",
        result_hash="",
        error=error,
        hash_match=None,
    )


# POST /execute: run a Base64-encoded Python submission in a sandbox container
# and report its output together with a SHA-256 hash of that output.
#
# Steps: decode the request, validate the answer file name, download the input
# files into a temporary directory, write the code to submission.py, run it,
# then hash the captured output and compare it with expected_hash if given.
#
# Failures before or around the container run are returned as a normal
# ExecutionResponse with success=False rather than as an HTTP error.
#
# (Comments rather than a docstring: FastAPI publishes handler docstrings in
# the generated OpenAPI description.)
@app.post("/execute", response_model=ExecutionResponse)
async def execute_code(request: ExecutionRequest) -> ExecutionResponse:
    try:
        code = decode_base64(request.code, "code")
        answer_path = (
            decode_base64(request.answer_file_path, "answer_file_path")
            if request.answer_file_path != "stdout"
            else "stdout"
        )
    except HTTPException as e:
        return _failed_execution(e.detail)

    # validate_file_path already rejects absolute paths.
    if answer_path != "stdout" and not validate_file_path(answer_path):
        return _failed_execution("Invalid answer file path")

    with tempfile.TemporaryDirectory() as tmp_dir:
        bound_files = {}

        if request.files:
            async with aiohttp.ClientSession() as session:
                download_tasks = []
                # NOTE(review): two URLs with the same sanitized base name, or
                # one named "submission.py", map to the same host path and
                # overwrite each other — confirm whether callers can send that.
                for file in request.files:
                    filename = sanitize_filename(str(file.url))
                    dest_path = os.path.join(tmp_dir, filename)
                    bound_files[dest_path] = file.bind_path
                    download_tasks.append(
                        download_file(session, str(file.url), dest_path)
                    )

                try:
                    await asyncio.gather(*download_tasks)
                except HTTPException as e:
                    return _failed_execution(e.detail)
                except (asyncio.TimeoutError, OSError) as e:
                    # Timeouts and local write errors are not HTTPExceptions;
                    # report them like any other download failure.
                    logger.error(f"Download failed: {str(e)}")
                    return _failed_execution(f"Download failed: {str(e)}")

        code_path = os.path.join(tmp_dir, "submission.py")
        # The code was decoded as UTF-8, so write it back as UTF-8 regardless
        # of the process locale.
        with open(code_path, "w", encoding="utf-8") as f:
            f.write(code)
        os.chmod(code_path, 0o444)

        if answer_path == "stdout":
            cmd = ["python", "submission.py"]
        else:
            # "./" keeps cat from reading a name such as "-n" as an option
            # and makes an empty name fail instead of waiting on stdin.
            # "exit 1" keeps the failure visible: a bare `|| echo ...` always
            # exits 0, which reported failed runs as success=True.
            cmd = [
                "sh",
                "-c",
                f"python submission.py && cat ./{answer_path} "
                "|| { echo 'EXECUTION_FAILED'; exit 1; }",
            ]

        try:
            # The Docker SDK call blocks, so it runs in a worker thread.
            result = await asyncio.to_thread(
                run_container_safely,
                tmp_dir,
                cmd,
                bound_files,
                CONTAINER_TIMEOUT,
            )
        except Exception as e:
            logger.error(f"Container execution failed: {str(e)}")
            return _failed_execution(f"Execution failed: {str(e)}")

        output = result["stdout"]
        error = result["stderr"]
        success = result["status"] == 0

        if answer_path != "stdout" and not output:
            error += "\nNo output captured - check answer file path"

        # The hash covers the full output; only the echoed copy is truncated.
        result_hash = hashlib.sha256(output.encode()).hexdigest()

        return ExecutionResponse(
            success=success,
            hash_match=(
                result_hash == request.expected_hash
                if request.expected_hash
                else None
            ),
            output=output[:5000],
            result_hash=result_hash,
            error=error[:5000],
        )
# GET /health: report whether the Docker daemon answers a ping. The handler
# itself never fails; an unreachable daemon is reported as "degraded".
# (Comments rather than a docstring: FastAPI publishes handler docstrings in
# the generated OpenAPI description.)
@app.get("/health", response_model=HealthCheckResponse)
async def health_check() -> HealthCheckResponse:
    try:
        docker_client.ping()
    except docker.errors.DockerException:
        return HealthCheckResponse(status="degraded", docker="unavailable")
    return HealthCheckResponse(status="healthy", docker="connected")
def validate_file_path(path: str) -> bool:
    """Return True when *path* is a safe bare file name for the answer file.

    A valid name is non-empty, relative, has no directory component, does not
    start with ``-`` and consists only of alphanumerics, ``_``, ``-`` and
    ``.``. The special names ``.`` and ``..`` are rejected.

    Args:
        path: Candidate file name, already Base64-decoded.

    Returns:
        True if the name may be interpolated into the container command.
    """
    # The character test alone accepts "", "." and ".." (none is a regular
    # file) and names like "-n", which `cat` would read as an option.
    if not path or path in {".", ".."} or path.startswith("-"):
        return False
    return (
        not os.path.isabs(path)
        and os.path.basename(path) == path
        and all(c.isalnum() or c in {"_", "-", "."} for c in path)
    )
+13
View File
@@ -0,0 +1,13 @@
[project]
name = "checker"
version = "0.1.0"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"aiohttp>=3.11.13",
"docker>=7.1.0",
"fastapi>=0.115.11",
"python-multipart>=0.0.20",
"regex>=2024.11.6",
"uvicorn>=0.34.0",
]
+8
View File
@@ -0,0 +1,8 @@
#!/bin/sh
# ANSI escape sequences: bold green for the final message, then reset.
COLOR_OK='\033[1;32m'
COLOR_RESET='\033[0m'

# Format first, then lint with automatic fixes.
uvx ruff format .
uvx ruff check . --fix

# %b expands the backslash escapes held in the arguments, so the colour codes
# stay out of the format string while the printed bytes stay the same.
printf '%bLinters/formatters runned%b\n' "$COLOR_OK" "$COLOR_RESET"