mirror of
https://gitlab.com/megazordpobeda/DataRush.git
synced 2026-05-23 01:37:11 +00:00
this will left production in ruins
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Ruff files
|
||||
.ruff_cache
|
||||
@@ -0,0 +1,42 @@
|
||||
# Stage 1: Install dependencies
FROM docker.io/python:3.11-alpine3.20 AS builder

# The uv binaries are copied from the pinned uv image.
COPY --from=ghcr.io/astral-sh/uv:0.4.30 /uv /uvx /bin/

WORKDIR /app

# UV_PROJECT_ENVIRONMENT places the virtualenv at /opt/venv so that stage 2
# can copy it as a single directory.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONOPTIMIZE=2 \
    UV_COMPILE_BYTECODE=1 \
    UV_PROJECT_ENVIRONMENT=/opt/venv

COPY pyproject.toml .

RUN uv sync --no-dev --no-install-project --no-cache


# Stage 2: Start the application
FROM docker.io/python:3.11-alpine3.20

WORKDIR /app

COPY --from=builder /opt/venv /opt/venv

COPY . .

RUN adduser -D -g '' app && chown -R app:app ./

USER app

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONOPTIMIZE=2 \
    PATH="/opt/venv/bin:$PATH"

# Must match the --port passed to uvicorn below.
EXPOSE 8000

# The application exposes GET /health; there is no /ping route.
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --start-interval=2s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://127.0.0.1:8000/health || exit 1

# Exec form makes uvicorn PID 1 so it receives SIGTERM directly on stop.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -0,0 +1,19 @@
|
||||
FROM docker.io/python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# build-essential is needed for packages that compile native extensions.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

COPY checker_requirements.txt .

RUN pip install --no-cache-dir -r checker_requirements.txt

# WORKDIR creates /app; it has to exist before the chown below, otherwise
# the build fails with "No such file or directory".
WORKDIR /app

RUN useradd -m appuser && chown -R appuser /app

USER appuser

CMD ["python"]
|
||||
@@ -0,0 +1,87 @@
|
||||
# DataRush Checker
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Ensure you have the following installed on your system:
|
||||
|
||||
- [Python](https://www.python.org/) (>=3.11)
|
||||
- [uv](https://docs.astral.sh/uv/)
|
||||
- [Docker](https://www.docker.com/) (for containerized setup)
|
||||
|
||||
## Basic setup
|
||||
|
||||
### Installation
|
||||
|
||||
#### Clone the project
|
||||
|
||||
```bash
|
||||
git clone git@gitlab.prodcontest.ru:team-15/project.git
|
||||
```
|
||||
|
||||
#### Go to the project directory
|
||||
|
||||
```bash
|
||||
cd project/services/checker
|
||||
```
|
||||
|
||||
#### Install dependencies
|
||||
|
||||
##### For dev environment
|
||||
|
||||
```bash
|
||||
uv sync --all-extras
|
||||
```
|
||||
|
||||
##### For prod environment
|
||||
|
||||
```bash
|
||||
uv sync --no-dev
|
||||
```
|
||||
|
||||
#### Running
|
||||
|
||||
##### Apply migrations
|
||||
|
||||
```bash
|
||||
uv run python manage.py migrate
|
||||
```
|
||||
|
||||
##### Start celery worker
|
||||
|
||||
```bash
|
||||
celery -A config worker -l INFO
|
||||
```
|
||||
|
||||
##### Start server
|
||||
|
||||
In dev mode:
|
||||
|
||||
```bash
|
||||
uv run uvicorn main:app --reload --port 8000
|
||||
```
|
||||
|
||||
In prod mode:
|
||||
|
||||
```bash
|
||||
uv run uvicorn main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
## Containerized setup
|
||||
|
||||
### Clone the project
|
||||
|
||||
```bash
|
||||
git clone git@gitlab.prodcontest.ru:team-15/project.git
|
||||
```
|
||||
|
||||
### Go to the project directory
|
||||
|
||||
```bash
|
||||
cd project/services/checker
|
||||
```
|
||||
|
||||
### Build docker image
|
||||
|
||||
```bash
|
||||
docker build -t datarush-checker .
|
||||
```
|
||||
@@ -0,0 +1,7 @@
|
||||
pandas==2.2.3
|
||||
numpy==2.2.3
|
||||
matplotlib==3.10.1
|
||||
scipy==1.15.2
|
||||
scikit-learn==1.6.1
|
||||
seaborn==0.13.2
|
||||
statsmodels==0.14.4
|
||||
@@ -0,0 +1,289 @@
|
||||
from fastapi import FastAPI, HTTPException, status
|
||||
from pydantic import BaseModel, Field, HttpUrl, constr
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import docker
|
||||
import hashlib
|
||||
import os
|
||||
import base64
|
||||
import tempfile
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
import re
|
||||
|
||||
# Process-wide singletons: the ASGI application and the Docker SDK client.
app = FastAPI()
docker_client = docker.from_env()

# Root logging is configured once here; the module logger is used throughout.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Image in which submissions are executed.
DOCKER_IMAGE = "gitlab.python:3-slim"
# Passed to container.wait() as its timeout.
CONTAINER_TIMEOUT = 60
# Upper bound, in bytes, on each downloaded input file (4 MiB).
MAX_FILE_SIZE = 4 * 1024**2
# Negated character class: it matches every character that is NOT allowed in
# a local file name, so that matches can be stripped with re.sub().
ALLOWED_FILENAME_CHARS = r"[^a-zA-Z0-9_\-.]"
|
||||
|
||||
|
||||
# One remote file that is downloaded and then mounted into the container.
class FileDetails(BaseModel):
    # Required: where the file is fetched from.
    url: HttpUrl = Field(
        default=...,
        description="URL to download the file from (supports HTTP/HTTPS)",
    )
    # Required: where the file appears inside the container.
    bind_path: str = Field(
        default=...,
        description="Container path to bind the file (absolute)",
    )
|
||||
|
||||
|
||||
# Request body accepted by POST /execute.
class ExecutionRequest(BaseModel):
    # Required: the submission source, Base64 encoded.
    code: str = Field(
        default=...,
        description="Base64 encoded Python code to execute",
    )
    # Either the literal "stdout" or a Base64 encoded file name.
    answer_file_path: str = Field(
        default="stdout",
        description="Base64 encoded path to result file or 'stdout'",
    )
    # When given, the response reports whether the output hash equals it.
    expected_hash: str | None = Field(
        default=None,
        description="Optional SHA-256 hash of expected output",
    )
    # Input files to download and mount; empty by default.
    files: list[FileDetails] = Field(
        default=[],
        description="List of files to mount in container",
    )
|
||||
|
||||
|
||||
# Response body returned by POST /execute.
class ExecutionResponse(BaseModel):
    success: bool = Field(
        default=...,
        description="Execution success status",
    )
    # None when the request carried no expected hash.
    hash_match: bool | None = Field(
        default=None,
        description="Output hash matches expected (if provided)",
    )
    output: str = Field(
        default=...,
        description="Captured stdout or file contents",
    )
    result_hash: str = Field(
        default=...,
        description="SHA-256 hash of output",
    )
    error: str = Field(
        default=...,
        description="Execution errors or stderr",
    )
|
||||
|
||||
|
||||
# Response body returned by GET /health.
class HealthCheckResponse(BaseModel):
    status: str = Field(
        default=...,
        description="Service health status",
    )
    docker: str = Field(
        default=...,
        description="Docker daemon status",
    )
|
||||
|
||||
|
||||
def decode_base64(encoded_str: str, field_name: str) -> str:
    """Decode a Base64 string into UTF-8 text.

    Args:
        encoded_str: Base64 payload taken from the request.
        field_name: Name of the request field; used only in the log line
            and in the error detail.

    Returns:
        The decoded text.

    Raises:
        HTTPException: 400 when the payload is not valid Base64 or the
            decoded bytes are not valid UTF-8.
    """
    try:
        return base64.b64decode(encoded_str).decode("utf-8")
    except ValueError as e:
        # binascii.Error (malformed Base64) and UnicodeDecodeError (bytes
        # that are not UTF-8) are both ValueError subclasses. Anything else
        # is a programming error and must not be reported as a client fault.
        logger.error("Base64 decode failed for %s: %s", field_name, e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid Base64 in {field_name}",
        ) from e
|
||||
|
||||
|
||||
def sanitize_filename(url: str) -> str:
    """Derive a safe local file name from the path component of *url*.

    The last path segment is taken, every character matched by
    ALLOWED_FILENAME_CHARS (a negated class, i.e. the disallowed characters)
    is removed, and the result is cut to 255 characters.

    Args:
        url: URL the file will be downloaded from.

    Returns:
        The cleaned file name, or "file" when nothing usable remains.
    """
    base_name = os.path.basename(urlparse(url).path)
    clean = re.sub(ALLOWED_FILENAME_CHARS, "", base_name)[:255]

    # "." and ".." survive the character filter but name directories once
    # joined to the download directory, so they are treated like an empty
    # name.
    if clean in {"", ".", ".."}:
        return "file"
    return clean
|
||||
|
||||
|
||||
async def download_file(
    session: aiohttp.ClientSession, url: str, dest_path: str
) -> None:
    """Download *url* into *dest_path*, enforcing the MAX_FILE_SIZE limit.

    Args:
        session: Open aiohttp session used for the request.
        url: Address to fetch.
        dest_path: Local path the body is written to.

    Raises:
        HTTPException: 400 when the server does not answer 200, the request
            times out, or aiohttp reports a client error; 413 when the body
            is larger than MAX_FILE_SIZE.
    """
    chunks: list[bytes] = []
    received = 0
    try:
        async with session.get(
            url, timeout=aiohttp.ClientTimeout(total=30)
        ) as resp:
            if resp.status != 200:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Failed to download {url} - Status {resp.status}",
                )

            # Collect chunks and join once at the end; repeated `bytes +=`
            # copies the whole buffer on every iteration.
            async for chunk in resp.content.iter_chunked(8192):
                received += len(chunk)
                if received > MAX_FILE_SIZE:
                    raise HTTPException(
                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                        detail="File size exceeds 4MB limit",
                    )
                chunks.append(chunk)

    except asyncio.TimeoutError as e:
        # The total=30 timeout is reported as asyncio.TimeoutError, which is
        # not an aiohttp.ClientError and would otherwise escape as a 500.
        logger.error("Download timed out for %s", url)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Download failed: timed out fetching {url}",
        ) from e
    except aiohttp.ClientError as e:
        logger.error("Download error for %s: %s", url, e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Download failed: {str(e)}",
        ) from e

    with open(dest_path, "wb") as f:
        f.write(b"".join(chunks))
    logger.info("Downloaded %s to %s", url, dest_path)
|
||||
|
||||
|
||||
def run_container_safely(
    tmp_dir: str,
    command: list[str],
    bound_files: dict[str, str],
    timeout: int = CONTAINER_TIMEOUT,
) -> dict:
    """Run *command* in a throw-away container and collect its output.

    The container has no network, a 100 MB memory limit, half a CPU
    (cpu_quota / cpu_period) and "no-new-privileges". *tmp_dir* is mounted
    read-write at /execution, which is also the working directory.

    Args:
        tmp_dir: Host directory mounted at /execution.
        command: Command line executed inside the container.
        bound_files: Mapping of host path -> container path; each entry is
            mounted read-only.
        timeout: Passed to ``container.wait``.

    Returns:
        A dict with "stdout", "stderr" (both stripped text) and "status"
        (the container exit code, or -1 when Docker reported an error).

    Raises:
        Exception: anything that is not a ``DockerException`` propagates to
            the caller; the container is still removed first.
    """
    container = None
    try:
        volumes = {tmp_dir: {"bind": "/execution", "mode": "rw"}}
        volumes.update(
            (host_path, {"bind": container_path, "mode": "ro"})
            for host_path, container_path in bound_files.items()
        )

        # NOTE(review): the submission runs as root inside the container.
        # Switching to an unprivileged user would presumably require the
        # mounted temp directory to be writable by that user - confirm
        # before changing.
        container = docker_client.containers.run(
            image=DOCKER_IMAGE,
            command=command,
            volumes=volumes,
            working_dir="/execution",
            stdout=True,
            stderr=True,
            detach=True,
            mem_limit="100m",
            network_mode="none",
            cpu_period=100000,
            cpu_quota=50000,
            user="root",
            security_opt=["no-new-privileges"],
        )

        exit_code = container.wait(timeout=timeout)["StatusCode"]

        # Submissions may print arbitrary bytes; replace undecodable ones
        # instead of losing the whole result to a UnicodeDecodeError.
        stdout = (
            container.logs(stdout=True, stderr=False)
            .decode("utf-8", errors="replace")
            .strip()
        )
        stderr = (
            container.logs(stdout=False, stderr=True)
            .decode("utf-8", errors="replace")
            .strip()
        )

        return {"stdout": stdout, "stderr": stderr, "status": exit_code}

    except docker.errors.DockerException as e:
        logger.error("Docker error: %s", e)
        return {
            "stdout": "",
            "stderr": f"Container error: {str(e)}",
            "status": -1,
        }
    finally:
        if container:
            try:
                container.remove(force=True)
            except docker.errors.DockerException as e:
                # Cleanup is best effort, but a leaked container should be
                # visible in the logs.
                logger.warning("Failed to remove container: %s", e)
|
||||
|
||||
|
||||
def _failure_response(error: str) -> ExecutionResponse:
    """Build the response used when a request fails before producing output."""
    return ExecutionResponse(
        success=False,
        output="",
        result_hash="",
        error=error,
        hash_match=None,
    )


def _unique_name(name: str, taken: set[str]) -> str:
    """Return *name*, prefixed with a counter if it is already in *taken*."""
    candidate = name
    attempt = 0
    while candidate in taken:
        attempt += 1
        candidate = f"{attempt}_{name}"[:255]
    taken.add(candidate)
    return candidate


@app.post("/execute", response_model=ExecutionResponse)
async def execute_code(request: ExecutionRequest) -> ExecutionResponse:
    """Run a submitted script in a container and report its output and hash.

    Steps: decode the Base64 fields, validate the answer file name, download
    the requested input files into a temporary directory, write the code to
    ``submission.py``, run it via ``run_container_safely`` and hash the
    captured output with SHA-256.

    Args:
        request: Code, answer location, optional expected hash and files.

    Returns:
        An ExecutionResponse. Failures (bad input, download errors,
        container errors) are reported in the body with ``success=False``
        rather than as HTTP errors.
    """
    try:
        code = decode_base64(request.code, "code")
        answer_path = (
            "stdout"
            if request.answer_file_path == "stdout"
            else decode_base64(request.answer_file_path, "answer_file_path")
        )
    except HTTPException as e:
        return _failure_response(e.detail)

    # validate_file_path already rejects absolute paths.
    if answer_path != "stdout" and not validate_file_path(answer_path):
        return _failure_response("Invalid answer file path")

    with tempfile.TemporaryDirectory() as tmp_dir:
        bound_files: dict[str, str] = {}
        if request.files:
            # Reserved names: the submission itself and the two directory
            # entries. Two URLs ending in the same file name would otherwise
            # overwrite each other (or the submission) on disk.
            taken = {"submission.py", ".", ".."}
            async with aiohttp.ClientSession() as session:
                download_tasks = []
                for file in request.files:
                    filename = _unique_name(
                        sanitize_filename(str(file.url)), taken
                    )
                    dest_path = os.path.join(tmp_dir, filename)
                    bound_files[dest_path] = file.bind_path
                    download_tasks.append(
                        download_file(session, str(file.url), dest_path)
                    )

                try:
                    await asyncio.gather(*download_tasks)
                except HTTPException as e:
                    return _failure_response(e.detail)

        code_path = os.path.join(tmp_dir, "submission.py")
        with open(code_path, "w", encoding="utf-8") as f:
            f.write(code)
        os.chmod(code_path, 0o444)

        if answer_path == "stdout":
            cmd = ["python", "submission.py"]
        else:
            # "./" stops a name starting with "-" from being parsed as a cat
            # option. The failure branch keeps the EXECUTION_FAILED marker
            # but re-raises the original exit status; a bare `|| echo`
            # always exits 0 and made every run look successful.
            # NOTE(review): anything the script prints to stdout still
            # precedes the file contents in the captured output.
            cmd = [
                "sh",
                "-c",
                f"python submission.py && cat ./{answer_path} "
                "|| { status=$?; echo 'EXECUTION_FAILED'; exit $status; }",
            ]

        try:
            # The Docker SDK is blocking; keep it off the event loop.
            result = await asyncio.to_thread(
                run_container_safely,
                tmp_dir,
                cmd,
                bound_files,
                CONTAINER_TIMEOUT,
            )
        except Exception as e:
            # Boundary handler: e.g. a wait() timeout is not a
            # DockerException and reaches this point.
            logger.exception("Container execution failed: %s", e)
            return _failure_response(f"Execution failed: {str(e)}")

        output = result["stdout"]
        error = result["stderr"]
        success = result["status"] == 0

        if answer_path != "stdout" and not output:
            error += "\nNo output captured - check answer file path"

        # The hash covers the full output; only the echoed copy is truncated.
        result_hash = hashlib.sha256(output.encode()).hexdigest()

        hash_match = None
        if request.expected_hash:
            # hexdigest() is lower-case; accept upper-case or padded input.
            hash_match = result_hash == request.expected_hash.strip().lower()

        return ExecutionResponse(
            success=success,
            hash_match=hash_match,
            output=output[:5000],
            result_hash=result_hash,
            error=error[:5000],
        )
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthCheckResponse)
async def health_check() -> HealthCheckResponse:
    """Report whether the service can reach the Docker daemon.

    Returns:
        status="healthy" / docker="connected" when the ping succeeds,
        otherwise status="degraded" / docker="unavailable".
    """
    try:
        # ping() is a blocking call; run it in a worker thread like the
        # container execution so the event loop stays responsive.
        await asyncio.to_thread(docker_client.ping)
    except Exception:
        # Deliberately broad: an unreachable daemon may surface as a
        # transport-level error that is not a DockerException, and a health
        # endpoint should answer "degraded" rather than fail with a 500.
        logger.warning("Docker daemon ping failed", exc_info=True)
        return HealthCheckResponse(status="degraded", docker="unavailable")
    return HealthCheckResponse(status="healthy", docker="connected")
|
||||
|
||||
|
||||
def validate_file_path(path: str) -> bool:
    """Return True when *path* is a plain, safe file name.

    A valid name is non-empty, is not "." or "..", does not start with "-"
    (it would be parsed as a command-line option), has no directory part and
    consists only of alphanumeric characters, "_", "-" and ".".

    Args:
        path: Candidate file name supplied by the client.

    Returns:
        True if the name passes every check, otherwise False.
    """
    if not path or path in {".", ".."} or path.startswith("-"):
        return False
    return (
        not os.path.isabs(path)
        and os.path.basename(path) == path
        and all(c.isalnum() or c in {"_", "-", "."} for c in path)
    )
|
||||
@@ -0,0 +1,13 @@
|
||||
[project]
|
||||
name = "checker"
|
||||
version = "0.1.0"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"aiohttp>=3.11.13",
|
||||
"docker>=7.1.0",
|
||||
"fastapi>=0.115.11",
|
||||
"python-multipart>=0.0.20",
|
||||
"regex>=2024.11.6",
|
||||
"uvicorn>=0.34.0",
|
||||
]
|
||||
Executable
+8
@@ -0,0 +1,8 @@
|
||||
#!/bin/sh

# ANSI escape sequences used to colour the final status line.
GREEN='\033[1;32m'
NC='\033[0m'

# Format first, then lint with automatic fixes.
uvx ruff format .
uvx ruff check . --fix

# %b expands the backslash escapes stored in the colour variables.
printf '%b\n' "${GREEN}Linters/formatters runned${NC}"
|
||||
Reference in New Issue
Block a user