feat: write the evaluation to disk as a Parquet file
Only a subset of the fields is written for now. Signed-off-by: Raito Bezarius <masterancpp@gmail.com>
This commit is contained in:
parent
64fee32cc0
commit
71dded6ef3
|
@ -38,6 +38,8 @@ in
|
|||
aiofiles
|
||||
pydantic-settings
|
||||
uvicorn
|
||||
polars
|
||||
dataclass-wizard
|
||||
];
|
||||
};
|
||||
}
|
||||
|
|
130
src/api/evaluation/models.py
Normal file
130
src/api/evaluation/models.py
Normal file
|
@ -0,0 +1,130 @@
|
|||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from dataclass_wizard import DumpMixin, JSONWizard, LoadMixin
|
||||
|
||||
|
||||
@dataclass
class MaintainerAttribute(JSONWizard):
    """
    One maintainer entry, as listed in `MetadataAttribute.maintainers`.

    Only `name` is required; every other field defaults to `None`.
    """

    name: str
    github: str | None = None
    github_id: int | None = None
    email: str | None = None
    matrix: str | None = None
|
||||
|
||||
|
||||
@dataclass
class LicenseAttribute(JSONWizard):
    """
    One license entry, as listed in `MetadataAttribute.license`.

    Every field has a default, so an entry carrying only some of the keys
    (for example just a full name) can still be represented.
    """

    full_name: str | None = None
    deprecated: bool = False
    free: bool = False
    redistributable: bool = False
    short_name: str | None = None
    spdx_id: str | None = None
    url: str | None = None
|
||||
|
||||
|
||||
@dataclass
class MetadataAttribute(JSONWizard, LoadMixin, DumpMixin):
    """
    The `meta` section of an evaluated attribute.

    Every field has a default, so a `meta` that only carries some of the
    keys still loads.
    """

    outputs_to_install: list[str] = field(default_factory=list)
    available: bool = True
    broken: bool = False
    unfree: bool = False
    unsupported: bool = False
    insecure: bool = False
    main_program: str | None = None
    position: str | None = None
    homepage: str | None = None
    description: str | None = None
    name: str | None = None
    maintainers: list[MaintainerAttribute] = field(default_factory=list)
    license: list[LicenseAttribute] = field(default_factory=list)
    platforms: list[str] = field(default_factory=list)
    known_vulnerabilities: list[str] = field(default_factory=list)

    def __pre_as_dict__(self) -> None:
        """
        Flatten grouped maintainer entries before the instance is dumped.

        A raw dict entry with a non-null `scope` key stands for a group: it
        is replaced by the entries of its `members` list (nothing when the
        key is absent). Anything else is kept as is.

        Only real dicts are probed with `.get()`: entries that are already
        `MaintainerAttribute` instances have no such method, and probing them
        used to raise `AttributeError` when dumping.
        """
        linearized_maintainers = []
        for maintainer in self.maintainers:
            if isinstance(maintainer, dict) and maintainer.get("scope") is not None:
                linearized_maintainers.extend(maintainer.get("members", []))
            else:
                linearized_maintainers.append(maintainer)

        self.maintainers = linearized_maintainers
|
||||
|
||||
|
||||
@dataclass
class EvaluatedAttribute(JSONWizard):
    """
    A fully evaluated attribute, i.e. one whose evaluation reported no error.

    All fields are required; only `meta` may be `None`.
    """

    attr: str
    attr_path: list[str]
    name: str
    drv_path: str
    # drv -> list of outputs.
    input_drvs: dict[str, list[str]]
    meta: MetadataAttribute | None
    # presumably output name -> output path; verify against the evaluator output
    outputs: dict[str, str]
    system: str
|
||||
|
||||
|
||||
@dataclass
class PartialEvaluatedAttribute:
    """
    The outcome of evaluating one attribute, which may have failed.

    `parse_evaluation_result` fills `evaluation` only when the raw record
    carries no `error`; otherwise `evaluation` stays `None` and `error`
    holds the reported message.
    """

    attr: str
    attr_path: list[str]
    error: str | None = None
    evaluation: EvaluatedAttribute | None = None
|
||||
|
||||
|
||||
def parse_total_evaluation(raw: dict[str, Any]) -> EvaluatedAttribute:
    """
    Build an `EvaluatedAttribute` from one raw evaluation record.

    `raw["meta"]` is normalised in place first, because two of its keys come
    in several shapes:

    - `license` may be the string "unknown" (becomes an empty list), any
      other string (becomes a single entry with that `fullName`), or a
      single object (wrapped in a list). Lists are left untouched.
    - `maintainers` may mix plain maintainers with grouped entries carrying
      a non-null `scope`; a grouped entry is replaced by its `members`
      (nothing when that key is absent).

    A missing or null `meta` is passed through unchanged.
    """
    meta = raw.get("meta")
    if isinstance(meta, dict):
        if "license" in meta and not isinstance(meta["license"], list):
            license_value = meta["license"]
            if license_value == "unknown":
                meta["license"] = []
            elif isinstance(license_value, str):
                meta["license"] = [{"fullName": license_value}]
            else:
                meta["license"] = [license_value]

        maintainers = meta.get("maintainers")
        if isinstance(maintainers, list):
            flattened = []
            for maintainer in maintainers:
                # Only dicts can be grouped entries; anything else is kept
                # verbatim and left for `from_dict` to accept or reject.
                if isinstance(maintainer, dict) and maintainer.get("scope") is not None:
                    flattened.extend(maintainer.get("members", []))
                else:
                    flattened.append(maintainer)
            meta["maintainers"] = flattened

    return EvaluatedAttribute.from_dict(raw)
|
||||
|
||||
|
||||
def parse_evaluation_result(line: str) -> PartialEvaluatedAttribute:
    """
    Parse one JSON line of evaluator output.

    The full evaluation is only parsed when the record carries no `error`;
    otherwise `evaluation` is left as `None`.

    Raises whatever `json.loads` raises on malformed input.
    """
    raw = json.loads(line)
    error = raw.get("error")
    # The raw records appear to use camelCase keys (cf. "fullName" in the
    # license fixup), so read "attrPath" first and keep "attr_path" as a
    # fallback for producers that emit snake_case.
    attr_path = raw.get("attrPath", raw.get("attr_path"))
    return PartialEvaluatedAttribute(
        attr=raw.get("attr"),
        attr_path=attr_path,
        error=error,
        evaluation=parse_total_evaluation(raw) if error is None else None,
    )
|
|
@ -1,19 +1,72 @@
|
|||
from collections.abc import AsyncGenerator
|
||||
from typing import Annotated
|
||||
import time
|
||||
from collections.abc import AsyncGenerator, Generator
|
||||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import FastAPI, Path
|
||||
import polars as pl
|
||||
from fastapi import FastAPI, Path, WebSocket
|
||||
from fastapi.exceptions import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from api.config import settings
|
||||
from api.evaluation import evaluation_entrypoint
|
||||
from api.evaluation.models import parse_evaluation_result
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
async def stream_evaluation(commit_sha1: str) -> AsyncGenerator[bytes, None]:
    """
    Stream the raw evaluation output of `commit_sha1` and record the successes.

    Each line produced by `evaluation_entrypoint` is yielded UTF-8 encoded
    before it is parsed. Lines that parse to a successful evaluation
    contribute one row; once the evaluation is exhausted, the rows are
    written as a Parquet file under /tmp.

    Only a subset of the evaluated fields is stored. The file is written only
    if the consumer drains the generator to the end.
    """
    rows = []
    async for lines in evaluation_entrypoint(commit_sha1):
        for line in lines:
            yield line.encode("utf8")
            evaluation = parse_evaluation_result(line).evaluation
            if evaluation is None:
                # Failed evaluations are streamed but not stored.
                continue
            rows.append(
                [
                    evaluation.attr,
                    evaluation.attr_path,
                    evaluation.name,
                    evaluation.drv_path,
                    evaluation.input_drvs,
                    evaluation.system,
                ]
            )

    df = pl.DataFrame(
        rows,
        schema={
            "attr": pl.String,
            "attr_path": pl.List,
            "name": pl.String,
            "drv_path": pl.String,
            "input_drvs": pl.Struct,
            "system": pl.String,
        },
        # Each inner list is one record; say so explicitly instead of letting
        # polars guess the orientation from the data dimensions.
        orient="row",
    )
    df.write_parquet(f"/tmp/nixpkgs-{commit_sha1}-success-eval.parquet")
|
||||
|
||||
|
||||
def follow(thefile: Any) -> Generator[str, None, None]:
    """
    Yield the lines appended to `thefile`, like `tail -f`.

    The file is first positioned at its end, so existing content is skipped.
    The generator never finishes on its own: while no new line is available
    it sleeps for 0.1 s (blocking the calling thread) and polls again.
    """
    # whence=2 means "relative to the end of the file".
    thefile.seek(0, 2)
    while True:
        if fresh := thefile.readline():
            yield fresh
        else:
            time.sleep(0.1)
|
||||
|
||||
|
||||
@app.websocket("/logs/{project_slug}/{revision}")
async def stream_evaluation_log(
    websocket: WebSocket,
    project_slug: Annotated[str, Path(title="The slug of the project, e.g. nixpkgs")],
    revision: Annotated[str, Path(title="The SHA1 revision for this repository")],
) -> None:
    """
    Push the lines appended to the evaluation log of `revision` over a websocket.

    The log is read from `settings.evaluation_logs_dir`; only lines written
    after the connection was accepted are sent. `project_slug` is part of
    the route but is not used to locate the log.

    Raises `FileNotFoundError` (or another `OSError`) if the log cannot be
    opened.
    """
    # Imported here so the block does not depend on module-level imports.
    import asyncio

    from fastapi import WebSocketDisconnect

    await websocket.accept()
    log_path = settings.evaluation_logs_dir / f"evaluation-{revision}.log"
    try:
        with open(log_path) as log:
            # whence=2: start at the end of the file, only new lines matter.
            log.seek(0, 2)
            while True:
                line = log.readline()
                if line:
                    await websocket.send_text(line)
                else:
                    # Wait without blocking the event loop; a time.sleep()
                    # here would stall every other connection.
                    await asyncio.sleep(0.1)
    except WebSocketDisconnect:
        # The client went away: stop tailing quietly.
        return
|
||||
|
||||
|
||||
@app.post("/evaluations/{project_slug}/{revision}")
|
||||
|
|
Loading…
Reference in a new issue