Source code for fanoutqa.eval.models

from dataclasses import asdict, dataclass
from typing import TypedDict


@dataclass
class AccuracyScore:
    """Pair of accuracy figures: a loose one and a strict one."""

    loose: float
    """Loose accuracy: The mean proportion of reference strings found in the generation."""

    strict: float
    """Strict accuracy: The proportion of questions with a loose accuracy of 1.0."""
@dataclass
class RougeScorePart:
    """Precision / recall / F-score triple used for each entry of a ``RougeScore``."""

    # Field order is part of the positional constructor signature; keep it.
    precision: float
    recall: float
    fscore: float
@dataclass
class RougeScore:
    """ROUGE results, one ``RougeScorePart`` per variant (rouge1, rouge2, rougeL)."""

    # Field order is part of the positional constructor signature; keep it.
    rouge1: RougeScorePart
    rouge2: RougeScorePart
    rougeL: RougeScorePart
@dataclass
class EvaluationSingleScore:
    """Scores recorded for a single question, keyed by ``question_id``."""

    # Identifier of the question these scores belong to.
    question_id: str
    # Accuracy value for this question.
    acc: float
    # ROUGE breakdown for this question.
    rouge: RougeScore
    # BLEURT value for this question.
    bleurt: float
    # Integer "gpt" score for this question.
    # NOTE(review): presumably a GPT-as-judge verdict -- confirm against the scorer.
    gpt: int
@dataclass
class EvaluationScore:
    """Overall evaluation scores plus the list of per-question scores in ``raw``."""

    # Loose/strict accuracy pair.
    acc: AccuracyScore
    # ROUGE breakdown.
    rouge: RougeScore
    # BLEURT value.
    bleurt: float
    # "gpt" score as a float.
    # NOTE(review): presumably an average of the per-question int scores -- confirm.
    gpt: float
    # Per-question scores.
    raw: list[EvaluationSingleScore]

    def to_dict(self, include_raw: bool = False):
        """Convert this score to a plain ``dict`` via :func:`dataclasses.asdict`.

        :param include_raw: When true, keep the ``"raw"`` entry (the
            per-question scores) in the result; when false (the default),
            drop it.
        :return: A dict of this dataclass's fields, with nested dataclasses
            converted to dicts as well.
        """
        serialized = asdict(self)
        if include_raw:
            return serialized
        # Tolerate a missing key, hence pop-with-default rather than ``del``.
        serialized.pop("raw", None)
        return serialized
class Answer(TypedDict):
    """A dictionary of the form ``{"id": "...", "answer": "..."}``.

    Both keys are required and both values are strings.
    """

    # Key names are part of the dict schema and must not be renamed.
    id: str
    answer: str