HEX
Server: Apache/2.4.52 (Ubuntu)
System: Linux spn-python 5.15.0-89-generic #99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023 x86_64
User: arjun (1000)
PHP: 8.1.2-1ubuntu2.20
Disabled: NONE
Upload Files
File: //home/arjun/.local/lib/python3.10/site-packages/langsmith/evaluation/string_evaluator.py
"""This module contains the StringEvaluator class."""

from typing import Callable, Dict, Optional

from pydantic import BaseModel

from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run


class StringEvaluator(RunEvaluator, BaseModel):
    """Run evaluator that scores string inputs and outputs via a callback.

    The configured ``grading_function`` receives the run's input value, the
    run's predicted value and, when available, the reference answer taken
    from the example. The dictionary it returns is turned into an
    ``EvaluationResult``.
    """

    evaluation_name: Optional[str] = None
    """The name of the evaluation, such as 'Accuracy' or 'Salience'."""
    input_key: str = "input"
    """The key in the run inputs from which the input string is read."""
    prediction_key: str = "output"
    """The key in the run outputs from which the prediction string is read."""
    answer_key: Optional[str] = "output"
    """The key in the example outputs holding the answer string.

    When set to ``None`` no answer is looked up and the grading function
    receives ``None`` as its third argument.
    """
    grading_function: Callable[[str, str, Optional[str]], Dict]
    """Callback ``(input, prediction, answer) -> dict`` that grades the run."""

    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> EvaluationResult:
        """Grade one run and wrap the outcome in an ``EvaluationResult``.

        Args:
            run: The run to grade. Its ``inputs`` must contain ``input_key``
                and its ``outputs`` must contain ``prediction_key``.
            example: Optional example supplying the reference answer.

        Returns:
            An ``EvaluationResult`` built from ``{"key": evaluation_name}``
            merged with the grading function's dictionary. Entries returned
            by the grading function take precedence, including ``"key"``.

        Raises:
            ValueError: If ``run.outputs`` is ``None``.
            KeyError: If ``input_key`` or ``prediction_key`` is absent from
                the run's inputs or outputs respectively.
        """
        if run.outputs is None:
            raise ValueError("Run outputs cannot be None.")

        # The reference answer is only available when an example with
        # outputs was supplied and an answer key is configured; a key that
        # is missing from the example outputs also yields None (dict.get).
        reference = None
        if example and example.outputs is not None and self.answer_key is not None:
            reference = example.outputs.get(self.answer_key)

        # Direct indexing is intentional: a missing key surfaces as KeyError.
        graded_input = run.inputs[self.input_key]
        graded_prediction = run.outputs[self.prediction_key]

        verdict = self.grading_function(graded_input, graded_prediction, reference)

        # "key" goes first so the grading function's own entries win on clash.
        result_fields = {"key": self.evaluation_name, **verdict}
        return EvaluationResult(**result_fields)