> ## Documentation Index
> Fetch the complete documentation index at: https://docs.adaptive-ml.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Reward servers

> Deploy custom grading functions as external servers

Reward servers let you deploy custom grading logic as an external API that Adaptive Engine calls during training and evaluation.

Use reward servers when your reward depends on external systems like databases, simulated environments, or sandboxes.

## Building a reward server

Subclass `RewardServer` and implement two async methods, `score` and `info`:

```python theme={null}
from adaptive_sdk.external import RewardServer, ValidatedRequest, Response, ServerInfo
from pydantic import BaseModel, Field

# Define metadata schema (validated against dataset records)
class ScaryLetterMetadata(BaseModel):
    # The character completions must avoid; min_length=1 and max_length=1
    # restrict the value to exactly one character.
    scary_letter: str = Field(min_length=1, max_length=1)

class ScaryLetterRewardServer(RewardServer[ScaryLetterMetadata]):
    """Grader that rewards completions which avoid the record's scary letter."""

    def __init__(self, port: int = 8000, blocking: bool = True, **kwargs):
        # Hand the metadata model to the base class alongside the port and blocking flag.
        super().__init__(port, ScaryLetterMetadata, blocking, **kwargs)

    async def score(self, request: ValidatedRequest[ScaryLetterMetadata]) -> Response:
        """Return reward 1.0 when the last turn omits the scary letter, else 0.0."""
        final_text = request.turns[-1].content
        forbidden = request.metadata.scary_letter

        # Guard clause: a single occurrence of the letter forfeits the reward.
        if forbidden in final_text:
            return Response(reward=0.0, metadata={"feedback": "Found scary letters!"})
        return Response(reward=1.0, metadata={"feedback": "No scary letters!"})

    async def info(self) -> ServerInfo:
        """Describe this server (version, name, description)."""
        return ServerInfo(
            version="1.0",
            name="Scary Letter Detector",
            description="Rewards completions that avoid a specific letter",
        )

if __name__ == "__main__":
    # Use port 50056 (the port the testing and deployment examples target)
    # instead of the constructor default of 8000; `blocking` keeps its default of True.
    server = ScaryLetterRewardServer(port=50056)
```

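The `score` method receives a `ValidatedRequest` with:

* `request.turns` - the conversation history (a list of role/content pairs); in the examples on this page the last turn is the completion being graded
* `request.metadata` - metadata from the dataset record, validated against your metadata schema

It returns a `Response` with:

* `reward` - float score
* `metadata` - optional dict with feedback or debug info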

### No metadata

If your reward doesn't need metadata, use `EmptyMetadata`:

```python theme={null}
from adaptive_sdk.external import RewardServer, EmptyMetadata, ValidatedRequest, Response, ServerInfo

class LengthRewardServer(RewardServer[EmptyMetadata]):
    """Grader that needs no metadata: longer completions earn more, capped at 1.0."""

    def __init__(self, port: int = 8000, blocking: bool = True, **kwargs):
        super().__init__(port, EmptyMetadata, blocking, **kwargs)

    async def score(self, request: ValidatedRequest[EmptyMetadata]) -> Response:
        """Return len(last turn) / 100, clamped so it never exceeds 1.0."""
        text = request.turns[-1].content
        fraction = len(text) / 100
        # 100 characters or more earns the full reward.
        reward = fraction if fraction < 1.0 else 1.0
        return Response(reward=reward, metadata={})

    async def info(self) -> ServerInfo:
        """Describe this server (version, name, description)."""
        return ServerInfo(
            version="1.0",
            name="Length Rewarder",
            description="Rewards longer responses",
        )

if __name__ == "__main__":
    # Use port 50056 instead of the constructor default of 8000;
    # `blocking` keeps its default of True.
    server = LengthRewardServer(port=50056)
```

### SQL execution example

A more realistic example that executes SQL queries and compares results:

```python theme={null}
import os
import sqlite3
from collections import Counter
from typing import List, Dict, Any

import pandas as pd
from adaptive_sdk.external import RewardServer, ValidatedRequest, Response, ServerInfo
from pydantic import BaseModel, Field

class SQLMetadata(BaseModel):
    # Expected query output, one dict per row (column name -> value).
    ground_truth_results: List[Dict[str, Any]]
    # Database file path; the server joins it onto its db_base_path.
    db_path: str

class SQLRewardServer(RewardServer[SQLMetadata]):
    """Reward server that executes the model's SQL against a SQLite database.

    The content of the last turn is treated as the query. Its result rows are
    compared with the record's ``ground_truth_results``:

    * ``1.0``  - the rows match (row order is ignored, duplicates are counted)
    * ``0.0``  - the query ran but returned different rows
    * ``-1.0`` - the completion is not a SELECT, or executing it raised
    """

    def __init__(self, db_base_path: str, port: int = 8000, blocking: bool = True, **kwargs):
        """Store the directory that ``db_path`` values are resolved against.

        Args:
            db_base_path: Directory joined with each record's ``db_path``.
            port: Port handed to the base ``RewardServer``.
            blocking: Blocking flag handed to the base ``RewardServer``.
            **kwargs: Forwarded unchanged to the base ``RewardServer``.
        """
        # NOTE(review): assigned before super().__init__() - presumably because a
        # blocking server may not return from the base constructor; confirm in the SDK.
        self.db_base_path = db_base_path
        super().__init__(port, SQLMetadata, blocking, **kwargs)

    async def score(self, request: ValidatedRequest[SQLMetadata]) -> Response:
        """Run the completion as SQL and reward it by comparing result rows."""
        # Completions frequently start with whitespace or a newline; strip it so
        # an otherwise valid query is not rejected by the prefix check below.
        sql_query = request.turns[-1].content.strip()

        if not sql_query.upper().startswith("SELECT"):
            return Response(reward=-1.0, metadata={"status": "invalid_query"})

        # Explicit sentinel so `finally` can tell whether a connection was opened.
        conn = None
        try:
            conn = sqlite3.connect(os.path.join(self.db_base_path, request.metadata.db_path))
            df_actual = pd.read_sql_query(sql_query, conn)
            actual_results = df_actual.to_dict(orient="records")

            match = self._results_match(actual_results, request.metadata.ground_truth_results)
            return Response(
                reward=float(match),
                metadata={"status": "success" if match else "wrong_result"},
            )
        except Exception as e:
            # Any failure (bad SQL, missing table, unhashable values, ...) is reported
            # to the caller as a negative reward rather than crashing the request.
            return Response(reward=-1.0, metadata={"status": "error", "message": str(e)})
        finally:
            if conn is not None:
                conn.close()

    def _results_match(self, actual: List[Dict[str, Any]], expected: List[Dict[str, Any]]) -> bool:
        """Return True when both results contain the same rows, ignoring row order.

        Rows are compared as multisets: a plain set comparison would treat
        ``[a, a, b]`` and ``[a, b, b]`` as equal because both have three rows and
        the same distinct rows.
        """
        if len(actual) != len(expected):
            return False
        # NOTE(review): pandas returns NULLs in numeric columns as NaN, which never
        # equals a None in ground_truth_results - normalise if your data has NULLs.
        actual_rows = Counter(frozenset(row.items()) for row in actual)
        expected_rows = Counter(frozenset(row.items()) for row in expected)
        return actual_rows == expected_rows

    async def info(self) -> ServerInfo:
        """Describe this server (version, name, description)."""
        return ServerInfo(version="1.0", name="SQL Evaluator", description="Evaluates SQL queries")

if __name__ == "__main__":
    # Replace /path/to/dbs/ with the directory holding the SQLite files that
    # each record's `db_path` is joined onto. Port 50056 overrides the default of 8000.
    server = SQLRewardServer(db_base_path="/path/to/dbs/", port=50056)
```

## Testing locally

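Use `RewardClient` to test your server before deployment. The example below assumes the `ScaryLetterRewardServer` from above is running locally on port 50056: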

```python theme={null}
import asyncio
from adaptive_sdk.external import RewardClient, Request, Turn

async def test_reward_server():
    """Score one conversation against the local server and check the reward."""
    client = RewardClient(base_url="http://localhost:50056")

    conversation = [
        Turn(role="user", content="What sea touches Stockholm?"),
        Turn(role="assistant", content="Baltic Sea."),
    ]
    payload = Request(turns=conversation, metadata={"scary_letter": "d"})

    response = await client.score(payload)
    # "Baltic Sea." contains no "d", so the full reward is expected.
    assert response.reward == 1.0


asyncio.run(test_reward_server())
```

## Deployment

### Docker

```dockerfile theme={null}
FROM python:3.10-slim
WORKDIR /app
# Install dependencies first so this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the server code last; editing it no longer triggers a dependency reinstall.
COPY . .
CMD ["python", "sql_reward_server.py"]
```

```bash theme={null}
# Run from the directory containing the Dockerfile. The tag matches the image
# name used in the Docker Compose and Kubernetes examples.
docker build -t sql-reward-server:1.0 .
```

### Docker Compose

```yaml theme={null}
# Place this entry under the top-level `services:` key of your Compose file.
sql-reward-server:
  image: sql-reward-server:1.0
  restart: on-failure
  healthcheck:
    # python:3.10-slim does not ship curl, so probe /info with the Python
    # interpreter already in the image. urlopen raises on HTTP error statuses,
    # giving a non-zero exit code just like `curl -f`.
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:50056/info', timeout=5)"]
    interval: 30s
    timeout: 10s
    retries: 5

Other containers on the same Docker network can reach the server at `http://sql-reward-server:50056`.

### Kubernetes

```yaml theme={null}
# Service: exposes port 50056 and forwards it to port 50056 on the pods
# labelled app=sql-reward-server.
apiVersion: v1
kind: Service
metadata:
  name: sql-reward-server
spec:
  ports:
  - port: 50056
    targetPort: 50056
  selector:
    app: sql-reward-server
---
# Deployment: runs one replica of the sql-reward-server:1.0 image.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sql-reward-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: sql-reward-server
  template:
    metadata:
      labels:
        # Must match both the Deployment selector above and the Service selector.
        app: sql-reward-server
    spec:
      containers:
      - name: sql-reward-server
        image: sql-reward-server:1.0
        ports:
        - containerPort: 50056
        # HTTP GET /info on port 50056: first probe after 30s, then every 30s.
        livenessProbe:
          httpGet:
            path: /info
            port: 50056
          initialDelaySeconds: 30
          periodSeconds: 30
```

Access at `http://sql-reward-server:50056` (same namespace) or `http://sql-reward-server.<namespace>.svc.cluster.local:50056`.

## Connecting to Adaptive Engine

Test connectivity:

```python theme={null}
# `adaptive` is assumed to be an already-initialised Adaptive SDK client;
# its construction is not shown on this page.
result = adaptive.graders.test_external_endpoint("http://sql-reward-server:50056")

# A result carrying an `error` attribute signals a failed check; otherwise
# `result.name` is printed (presumably the name reported by the server's `info()`).
if hasattr(result, "error"):
    print(f"Error: {result.error}")
else:
    print(f"Connected: {result.name}")

Register as a grader:

```python theme={null}
# `adaptive` is assumed to be an already-initialised Adaptive SDK client.
# `url` is the address at which the reward server is reachable (here the
# in-network address from the deployment examples).
adaptive.graders.create.external_endpoint(
    key="sql-reward-server",
    url="http://sql-reward-server:50056",
    feedback_key="sql-execution-reward",
    project="my-project",
)
```

List and delete graders:

```python theme={null}
# `adaptive` is assumed to be an already-initialised Adaptive SDK client.
# List the graders registered in the project.
graders = adaptive.graders.list(project="my-project")
# Delete a grader by the `key` it was registered with.
adaptive.graders.delete(grader_key="sql-reward-server", project="my-project")
```
