From b45e2aa5f64e5ea19a0025edee13d987b4b27a04 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 12:20:33 +0200
Subject: [PATCH 1/9] feat: add MockChatGenerator for testing and prototyping

Add `MockChatGenerator`, a Chat Generator that returns predefined
responses without calling any API. It is a deterministic, zero-cost
drop-in replacement for real Chat Generators in tests, smoke tests, and
quick prototypes, inspired by model-layer fakes in other frameworks
(LangChain `FakeListChatModel`, LlamaIndex `MockLLM`, PydanticAI
`FunctionModel`).

It supports:
- a fixed response (string or ChatMessage),
- a list of responses cycled across calls (to drive Agent-like loops),
- a `response_fn` callable for input-dependent replies,
- an echo mode (the default) that returns the last user message.

It implements the full Chat Generator interface: `run`, `run_async`,
streaming callbacks, and serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../components/generators/chat/__init__.py    |   2 +
 haystack/components/generators/chat/mock.py   | 358 ++++++++++++++++++
 pydoc/generators_api.yml                      |   1 +
 ...-mock-chat-generator-718176608c1cd73f.yaml |   9 +
 test/components/generators/chat/test_mock.py  | 192 ++++++++++
 5 files changed, 562 insertions(+)
 create mode 100644 haystack/components/generators/chat/mock.py
 create mode 100644 releasenotes/notes/add-mock-chat-generator-718176608c1cd73f.yaml
 create mode 100644 test/components/generators/chat/test_mock.py

diff --git a/haystack/components/generators/chat/__init__.py b/haystack/components/generators/chat/__init__.py
index 85f2847853a..7542db6d482 100644
--- a/haystack/components/generators/chat/__init__.py
+++ b/haystack/components/generators/chat/__init__.py
@@ -14,6 +14,7 @@
     "azure_responses": ["AzureOpenAIResponsesChatGenerator"],
     "fallback": ["FallbackChatGenerator"],
     "llm": ["LLM"],
+    "mock": ["MockChatGenerator"],
 }
 
 if TYPE_CHECKING:
@@ -21,6 +22,7 @@
     from .azure_responses import AzureOpenAIResponsesChatGenerator as AzureOpenAIResponsesChatGenerator
     from .fallback import FallbackChatGenerator as FallbackChatGenerator
     from .llm import LLM as LLM
+    from .mock import MockChatGenerator as MockChatGenerator
     from .openai import OpenAIChatGenerator as OpenAIChatGenerator
     from .openai_responses import OpenAIResponsesChatGenerator as OpenAIResponsesChatGenerator
 
diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
new file mode 100644
index 00000000000..fc6d20ece54
--- /dev/null
+++ b/haystack/components/generators/chat/mock.py
@@ -0,0 +1,358 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import json
+import re
+from collections.abc import Callable, Sequence
+from dataclasses import replace
+from typing import Any
+
+from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.components.generators.utils import _normalize_messages
+from haystack.dataclasses import (
+    ChatMessage,
+    ChatRole,
+    ComponentInfo,
+    FinishReason,
+    StreamingCallbackT,
+    StreamingChunk,
+    select_streaming_callback,
+)
+from haystack.dataclasses.streaming_chunk import ToolCallDelta, _invoke_streaming_callback
+from haystack.tools import ToolsType
+from haystack.utils import deserialize_callable, serialize_callable
+
+logger = logging.getLogger(__name__)
+
+# A callable that derives a response from the input messages. It receives the (normalized) list of input
+# ``ChatMessage`` objects and returns either the text of the assistant reply or a full ``ChatMessage``.
+ResponseFn = Callable[[list[ChatMessage]], "str | ChatMessage"]
+
+
+@component
+class MockChatGenerator:
+    """
+    A Chat Generator that returns predefined responses without calling any API.
+
+    It is a drop-in replacement for real Chat Generators (such as `OpenAIChatGenerator`) in tests, smoke tests, and
+    quick prototypes. It implements the same interface (`run`, `run_async`, streaming, serialization) but never
+    contacts an external service, so it is fully deterministic and free to run.
+
+    The response is selected based on how the component is configured:
+
+    - **Fixed response**: pass a single string or `ChatMessage`. The same reply is returned on every call.
+    - **Cycling responses**: pass a list of strings and/or `ChatMessage` objects. Each call returns the next item,
+      wrapping around to the start once the list is exhausted. This is useful to drive multi-step flows such as
+      Agents, where the first call returns a tool call and a later call returns the final answer.
+    - **Dynamic response**: pass a `response_fn` callable that receives the input messages and returns the reply.
+      This is useful when the reply should depend on the input, for example to echo back part of the prompt.
+    - **Echo (default)**: with no configuration, the component echoes back the text of the last user message. This
+      makes it usable out of the box for quick prototyping.
+
+    Pass `ChatMessage` objects (rather than plain strings) to return tool calls or reasoning content, which is handy
+    for exercising tool-calling pipelines without a real model.
+
+    ### Usage example
+
+    ```python
+    from haystack.components.generators.chat import MockChatGenerator
+    from haystack.dataclasses import ChatMessage
+
+    # Fixed response
+    generator = MockChatGenerator(responses="Hello, this is a mock response.")
+    result = generator.run([ChatMessage.from_user("Hi!")])
+    print(result["replies"][0].text)  # "Hello, this is a mock response."
+
+    # Cycling responses to drive an Agent-like loop
+    generator = MockChatGenerator(
+        responses=[
+            ChatMessage.from_assistant(tool_calls=[ToolCall(tool_name="search", arguments={"query": "Haystack"})]),
+            "Here is the final answer.",
+        ]
+    )
+    ```
+    """
+
+    def __init__(
+        self,
+        responses: str | ChatMessage | Sequence[str | ChatMessage] | None = None,
+        *,
+        response_fn: ResponseFn | None = None,
+        model: str = "mock-model",
+        meta: dict[str, Any] | None = None,
+        streaming_callback: StreamingCallbackT | None = None,
+    ) -> None:
+        """
+        Creates an instance of MockChatGenerator.
+
+        :param responses: The predefined response(s) to return. Accepts a single string or `ChatMessage` (returned on
+            every call), or a non-empty list of strings and/or `ChatMessage` objects that are returned in order,
+            cycling back to the start once exhausted. Strings are wrapped into assistant `ChatMessage` objects.
+            Mutually exclusive with `response_fn`. If neither is provided, the component echoes the last user message.
+        :param response_fn: An optional callable that receives the input messages and returns the reply as a string or
+            `ChatMessage`. Use this for input-dependent responses. Mutually exclusive with `responses`. To support
+            serialization, pass a named function (lambdas and nested functions cannot be serialized).
+        :param model: The model name reported in the response metadata. Purely cosmetic; no model is loaded.
+        :param meta: Additional metadata merged into the `meta` of every returned `ChatMessage`. A per-response
+            `ChatMessage`'s own metadata takes precedence over this value.
+        :param streaming_callback: An optional callback invoked with `StreamingChunk` objects reconstructed from the
+            predefined response. It lets the mock exercise streaming code paths without a real model.
+        :raises ValueError: If both `responses` and `response_fn` are provided, or if `responses` is an empty list.
+        """
+        if responses is not None and response_fn is not None:
+            raise ValueError("Pass either 'responses' or 'response_fn', not both.")
+
+        self._responses = self._normalize_responses(responses)
+        self.response_fn = response_fn
+        self.model = model
+        self.meta = meta or {}
+        self.streaming_callback = streaming_callback
+        self._call_count = 0
+        self._is_warmed_up = False
+
+    @staticmethod
+    def _normalize_responses(
+        responses: str | ChatMessage | Sequence[str | ChatMessage] | None,
+    ) -> list[ChatMessage] | None:
+        """Normalize the `responses` argument into a non-empty list of `ChatMessage`, or `None` for echo mode."""
+        if responses is None:
+            return None
+
+        items: list[str | ChatMessage]
+        if isinstance(responses, (str, ChatMessage)):
+            items = [responses]
+        elif isinstance(responses, Sequence):
+            items = list(responses)
+        else:
+            raise TypeError(f"'responses' must be a string, ChatMessage, or a sequence of them, got {type(responses)}.")
+
+        if len(items) == 0:
+            raise ValueError("'responses' must not be an empty list.")
+
+        normalized: list[ChatMessage] = []
+        for item in items:
+            if isinstance(item, str):
+                normalized.append(ChatMessage.from_assistant(item))
+            elif isinstance(item, ChatMessage):
+                normalized.append(item)
+            else:
+                raise TypeError(f"Each response must be a string or ChatMessage, got {type(item)}.")
+        return normalized
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the component to a dictionary."""
+        responses = [msg.to_dict() for msg in self._responses] if self._responses is not None else None
+        response_fn = serialize_callable(self.response_fn) if self.response_fn is not None else None
+        streaming_callback = serialize_callable(self.streaming_callback) if self.streaming_callback else None
+        return default_to_dict(
+            self,
+            responses=responses,
+            response_fn=response_fn,
+            model=self.model,
+            meta=self.meta,
+            streaming_callback=streaming_callback,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> MockChatGenerator:
+        """Deserialize the component from a dictionary."""
+        init_params = data.get("init_parameters", {})
+        responses = init_params.get("responses")
+        if responses is not None:
+            init_params["responses"] = [ChatMessage.from_dict(msg) for msg in responses]
+        response_fn = init_params.get("response_fn")
+        if response_fn:
+            init_params["response_fn"] = deserialize_callable(response_fn)
+        streaming_callback = init_params.get("streaming_callback")
+        if streaming_callback:
+            init_params["streaming_callback"] = deserialize_callable(streaming_callback)
+        return default_from_dict(cls, data)
+
+    def warm_up(self) -> None:
+        """No-op warm up, provided for interface compatibility with real Chat Generators."""
+        self._is_warmed_up = True
+
+    @staticmethod
+    def _echo_text(messages: list[ChatMessage]) -> str | None:
+        """Return the text of the last user message, or the last message with text, for echo mode."""
+        for message in reversed(messages):
+            if message.role == ChatRole.USER and message.text:
+                return message.text
+        for message in reversed(messages):
+            if message.text:
+                return message.text
+        return None
+
+    @staticmethod
+    def _coerce_to_message(result: Any) -> ChatMessage:
+        """Coerce the output of `response_fn` into an assistant `ChatMessage`."""
+        if isinstance(result, str):
+            return ChatMessage.from_assistant(result)
+        if isinstance(result, ChatMessage):
+            return result
+        raise TypeError(f"'response_fn' must return a string or ChatMessage, got {type(result)}.")
+
+    @staticmethod
+    def _estimate_usage(messages: list[ChatMessage], reply: ChatMessage) -> dict[str, int]:
+        """
+        Roughly estimate token usage as whitespace-separated word counts.
+
+        This is an approximation (not real tokenization) intended to give downstream code realistic-looking metadata.
+        """
+        prompt_tokens = sum(len((message.text or "").split()) for message in messages)
+        completion_tokens = len((reply.text or "").split())
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": prompt_tokens + completion_tokens,
+        }
+
+    def _build_meta(self, messages: list[ChatMessage], base: ChatMessage) -> dict[str, Any]:
+        """Build the metadata attached to the returned reply, merging defaults, init meta, and per-response meta."""
+        meta: dict[str, Any] = {
+            "model": self.model,
+            "index": 0,
+            "finish_reason": "tool_calls" if base.tool_calls else "stop",
+            "usage": self._estimate_usage(messages, base),
+        }
+        meta.update(self.meta)
+        meta.update(base.meta)
+        return meta
+
+    def _build_reply(self, messages: list[ChatMessage]) -> ChatMessage | None:
+        """Select and finalize the reply for the given input messages. Returns `None` when there is nothing to echo."""
+        if self.response_fn is not None:
+            base = self._coerce_to_message(self.response_fn(messages))
+        elif self._responses is not None:
+            base = self._responses[self._call_count % len(self._responses)]
+            self._call_count += 1
+        else:
+            text = self._echo_text(messages)
+            if text is None:
+                return None
+            base = ChatMessage.from_assistant(text)
+
+        return replace(base, _meta=self._build_meta(messages, base))
+
+    def _make_chunks(self, reply: ChatMessage) -> list[StreamingChunk]:
+        """Reconstruct streaming chunks from a finalized reply so streaming callbacks can be exercised."""
+        component_info = ComponentInfo.from_component(self)
+        chunks: list[StreamingChunk] = []
+
+        # Stream the text word by word in content block 0; whitespace is kept so the chunks re-join to `reply.text`.
+        parts = re.findall(r"\s*\S+\s*|\s+", reply.text) if reply.text else []
+        for idx, part in enumerate(parts):
+            chunks.append(
+                StreamingChunk(
+                    content=part, component_info=component_info, index=0, start=(idx == 0), meta={"model": self.model}
+                )
+            )
+
+        # Stream each tool call in its own content block.
+        block_index = 1 if parts else 0
+        for tool_call in reply.tool_calls:
+            chunks.append(
+                StreamingChunk(
+                    content="",
+                    component_info=component_info,
+                    index=block_index,
+                    start=True,
+                    tool_calls=[
+                        ToolCallDelta(
+                            index=block_index,
+                            tool_name=tool_call.tool_name,
+                            arguments=json.dumps(tool_call.arguments),
+                            id=tool_call.id,
+                        )
+                    ],
+                    meta={"model": self.model},
+                )
+            )
+            block_index += 1
+
+        if not chunks:
+            chunks.append(
+                StreamingChunk(content="", component_info=component_info, index=0, meta={"model": self.model})
+            )
+
+        finish_reason: FinishReason = "tool_calls" if reply.tool_calls else "stop"
+        last = chunks[-1]
+        chunks[-1] = replace(last, finish_reason=finish_reason, meta={**last.meta, "finish_reason": finish_reason})
+        return chunks
+
+    @component.output_types(replies=list[ChatMessage])
+    def run(
+        self,
+        messages: list[ChatMessage] | str,
+        generation_kwargs: dict[str, Any] | None = None,  # noqa: ARG002
+        tools: ToolsType | None = None,  # noqa: ARG002
+        streaming_callback: StreamingCallbackT | None = None,
+    ) -> dict[str, list[ChatMessage]]:
+        """
+        Return a predefined reply for the given messages without calling any API.
+
+        :param messages: The conversation history as a list of `ChatMessage` instances or a single string.
+        :param generation_kwargs: Accepted for interface compatibility and ignored.
+        :param tools: Accepted for interface compatibility and ignored.
+        :param streaming_callback: An optional callback invoked with reconstructed `StreamingChunk` objects. Overrides
+            the callback set at initialization.
+        :returns: A dictionary with a single key `replies` containing the predefined reply as a list of one
+            `ChatMessage` (empty in echo mode when there is no message to echo).
+        """
+        if not self._is_warmed_up:
+            self.warm_up()
+
+        messages = _normalize_messages(messages)
+        streaming_callback = select_streaming_callback(
+            init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False
+        )
+
+        reply = self._build_reply(messages)
+        if reply is None:
+            return {"replies": []}
+
+        if streaming_callback is not None:
+            for chunk in self._make_chunks(reply):
+                streaming_callback(chunk)
+
+        return {"replies": [reply]}
+
+    @component.output_types(replies=list[ChatMessage])
+    async def run_async(
+        self,
+        messages: list[ChatMessage] | str,
+        generation_kwargs: dict[str, Any] | None = None,  # noqa: ARG002
+        tools: ToolsType | None = None,  # noqa: ARG002
+        streaming_callback: StreamingCallbackT | None = None,
+    ) -> dict[str, list[ChatMessage]]:
+        """
+        Asynchronously return a predefined reply for the given messages without calling any API.
+
+        :param messages: The conversation history as a list of `ChatMessage` instances or a single string.
+        :param generation_kwargs: Accepted for interface compatibility and ignored.
+        :param tools: Accepted for interface compatibility and ignored.
+        :param streaming_callback: An optional callback invoked with reconstructed `StreamingChunk` objects. Overrides
+            the callback set at initialization.
+        :returns: A dictionary with a single key `replies` containing the predefined reply as a list of one
+            `ChatMessage` (empty in echo mode when there is no message to echo).
+        """
+        if not self._is_warmed_up:
+            self.warm_up()
+
+        messages = _normalize_messages(messages)
+        streaming_callback = select_streaming_callback(
+            init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=True
+        )
+
+        reply = self._build_reply(messages)
+        if reply is None:
+            return {"replies": []}
+
+        if streaming_callback is not None:
+            for chunk in self._make_chunks(reply):
+                await _invoke_streaming_callback(streaming_callback, chunk)
+
+        return {"replies": [reply]}
diff --git a/pydoc/generators_api.yml b/pydoc/generators_api.yml
index eb6f10c9d94..9a1f495cdc9 100644
--- a/pydoc/generators_api.yml
+++ b/pydoc/generators_api.yml
@@ -6,6 +6,7 @@ loaders:
         "chat/azure_responses",
         "chat/fallback",
         "chat/llm",
+        "chat/mock",
         "chat/openai",
         "chat/openai_responses",
         "openai_image_generator",
diff --git a/releasenotes/notes/add-mock-chat-generator-718176608c1cd73f.yaml b/releasenotes/notes/add-mock-chat-generator-718176608c1cd73f.yaml
new file mode 100644
index 00000000000..67feff06b86
--- /dev/null
+++ b/releasenotes/notes/add-mock-chat-generator-718176608c1cd73f.yaml
@@ -0,0 +1,9 @@
+---
+features:
+  - |
+    Added ``MockChatGenerator``, a Chat Generator that returns predefined responses without calling any API.
+    It is a deterministic, zero-cost drop-in replacement for real Chat Generators in tests, smoke tests, and
+    quick prototypes. It supports a fixed response, a list of responses cycled across calls (useful to drive
+    Agent-like loops), a ``response_fn`` callable for input-dependent replies, and an echo mode (the default) that
+    returns the last user message. It implements the full Chat Generator interface, including ``run``, ``run_async``,
+    streaming callbacks, and serialization.
diff --git a/test/components/generators/chat/test_mock.py b/test/components/generators/chat/test_mock.py
new file mode 100644
index 00000000000..e84a5007d60
--- /dev/null
+++ b/test/components/generators/chat/test_mock.py
@@ -0,0 +1,192 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from haystack import Pipeline
+from haystack.components.generators.chat import MockChatGenerator
+from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall
+
+
+def _exclaim(messages: list[ChatMessage]) -> str:
+    """Module-level response function used to test `response_fn` serialization."""
+    return f"{messages[-1].text}!"
+
+
+class TestMockChatGenerator:
+    def test_init_default_echo(self):
+        gen = MockChatGenerator()
+        assert gen._responses is None
+        assert gen.response_fn is None
+        assert gen.model == "mock-model"
+        assert gen.meta == {}
+
+    def test_init_normalizes_string(self):
+        gen = MockChatGenerator("hello")
+        assert len(gen._responses) == 1
+        assert gen._responses[0].text == "hello"
+        assert gen._responses[0].role.value == "assistant"
+
+    def test_init_normalizes_list(self):
+        gen = MockChatGenerator(["a", ChatMessage.from_assistant("b")])
+        assert [msg.text for msg in gen._responses] == ["a", "b"]
+
+    def test_init_rejects_responses_and_response_fn(self):
+        with pytest.raises(ValueError, match="either 'responses' or 'response_fn'"):
+            MockChatGenerator("a", response_fn=_exclaim)
+
+    def test_init_rejects_empty_list(self):
+        with pytest.raises(ValueError, match="must not be an empty list"):
+            MockChatGenerator([])
+
+    def test_init_rejects_invalid_response_type(self):
+        with pytest.raises(TypeError):
+            MockChatGenerator([123])  # type: ignore[list-item]
+
+    def test_fixed_response(self):
+        gen = MockChatGenerator("the same answer")
+        for _ in range(3):
+            result = gen.run([ChatMessage.from_user("anything")])
+            assert result["replies"][0].text == "the same answer"
+
+    def test_cycling_responses(self):
+        gen = MockChatGenerator(["one", "two", "three"])
+        texts = [gen.run([ChatMessage.from_user("hi")])["replies"][0].text for _ in range(4)]
+        assert texts == ["one", "two", "three", "one"]
+
+    def test_echo_default_returns_last_user_message(self):
+        gen = MockChatGenerator()
+        result = gen.run(
+            [ChatMessage.from_system("be helpful"), ChatMessage.from_user("first"), ChatMessage.from_user("second")]
+        )
+        assert result["replies"][0].text == "second"
+
+    def test_echo_default_empty_messages_returns_no_replies(self):
+        gen = MockChatGenerator()
+        assert gen.run([])["replies"] == []
+
+    def test_response_fn(self):
+        gen = MockChatGenerator(response_fn=_exclaim)
+        result = gen.run([ChatMessage.from_user("hello")])
+        assert result["replies"][0].text == "hello!"
+
+    def test_response_fn_invalid_return_raises(self):
+        gen = MockChatGenerator(response_fn=lambda messages: 123)
+        with pytest.raises(TypeError, match="must return a string or ChatMessage"):
+            gen.run([ChatMessage.from_user("hi")])
+
+    def test_string_input_is_normalized(self):
+        gen = MockChatGenerator(response_fn=_exclaim)
+        result = gen.run("plain string")
+        assert result["replies"][0].text == "plain string!"
+
+    def test_tool_call_response(self):
+        tool_call = ToolCall(tool_name="search", arguments={"query": "Haystack"})
+        gen = MockChatGenerator(ChatMessage.from_assistant(tool_calls=[tool_call]))
+        reply = gen.run([ChatMessage.from_user("search for Haystack")])["replies"][0]
+        assert reply.tool_calls == [tool_call]
+        assert reply.meta["finish_reason"] == "tool_calls"
+
+    def test_meta_defaults(self):
+        gen = MockChatGenerator("hello world")
+        meta = gen.run([ChatMessage.from_user("a b c")])["replies"][0].meta
+        assert meta["model"] == "mock-model"
+        assert meta["finish_reason"] == "stop"
+        assert meta["usage"] == {"prompt_tokens": 3, "completion_tokens": 2, "total_tokens": 5}
+
+    def test_meta_merging_precedence(self):
+        # init meta overrides defaults; per-response meta overrides init meta
+        response = ChatMessage.from_assistant("hi", meta={"custom": "from-response", "finish_reason": "length"})
+        gen = MockChatGenerator(response, model="custom-model", meta={"custom": "from-init", "extra": "init"})
+        meta = gen.run([ChatMessage.from_user("x")])["replies"][0].meta
+        assert meta["model"] == "custom-model"
+        assert meta["custom"] == "from-response"
+        assert meta["finish_reason"] == "length"
+        assert meta["extra"] == "init"
+
+    def test_does_not_mutate_stored_responses(self):
+        gen = MockChatGenerator("hello")
+        gen.run([ChatMessage.from_user("a b")])
+        # the stored response must keep its original (empty) meta, untouched by the per-run meta
+        assert gen._responses[0].meta == {}
+
+    async def test_run_async(self):
+        gen = MockChatGenerator(["one", "two"])
+        first = await gen.run_async([ChatMessage.from_user("hi")])
+        second = await gen.run_async([ChatMessage.from_user("hi")])
+        assert first["replies"][0].text == "one"
+        assert second["replies"][0].text == "two"
+
+    def test_streaming_callback_sync(self):
+        chunks: list[StreamingChunk] = []
+        gen = MockChatGenerator("hello there friend")
+        result = gen.run([ChatMessage.from_user("hi")], streaming_callback=chunks.append)
+        assert "".join(chunk.content for chunk in chunks) == "hello there friend"
+        assert chunks[0].start is True
+        assert chunks[-1].finish_reason == "stop"
+        # the returned reply matches the predefined response
+        assert result["replies"][0].text == "hello there friend"
+
+    async def test_streaming_callback_async(self):
+        chunks: list[StreamingChunk] = []
+
+        async def callback(chunk: StreamingChunk) -> None:
+            chunks.append(chunk)
+
+        gen = MockChatGenerator("hello world")
+        await gen.run_async([ChatMessage.from_user("hi")], streaming_callback=callback)
+        assert "".join(chunk.content for chunk in chunks) == "hello world"
+        assert chunks[-1].finish_reason == "stop"
+
+    def test_streaming_callback_with_tool_call(self):
+        chunks: list[StreamingChunk] = []
+        tool_call = ToolCall(tool_name="search", arguments={"query": "x"})
+        gen = MockChatGenerator(ChatMessage.from_assistant(tool_calls=[tool_call]))
+        gen.run([ChatMessage.from_user("hi")], streaming_callback=chunks.append)
+        assert any(chunk.tool_calls for chunk in chunks)
+        assert chunks[-1].finish_reason == "tool_calls"
+
+    def test_init_level_streaming_callback(self):
+        chunks: list[StreamingChunk] = []
+        gen = MockChatGenerator("hello", streaming_callback=chunks.append)
+        gen.run([ChatMessage.from_user("hi")])
+        assert chunks
+
+    def test_to_dict_from_dict_roundtrip(self):
+        gen = MockChatGenerator(["a", ChatMessage.from_assistant("b")], model="m", meta={"k": "v"})
+        data = gen.to_dict()
+        assert data["type"] == "haystack.components.generators.chat.mock.MockChatGenerator"
+        assert data["init_parameters"]["model"] == "m"
+        assert data["init_parameters"]["response_fn"] is None
+
+        restored = MockChatGenerator.from_dict(data)
+        texts = [restored.run([ChatMessage.from_user("hi")])["replies"][0].text for _ in range(2)]
+        assert texts == ["a", "b"]
+        assert restored.meta == {"k": "v"}
+
+    def test_to_dict_from_dict_with_response_fn(self):
+        gen = MockChatGenerator(response_fn=_exclaim)
+        data = gen.to_dict()
+        assert data["init_parameters"]["response_fn"].endswith("test_mock._exclaim")
+        restored = MockChatGenerator.from_dict(data)
+        assert restored.run([ChatMessage.from_user("hello")])["replies"][0].text == "hello!"
+
+    def test_to_dict_echo_mode(self):
+        gen = MockChatGenerator()
+        data = gen.to_dict()
+        assert data["init_parameters"]["responses"] is None
+        restored = MockChatGenerator.from_dict(data)
+        assert restored.run([ChatMessage.from_user("echo me")])["replies"][0].text == "echo me"
+
+    def test_in_pipeline(self):
+        pipeline = Pipeline()
+        pipeline.add_component("generator", MockChatGenerator("from the pipeline"))
+        result = pipeline.run({"generator": {"messages": [ChatMessage.from_user("hi")]}})
+        assert result["generator"]["replies"][0].text == "from the pipeline"
+
+        # the pipeline (and its mock component) survives a serialization roundtrip
+        restored = Pipeline.from_dict(pipeline.to_dict())
+        result = restored.run({"generator": {"messages": [ChatMessage.from_user("hi")]}})
+        assert result["generator"]["replies"][0].text == "from the pipeline"

From 670333a688548423833cc9ebddf73588cb14987e Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 13:02:10 +0200
Subject: [PATCH 2/9] test: consolidate MockChatGenerator tests; fix docstring
 import

Reduce the number of test functions via parametrization (init validation,
echo modes, response_fn return types, serialization roundtrips) without
losing coverage, and cover the previously-missed defensive branches.
mock.py is now at 100% statement coverage.

Also fix the docstring usage example to import ToolCall.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py  |   2 +-
 test/components/generators/chat/test_mock.py | 166 +++++++++----------
 2 files changed, 76 insertions(+), 92 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index fc6d20ece54..73a3fb5d78a 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -59,7 +59,7 @@ class MockChatGenerator:
 
     ```python
     from haystack.components.generators.chat import MockChatGenerator
-    from haystack.dataclasses import ChatMessage
+    from haystack.dataclasses import ChatMessage, ToolCall
 
     # Fixed response
     generator = MockChatGenerator(responses="Hello, this is a mock response.")
diff --git a/test/components/generators/chat/test_mock.py b/test/components/generators/chat/test_mock.py
index e84a5007d60..4d0f4250924 100644
--- a/test/components/generators/chat/test_mock.py
+++ b/test/components/generators/chat/test_mock.py
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-
 import pytest
 
 from haystack import Pipeline
@@ -11,39 +10,32 @@
 
 
 def _exclaim(messages: list[ChatMessage]) -> str:
-    """Module-level response function used to test `response_fn` serialization."""
+    """Module-level response function (returns a string) used to test `response_fn` and its serialization."""
     return f"{messages[-1].text}!"
 
 
-class TestMockChatGenerator:
-    def test_init_default_echo(self):
-        gen = MockChatGenerator()
-        assert gen._responses is None
-        assert gen.response_fn is None
-        assert gen.model == "mock-model"
-        assert gen.meta == {}
-
-    def test_init_normalizes_string(self):
-        gen = MockChatGenerator("hello")
-        assert len(gen._responses) == 1
-        assert gen._responses[0].text == "hello"
-        assert gen._responses[0].role.value == "assistant"
+def _assistant_reply(messages: list[ChatMessage]) -> ChatMessage:
+    """Module-level response function that returns a full ChatMessage."""
+    return ChatMessage.from_assistant("canned message")
 
-    def test_init_normalizes_list(self):
-        gen = MockChatGenerator(["a", ChatMessage.from_assistant("b")])
-        assert [msg.text for msg in gen._responses] == ["a", "b"]
 
-    def test_init_rejects_responses_and_response_fn(self):
-        with pytest.raises(ValueError, match="either 'responses' or 'response_fn'"):
-            MockChatGenerator("a", response_fn=_exclaim)
+def _noop_callback(chunk: StreamingChunk) -> None:
+    """Module-level streaming callback used to test init-level callback serialization."""
 
-    def test_init_rejects_empty_list(self):
-        with pytest.raises(ValueError, match="must not be an empty list"):
-            MockChatGenerator([])
 
-    def test_init_rejects_invalid_response_type(self):
-        with pytest.raises(TypeError):
-            MockChatGenerator([123])  # type: ignore[list-item]
+class TestMockChatGenerator:
+    @pytest.mark.parametrize(
+        ("args", "kwargs", "exception", "match"),
+        [
+            (("a",), {"response_fn": _exclaim}, ValueError, "either 'responses' or 'response_fn'"),
+            (([],), {}, ValueError, "must not be an empty list"),
+            ((123,), {}, TypeError, "must be a string, ChatMessage, or a sequence"),
+            (([123],), {}, TypeError, "Each response must be a string or ChatMessage"),
+        ],
+    )
+    def test_init_rejects_invalid_config(self, args, kwargs, exception, match):
+        with pytest.raises(exception, match=match):
+            MockChatGenerator(*args, **kwargs)
 
     def test_fixed_response(self):
         gen = MockChatGenerator("the same answer")
@@ -52,25 +44,33 @@ def test_fixed_response(self):
             assert result["replies"][0].text == "the same answer"
 
     def test_cycling_responses(self):
-        gen = MockChatGenerator(["one", "two", "three"])
+        # a mix of strings and ChatMessage objects, returned in order and wrapping around
+        gen = MockChatGenerator(["one", ChatMessage.from_assistant("two"), "three"])
         texts = [gen.run([ChatMessage.from_user("hi")])["replies"][0].text for _ in range(4)]
         assert texts == ["one", "two", "three", "one"]
 
-    def test_echo_default_returns_last_user_message(self):
-        gen = MockChatGenerator()
-        result = gen.run(
-            [ChatMessage.from_system("be helpful"), ChatMessage.from_user("first"), ChatMessage.from_user("second")]
-        )
-        assert result["replies"][0].text == "second"
-
-    def test_echo_default_empty_messages_returns_no_replies(self):
-        gen = MockChatGenerator()
-        assert gen.run([])["replies"] == []
-
-    def test_response_fn(self):
-        gen = MockChatGenerator(response_fn=_exclaim)
-        result = gen.run([ChatMessage.from_user("hello")])
-        assert result["replies"][0].text == "hello!"
+    @pytest.mark.parametrize(
+        ("messages", "expected"),
+        [
+            (
+                [ChatMessage.from_system("sys"), ChatMessage.from_user("first"), ChatMessage.from_user("second")],
+                "second",
+            ),
+            ([ChatMessage.from_system("only system")], "only system"),  # falls back to the last message with text
+            ([], None),  # nothing to echo
+        ],
+    )
+    def test_echo_default(self, messages, expected):
+        replies = MockChatGenerator().run(messages)["replies"]
+        if expected is None:
+            assert replies == []
+        else:
+            assert replies[0].text == expected
+
+    @pytest.mark.parametrize(("fn", "expected"), [(_exclaim, "hello!"), (_assistant_reply, "canned message")])
+    def test_response_fn(self, fn, expected):
+        result = MockChatGenerator(response_fn=fn).run([ChatMessage.from_user("hello")])
+        assert result["replies"][0].text == expected
 
     def test_response_fn_invalid_return_raises(self):
         gen = MockChatGenerator(response_fn=lambda messages: 123)
@@ -79,8 +79,7 @@ def test_response_fn_invalid_return_raises(self):
 
     def test_string_input_is_normalized(self):
         gen = MockChatGenerator(response_fn=_exclaim)
-        result = gen.run("plain string")
-        assert result["replies"][0].text == "plain string!"
+        assert gen.run("plain string")["replies"][0].text == "plain string!"
 
     def test_tool_call_response(self):
         tool_call = ToolCall(tool_name="search", arguments={"query": "Haystack"})
@@ -90,8 +89,7 @@ def test_tool_call_response(self):
         assert reply.meta["finish_reason"] == "tool_calls"
 
     def test_meta_defaults(self):
-        gen = MockChatGenerator("hello world")
-        meta = gen.run([ChatMessage.from_user("a b c")])["replies"][0].meta
+        meta = MockChatGenerator("hello world").run([ChatMessage.from_user("a b c")])["replies"][0].meta
         assert meta["model"] == "mock-model"
         assert meta["finish_reason"] == "stop"
         assert meta["usage"] == {"prompt_tokens": 3, "completion_tokens": 2, "total_tokens": 5}
@@ -109,20 +107,21 @@ def test_meta_merging_precedence(self):
     def test_does_not_mutate_stored_responses(self):
         gen = MockChatGenerator("hello")
         gen.run([ChatMessage.from_user("a b")])
-        # the stored response must keep its original (empty) meta, untouched by the per-run meta
+        # the stored response keeps its original (empty) meta, untouched by the per-run meta
         assert gen._responses[0].meta == {}
 
     async def test_run_async(self):
         gen = MockChatGenerator(["one", "two"])
-        first = await gen.run_async([ChatMessage.from_user("hi")])
-        second = await gen.run_async([ChatMessage.from_user("hi")])
-        assert first["replies"][0].text == "one"
-        assert second["replies"][0].text == "two"
+        assert (await gen.run_async([ChatMessage.from_user("hi")]))["replies"][0].text == "one"
+        assert (await gen.run_async([ChatMessage.from_user("hi")]))["replies"][0].text == "two"
+        # echo mode with empty input returns no replies (async path)
+        assert (await MockChatGenerator().run_async([]))["replies"] == []
 
     def test_streaming_callback_sync(self):
         chunks: list[StreamingChunk] = []
-        gen = MockChatGenerator("hello there friend")
-        result = gen.run([ChatMessage.from_user("hi")], streaming_callback=chunks.append)
+        result = MockChatGenerator("hello there friend").run(
+            [ChatMessage.from_user("hi")], streaming_callback=chunks.append
+        )
         assert "".join(chunk.content for chunk in chunks) == "hello there friend"
         assert chunks[0].start is True
         assert chunks[-1].finish_reason == "stop"
@@ -135,11 +134,15 @@ async def test_streaming_callback_async(self):
         async def callback(chunk: StreamingChunk) -> None:
             chunks.append(chunk)
 
-        gen = MockChatGenerator("hello world")
-        await gen.run_async([ChatMessage.from_user("hi")], streaming_callback=callback)
+        await MockChatGenerator("hello world").run_async([ChatMessage.from_user("hi")], streaming_callback=callback)
         assert "".join(chunk.content for chunk in chunks) == "hello world"
         assert chunks[-1].finish_reason == "stop"
 
+    def test_streaming_empty_reply(self):
+        chunks: list[StreamingChunk] = []
+        MockChatGenerator("").run([ChatMessage.from_user("hi")], streaming_callback=chunks.append)
+        assert chunks[-1].finish_reason == "stop"
+
     def test_streaming_callback_with_tool_call(self):
         chunks: list[StreamingChunk] = []
         tool_call = ToolCall(tool_name="search", arguments={"query": "x"})
@@ -148,45 +151,26 @@ def test_streaming_callback_with_tool_call(self):
         assert any(chunk.tool_calls for chunk in chunks)
         assert chunks[-1].finish_reason == "tool_calls"
 
-    def test_init_level_streaming_callback(self):
-        chunks: list[StreamingChunk] = []
-        gen = MockChatGenerator("hello", streaming_callback=chunks.append)
-        gen.run([ChatMessage.from_user("hi")])
-        assert chunks
-
-    def test_to_dict_from_dict_roundtrip(self):
-        gen = MockChatGenerator(["a", ChatMessage.from_assistant("b")], model="m", meta={"k": "v"})
-        data = gen.to_dict()
-        assert data["type"] == "haystack.components.generators.chat.mock.MockChatGenerator"
-        assert data["init_parameters"]["model"] == "m"
-        assert data["init_parameters"]["response_fn"] is None
-
-        restored = MockChatGenerator.from_dict(data)
-        texts = [restored.run([ChatMessage.from_user("hi")])["replies"][0].text for _ in range(2)]
-        assert texts == ["a", "b"]
-        assert restored.meta == {"k": "v"}
-
-    def test_to_dict_from_dict_with_response_fn(self):
-        gen = MockChatGenerator(response_fn=_exclaim)
-        data = gen.to_dict()
-        assert data["init_parameters"]["response_fn"].endswith("test_mock._exclaim")
-        restored = MockChatGenerator.from_dict(data)
-        assert restored.run([ChatMessage.from_user("hello")])["replies"][0].text == "hello!"
-
-    def test_to_dict_echo_mode(self):
-        gen = MockChatGenerator()
-        data = gen.to_dict()
-        assert data["init_parameters"]["responses"] is None
-        restored = MockChatGenerator.from_dict(data)
-        assert restored.run([ChatMessage.from_user("echo me")])["replies"][0].text == "echo me"
+    @pytest.mark.parametrize(
+        "generator",
+        [
+            MockChatGenerator(["a", ChatMessage.from_assistant("b")], model="m", meta={"k": "v"}),
+            MockChatGenerator(response_fn=_exclaim),
+            MockChatGenerator(),  # echo mode
+            MockChatGenerator("hi", streaming_callback=_noop_callback),  # serialized init-level callback
+        ],
+        ids=["responses", "response_fn", "echo", "streaming_callback"],
+    )
+    def test_serialization_roundtrip(self, generator):
+        restored = MockChatGenerator.from_dict(generator.to_dict())
+        assert isinstance(restored, MockChatGenerator)
+        # behavior is preserved across the roundtrip
+        messages = [ChatMessage.from_user("hi")]
+        assert restored.run(messages)["replies"][0].text == generator.run(messages)["replies"][0].text
 
     def test_in_pipeline(self):
         pipeline = Pipeline()
         pipeline.add_component("generator", MockChatGenerator("from the pipeline"))
-        result = pipeline.run({"generator": {"messages": [ChatMessage.from_user("hi")]}})
-        assert result["generator"]["replies"][0].text == "from the pipeline"
-
-        # the pipeline (and its mock component) survives a serialization roundtrip
         restored = Pipeline.from_dict(pipeline.to_dict())
         result = restored.run({"generator": {"messages": [ChatMessage.from_user("hi")]}})
         assert result["generator"]["replies"][0].text == "from the pipeline"

From 58810f114f5eeced0d02f488b7ca9cd8dc8a0fe0 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 14:08:19 +0200
Subject: [PATCH 3/9] docs: use single backticks in MockChatGenerator module
 comment

Address review feedback: avoid RST-style double backticks in code comments.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index 73a3fb5d78a..b1293c075e5 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -28,7 +28,7 @@
 logger = logging.getLogger(__name__)
 
 # A callable that derives a response from the input messages. It receives the (normalized) list of input
-# ``ChatMessage`` objects and returns either the text of the assistant reply or a full ``ChatMessage``.
+# `ChatMessage` objects and returns either the text of the assistant reply or a full `ChatMessage`.
 ResponseFn = Callable[[list[ChatMessage]], "str | ChatMessage"]
 
 

From bf6f4513c662fada097bbbdbd1ef045322071c70 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 14:20:55 +0200
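Subject: [PATCH 4/9] Apply suggestions from code review

Use a real `str | ChatMessage` return annotation in the `ResponseFn` alias
instead of a string forward reference, and annotate the `result` parameter
of `_coerce_to_message` as `str | ChatMessage` instead of `Any`.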

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 haystack/components/generators/chat/mock.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index b1293c075e5..6dad13844cd 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -29,7 +29,7 @@
 
 # A callable that derives a response from the input messages. It receives the (normalized) list of input
 # `ChatMessage` objects and returns either the text of the assistant reply or a full `ChatMessage`.
-ResponseFn = Callable[[list[ChatMessage]], "str | ChatMessage"]
+ResponseFn = Callable[[list[ChatMessage]], str | ChatMessage]
 
 
 @component
@@ -187,7 +187,7 @@ def _echo_text(messages: list[ChatMessage]) -> str | None:
         return None
 
     @staticmethod
-    def _coerce_to_message(result: Any) -> ChatMessage:
+    def _coerce_to_message(result: str | ChatMessage) -> ChatMessage:
         """Coerce the output of `response_fn` into an assistant `ChatMessage`."""
         if isinstance(result, str):
             return ChatMessage.from_assistant(result)

From ba37cbbe23e8c8c374be6976f4f74d7c7ba33348 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 14:52:08 +0200
Subject: [PATCH 5/9] refactor: simplify MockChatGenerator echo to last message
 with text

Address review feedback: echo the last message that has text content
instead of preferring the last user message and then falling back. This
is behaviorally identical for the typical case (the last message is the
user turn) and removes the now-unused ChatRole import.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index 6dad13844cd..65f09ad1e74 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -14,7 +14,6 @@
 from haystack.components.generators.utils import _normalize_messages
 from haystack.dataclasses import (
     ChatMessage,
-    ChatRole,
     ComponentInfo,
     FinishReason,
     StreamingCallbackT,
@@ -49,8 +48,8 @@ class MockChatGenerator:
       Agents, where the first call returns a tool call and a later call returns the final answer.
     - **Dynamic response**: pass a `response_fn` callable that receives the input messages and returns the reply.
       This is useful when the reply should depend on the input, for example to echo back part of the prompt.
-    - **Echo (default)**: with no configuration, the component echoes back the text of the last user message. This
-      makes it usable out of the box for quick prototyping.
+    - **Echo (default)**: with no configuration, the component echoes back the text of the last message that has
+      text content. This makes it usable out of the box for quick prototyping.
 
     Pass `ChatMessage` objects (rather than plain strings) to return tool calls or reasoning content, which is handy
     for exercising tool-calling pipelines without a real model.
@@ -91,7 +90,8 @@ def __init__(
         :param responses: The predefined response(s) to return. Accepts a single string or `ChatMessage` (returned on
             every call), or a non-empty list of strings and/or `ChatMessage` objects that are returned in order,
             cycling back to the start once exhausted. Strings are wrapped into assistant `ChatMessage` objects.
-            Mutually exclusive with `response_fn`. If neither is provided, the component echoes the last user message.
+            Mutually exclusive with `response_fn`. If neither is provided, the component echoes the last message with
+            text content.
         :param response_fn: An optional callable that receives the input messages and returns the reply as a string or
             `ChatMessage`. Use this for input-dependent responses. Mutually exclusive with `responses`. To support
             serialization, pass a named function (lambdas and nested functions cannot be serialized).
@@ -177,10 +177,7 @@ def warm_up(self) -> None:
 
     @staticmethod
     def _echo_text(messages: list[ChatMessage]) -> str | None:
-        """Return the text of the last user message, or the last message with text, for echo mode."""
-        for message in reversed(messages):
-            if message.role == ChatRole.USER and message.text:
-                return message.text
+        """Return the text of the last message that has text content, for echo mode."""
         for message in reversed(messages):
             if message.text:
                 return message.text

From 9ee0a23e4c27d93b36282125ed4d856831a3288c Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 15:12:23 +0200
Subject: [PATCH 6/9] refactor: match OpenAIChatGenerator run() argument order
 in MockChatGenerator

Reorder run()/run_async() to (messages, streaming_callback, generation_kwargs,
*, tools, tools_strict), mirroring OpenAIChatGenerator so the mock is a true
positional drop-in. Previously the order followed FallbackChatGenerator, which
puts streaming_callback last and accepts tools positionally.

Add a regression test pinning the parameter order and verifying a callback
passed as the 2nd positional arg is treated as streaming_callback.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py  | 23 +++++++++++++++-----
 test/components/generators/chat/test_mock.py | 21 ++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index 65f09ad1e74..e02bb067059 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -284,18 +284,23 @@ def _make_chunks(self, reply: ChatMessage) -> list[StreamingChunk]:
     def run(
         self,
         messages: list[ChatMessage] | str,
+        streaming_callback: StreamingCallbackT | None = None,
         generation_kwargs: dict[str, Any] | None = None,  # noqa: ARG002
+        *,
         tools: ToolsType | None = None,  # noqa: ARG002
-        streaming_callback: StreamingCallbackT | None = None,
+        tools_strict: bool | None = None,  # noqa: ARG002
     ) -> dict[str, list[ChatMessage]]:
         """
         Return a predefined reply for the given messages without calling any API.
 
+        The signature mirrors `OpenAIChatGenerator.run` so the mock can be used as a positional drop-in replacement.
+
         :param messages: The conversation history as a list of `ChatMessage` instances or a single string.
-        :param generation_kwargs: Accepted for interface compatibility and ignored.
-        :param tools: Accepted for interface compatibility and ignored.
         :param streaming_callback: An optional callback invoked with reconstructed `StreamingChunk` objects. Overrides
             the callback set at initialization.
+        :param generation_kwargs: Accepted for interface compatibility and ignored.
+        :param tools: Accepted for interface compatibility and ignored.
+        :param tools_strict: Accepted for interface compatibility and ignored.
         :returns: A dictionary with a single key `replies` containing the predefined reply as a list of one
             `ChatMessage` (empty in echo mode when there is no message to echo).
         """
@@ -321,18 +326,24 @@ def run(
     async def run_async(
         self,
         messages: list[ChatMessage] | str,
+        streaming_callback: StreamingCallbackT | None = None,
         generation_kwargs: dict[str, Any] | None = None,  # noqa: ARG002
+        *,
         tools: ToolsType | None = None,  # noqa: ARG002
-        streaming_callback: StreamingCallbackT | None = None,
+        tools_strict: bool | None = None,  # noqa: ARG002
     ) -> dict[str, list[ChatMessage]]:
         """
         Asynchronously return a predefined reply for the given messages without calling any API.
 
+        The signature mirrors `OpenAIChatGenerator.run_async` so the mock can be used as a positional drop-in
+        replacement.
+
         :param messages: The conversation history as a list of `ChatMessage` instances or a single string.
-        :param generation_kwargs: Accepted for interface compatibility and ignored.
-        :param tools: Accepted for interface compatibility and ignored.
         :param streaming_callback: An optional callback invoked with reconstructed `StreamingChunk` objects. Overrides
             the callback set at initialization.
+        :param generation_kwargs: Accepted for interface compatibility and ignored.
+        :param tools: Accepted for interface compatibility and ignored.
+        :param tools_strict: Accepted for interface compatibility and ignored.
         :returns: A dictionary with a single key `replies` containing the predefined reply as a list of one
             `ChatMessage` (empty in echo mode when there is no message to echo).
         """
diff --git a/test/components/generators/chat/test_mock.py b/test/components/generators/chat/test_mock.py
index 4d0f4250924..70d1e042e5a 100644
--- a/test/components/generators/chat/test_mock.py
+++ b/test/components/generators/chat/test_mock.py
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import inspect
+
 import pytest
 
 from haystack import Pipeline
@@ -128,6 +130,25 @@ def test_streaming_callback_sync(self):
         # the returned reply matches the predefined response
         assert result["replies"][0].text == "hello there friend"
 
+    def test_run_signature_matches_openai_order(self):
+        # run()/run_async() must mirror OpenAIChatGenerator's parameter order so the mock is a positional drop-in.
+        expected = [
+            ("self", inspect.Parameter.POSITIONAL_OR_KEYWORD),
+            ("messages", inspect.Parameter.POSITIONAL_OR_KEYWORD),
+            ("streaming_callback", inspect.Parameter.POSITIONAL_OR_KEYWORD),
+            ("generation_kwargs", inspect.Parameter.POSITIONAL_OR_KEYWORD),
+            ("tools", inspect.Parameter.KEYWORD_ONLY),
+            ("tools_strict", inspect.Parameter.KEYWORD_ONLY),
+        ]
+        for method in ("run", "run_async"):
+            params = list(inspect.signature(getattr(MockChatGenerator, method)).parameters.values())
+            assert [(p.name, p.kind) for p in params] == expected
+
+        # passing the callback as the 2nd positional arg must be treated as streaming_callback, not generation_kwargs
+        chunks: list[StreamingChunk] = []
+        MockChatGenerator("hi").run([ChatMessage.from_user("x")], chunks.append)
+        assert chunks
+
     async def test_streaming_callback_async(self):
         chunks: list[StreamingChunk] = []
 

From 015a703533b29e09002181933a9c421c0d7b32b9 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 15:23:16 +0200
Subject: [PATCH 7/9] feat: validate that ChatMessage responses are
 assistant-role

A Chat Generator's replies are always assistant messages, so reject a
non-assistant ChatMessage supplied via `responses` (at construction) or
returned from `response_fn` (at run time) with a clear error, instead of
emitting a user/system/tool message as a reply.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py  |  7 +++++++
 test/components/generators/chat/test_mock.py | 15 +++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index e02bb067059..607fc6996b2 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -14,6 +14,7 @@
 from haystack.components.generators.utils import _normalize_messages
 from haystack.dataclasses import (
     ChatMessage,
+    ChatRole,
     ComponentInfo,
     FinishReason,
     StreamingCallbackT,
@@ -137,6 +138,10 @@ def _normalize_responses(
             if isinstance(item, str):
                 normalized.append(ChatMessage.from_assistant(item))
             elif isinstance(item, ChatMessage):
+                if item.role != ChatRole.ASSISTANT:
+                    raise ValueError(
+                        f"Each ChatMessage response must have the 'assistant' role, got '{item.role.value}'."
+                    )
                 normalized.append(item)
             else:
                 raise TypeError(f"Each response must be a string or ChatMessage, got {type(item)}.")
@@ -189,6 +194,8 @@ def _coerce_to_message(result: str | ChatMessage) -> ChatMessage:
         if isinstance(result, str):
             return ChatMessage.from_assistant(result)
         if isinstance(result, ChatMessage):
+            if result.role != ChatRole.ASSISTANT:
+                raise ValueError(f"'response_fn' must return an assistant ChatMessage, got '{result.role.value}'.")
             return result
         raise TypeError(f"'response_fn' must return a string or ChatMessage, got {type(result)}.")
 
diff --git a/test/components/generators/chat/test_mock.py b/test/components/generators/chat/test_mock.py
index 70d1e042e5a..c51548f4435 100644
--- a/test/components/generators/chat/test_mock.py
+++ b/test/components/generators/chat/test_mock.py
@@ -33,6 +33,7 @@ class TestMockChatGenerator:
             (([],), {}, ValueError, "must not be an empty list"),
             ((123,), {}, TypeError, "must be a string, ChatMessage, or a sequence"),
             (([123],), {}, TypeError, "Each response must be a string or ChatMessage"),
+            ((ChatMessage.from_user("hi"),), {}, ValueError, "must have the 'assistant' role"),
         ],
     )
     def test_init_rejects_invalid_config(self, args, kwargs, exception, match):
@@ -74,10 +75,16 @@ def test_response_fn(self, fn, expected):
         result = MockChatGenerator(response_fn=fn).run([ChatMessage.from_user("hello")])
         assert result["replies"][0].text == expected
 
-    def test_response_fn_invalid_return_raises(self):
-        gen = MockChatGenerator(response_fn=lambda messages: 123)
-        with pytest.raises(TypeError, match="must return a string or ChatMessage"):
-            gen.run([ChatMessage.from_user("hi")])
+    @pytest.mark.parametrize(
+        ("fn", "exception", "match"),
+        [
+            (lambda messages: 123, TypeError, "must return a string or ChatMessage"),
+            (lambda messages: ChatMessage.from_user("nope"), ValueError, "must return an assistant ChatMessage"),
+        ],
+    )
+    def test_response_fn_invalid_return_raises(self, fn, exception, match):
+        with pytest.raises(exception, match=match):
+            MockChatGenerator(response_fn=fn).run([ChatMessage.from_user("hi")])
 
     def test_string_input_is_normalized(self):
         gen = MockChatGenerator(response_fn=_exclaim)

From 97044edb371542a415b7b36222b05cff75d67b18 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 15:36:11 +0200
Subject: [PATCH 8/9] docs: document assistant-role requirement for
 MockChatGenerator responses

Note in the class/__init__ docstrings (and the ValueError list) that any
ChatMessage passed via `responses` or returned from `response_fn` must
have the assistant role, and reword `_coerce_to_message`'s docstring to
reflect that it validates rather than coerces the role.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 haystack/components/generators/chat/mock.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index 607fc6996b2..521370d56d4 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -44,6 +44,7 @@ class MockChatGenerator:
     The response is selected based on how the component is configured:
 
     - **Fixed response**: pass a single string or `ChatMessage`. The same reply is returned on every call.
+      Any `ChatMessage` passed as a response must have the `assistant` role.
     - **Cycling responses**: pass a list of strings and/or `ChatMessage` objects. Each call returns the next item,
       wrapping around to the start once the list is exhausted. This is useful to drive multi-step flows such as
       Agents, where the first call returns a tool call and a later call returns the final answer.
@@ -90,18 +91,19 @@ def __init__(
 
         :param responses: The predefined response(s) to return. Accepts a single string or `ChatMessage` (returned on
             every call), or a non-empty list of strings and/or `ChatMessage` objects that are returned in order,
-            cycling back to the start once exhausted. Strings are wrapped into assistant `ChatMessage` objects.
-            Mutually exclusive with `response_fn`. If neither is provided, the component echoes the last message with
-            text content.
+            cycling back to the start once exhausted. Strings are wrapped into assistant `ChatMessage` objects, and any
+            `ChatMessage` passed must have the `assistant` role. Mutually exclusive with `response_fn`. If neither is
+            provided, the component echoes the last message with text content.
         :param response_fn: An optional callable that receives the input messages and returns the reply as a string or
-            `ChatMessage`. Use this for input-dependent responses. Mutually exclusive with `responses`. To support
-            serialization, pass a named function (lambdas and nested functions cannot be serialized).
+            an assistant `ChatMessage`. Use this for input-dependent responses. Mutually exclusive with `responses`. To
+            support serialization, pass a named function (lambdas and nested functions cannot be serialized).
         :param model: The model name reported in the response metadata. Purely cosmetic; no model is loaded.
         :param meta: Additional metadata merged into the `meta` of every returned `ChatMessage`. A per-response
             `ChatMessage`'s own metadata takes precedence over this value.
         :param streaming_callback: An optional callback invoked with `StreamingChunk` objects reconstructed from the
             predefined response. It lets the mock exercise streaming code paths without a real model.
-        :raises ValueError: If both `responses` and `response_fn` are provided, or if `responses` is an empty list.
+        :raises ValueError: If both `responses` and `response_fn` are provided, if `responses` is an empty list, or if
+            a `ChatMessage` response does not have the `assistant` role.
         """
         if responses is not None and response_fn is not None:
             raise ValueError("Pass either 'responses' or 'response_fn', not both.")
@@ -190,7 +192,7 @@ def _echo_text(messages: list[ChatMessage]) -> str | None:
 
     @staticmethod
     def _coerce_to_message(result: str | ChatMessage) -> ChatMessage:
-        """Coerce the output of `response_fn` into an assistant `ChatMessage`."""
+        """Turn the output of `response_fn` into a `ChatMessage`, wrapping strings and requiring the assistant role."""
         if isinstance(result, str):
             return ChatMessage.from_assistant(result)
         if isinstance(result, ChatMessage):

From 17abca6996b8bbcf2d791a53c463b2919dfbbcd6 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 22 Jun 2026 15:37:11 +0200
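Subject: [PATCH 9/9] Update haystack/components/generators/chat/mock.py

Call `warm_up()` unconditionally in `run()` instead of guarding the call
with a `self._is_warmed_up` check.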

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 haystack/components/generators/chat/mock.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/haystack/components/generators/chat/mock.py b/haystack/components/generators/chat/mock.py
index 521370d56d4..f4fb438d872 100644
--- a/haystack/components/generators/chat/mock.py
+++ b/haystack/components/generators/chat/mock.py
@@ -313,8 +313,7 @@ def run(
         :returns: A dictionary with a single key `replies` containing the predefined reply as a list of one
             `ChatMessage` (empty in echo mode when there is no message to echo).
         """
-        if not self._is_warmed_up:
-            self.warm_up()
+        self.warm_up()
 
         messages = _normalize_messages(messages)
         streaming_callback = select_streaming_callback(