scrapy-plugins · Gallaecio · Jun 14, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/docs/reference/inputs.rst b/docs/reference/inputs.rst
@@ -50,9 +50,15 @@ Built-in inputs
 .. autoclass:: scrapy_zyte_api.Actions
     :members:
 
+.. autoclass:: scrapy_zyte_api.CapturedResponse
+    :members:
+
 .. autoclass:: scrapy_zyte_api.Geolocation
     :members:
 
+.. autoclass:: scrapy_zyte_api.NetworkCapture
+    :members:
+
 .. autoclass:: scrapy_zyte_api.Screenshot
     :members:
 
@@ -66,3 +72,5 @@ Built-in input annotations
 .. autofunction:: scrapy_zyte_api.actions
 
 .. autofunction:: scrapy_zyte_api.custom_attrs
+
+.. autofunction:: scrapy_zyte_api.network_capture
diff --git a/docs/usage/scrapy-poet.rst b/docs/usage/scrapy-poet.rst
@@ -129,6 +129,57 @@ resulting page object:
                 return Product(is_valid=False)
         return None
 
+.. _network-capture:
+
+Network capture
+---------------
+
+You can capture network responses received during browser rendering by adding
+a :class:`scrapy_zyte_api.NetworkCapture` dependency and annotating it with
+filters passed to the :func:`scrapy_zyte_api.network_capture` function:
+
+.. code-block:: python
+
+    from typing import Annotated
+
+    from scrapy_zyte_api import NetworkCapture, network_capture
+
+
+    @attrs.define
+    class MyPageObject(BasePage):
+        response: BrowserResponse
+        network: Annotated[
+            NetworkCapture,
+            network_capture(
+                [
+                    {
+                        "filterType": "url",
+                        "value": "/api/",
+                        "matchType": "contains",
+                        "httpResponseBody": True,
+                    },
+                    {"filterType": "resourceType", "value": "xhr"},
+                ]
+            ),
+        ]
+
+Each filter is a :class:`~scrapy_zyte_api.NetworkCaptureFilter` dict. A
+response is captured if it matches any filter. Set ``httpResponseBody`` to
+``True`` on a filter to include the decoded response body for responses matched
+by that filter.
+
+You can access the captured responses as
+:class:`~scrapy_zyte_api.CapturedResponse` objects in the
+:attr:`.NetworkCapture.results` attribute:
+
+.. code-block:: python
+
+    def parse_network_capture(self):
+        for captured in self.network.results:
+            if "/api/products" in captured.url and captured.body is not None:
+                data = json.loads(captured.body)
+                ...
+
 .. _custom-attrs:
 
 Custom attribute extraction

diff --git a/scrapy_zyte_api/__init__.py b/scrapy_zyte_api/__init__.py
@@ -7,13 +7,19 @@
 
 # Register web-poet serializers
 from . import _serialization  # noqa: F401
-from ._annotations import ExtractFrom, actions, custom_attrs
+from ._annotations import ExtractFrom, actions, custom_attrs, network_capture
 from ._middlewares import (
     ScrapyZyteAPIDownloaderMiddleware,
     ScrapyZyteAPIRefererSpiderMiddleware,
     ScrapyZyteAPISpiderMiddleware,
 )
-from ._page_inputs import Actions, Geolocation, Screenshot
+from ._page_inputs import (
+    Actions,
+    CapturedResponse,
+    Geolocation,
+    NetworkCapture,
+    Screenshot,
+)
 from ._request_fingerprinter import ScrapyZyteAPIRequestFingerprinter
 from ._session import (
     SESSION_AGGRESSIVE_RETRY_POLICY as _SESSION_AGGRESSIVE_RETRY_POLICY,
@@ -54,9 +60,11 @@
     "SESSION_DEFAULT_RETRY_POLICY",
     "Actions",
     "Addon",
+    "CapturedResponse",
     "ExtractFrom",
     "Geolocation",
     "LocationSessionConfig",
+    "NetworkCapture",
     "ScrapyZyteAPIDownloadHandler",
     "ScrapyZyteAPIDownloaderMiddleware",
     "ScrapyZyteAPIRefererSpiderMiddleware",
@@ -69,6 +77,7 @@
     "custom_attrs",
     "get_request_session_id",
     "is_session_init_request",
+    "network_capture",
     "session_config",
     "session_config_registry",
 ]
diff --git a/scrapy_zyte_api/_annotations.py b/scrapy_zyte_api/_annotations.py
@@ -85,6 +85,33 @@ def actions(value: Iterable[Action]) -> tuple[Any, ...]:
     return tuple(make_hashable(action) for action in value)
 
 
+class NetworkCaptureFilter(TypedDict, total=False):
+    """A filter for :func:`~scrapy_zyte_api.network_capture`.
+
+    All keys are optional. See :ref:`network-capture`.
+    """
+
+    #: What the filter matches on, e.g. ``"url"`` or ``"resourceType"``.
+    filterType: str
+    #: Value to match, e.g. ``"/api/"`` or ``"xhr"``.
+    value: str
+    #: Match mode applied to ``value``, e.g. ``"contains"``.
+    matchType: str
+    #: Whether to include the body of responses matched by this filter.
+    httpResponseBody: bool
+
+
+def network_capture(filters: Iterable[NetworkCaptureFilter]) -> tuple[Any, ...]:
+    """Convert an iterable of :class:`~scrapy_zyte_api.NetworkCaptureFilter`
+    dicts into a hashable value.
+
+    The returned tuple is meant to be used as :class:`typing.Annotated`
+    metadata of a :class:`~scrapy_zyte_api.NetworkCapture` dependency. See
+    :ref:`network-capture`.
+    """
+    return tuple(make_hashable(f) for f in filters)
+
+
 def custom_attrs(
     input: dict[str, Any],  # noqa: A002
     options: dict[str, Any] | None = None,

diff --git a/scrapy_zyte_api/_page_inputs.py b/scrapy_zyte_api/_page_inputs.py
@@ -1,4 +1,5 @@
 from base64 import b64decode
+from typing import Any
 
 import attrs
 
@@ -36,3 +37,53 @@ class Screenshot:
     @classmethod
     def from_base64(cls, body):
         return cls(body=b64decode(body.encode()))
+
+
+@attrs.define
+class CapturedResponse:
+    """A network response captured during browser page rendering.
+
+    Part of :class:`NetworkCapture`.
+    """
+
+    #: Response URL.
+    url: str
+
+    #: HTTP status code.
+    status: int
+
+    #: Response headers.
+    headers: dict[str, str]
+
+    #: Response body. ``None`` if ``httpResponseBody`` was not set to ``True``
+    #: on the matching filter.
+    body: bytes | None
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "CapturedResponse":
+        """Create an instance from an item of the ``networkCapture`` list of
+        a raw Zyte API response.
+
+        The ``url`` and ``statusCode`` keys are required. ``headers`` defaults
+        to an empty dict, and ``httpResponseBody``, if present, is
+        base64-decoded into :attr:`body`.
+        """
+        body_b64: str | None = data.get("httpResponseBody")
+        return cls(
+            url=data["url"],
+            status=data["statusCode"],
+            headers=data.get("headers", {}),
+            body=b64decode(body_b64) if body_b64 is not None else None,
+        )
+
+
+@attrs.define
+class NetworkCapture:
+    """A page input that specifies network capture filters and contains captured responses.
+
+    The filters must be :ref:`specified with an annotation
+    <network-capture>` using :func:`~scrapy_zyte_api.network_capture`.
+    """
+
+    #: Captured responses, as :class:`CapturedResponse` objects.
+    results: list[CapturedResponse]
diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py
@@ -45,6 +45,7 @@
 
 from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot
 from scrapy_zyte_api._annotations import _ActionResult, _from_hashable
+from scrapy_zyte_api._page_inputs import CapturedResponse, NetworkCapture
 from scrapy_zyte_api.utils import _ENGINE_HAS_DOWNLOAD_ASYNC, maybe_deferred_to_future
 
 if TYPE_CHECKING:
@@ -101,6 +102,7 @@ class ZyteApiProvider(PageObjectInputProvider):
         Geolocation,
         JobPosting,
         JobPostingNavigation,
+        NetworkCapture,
         Product,
         ProductList,
         ProductNavigation,
@@ -202,6 +204,16 @@ async def __call__(
                 for action in cls.__metadata__[0]:  # type: ignore[attr-defined]
                     zyte_api_meta["actions"].append(_from_hashable(action))
                 continue
+            if cls_stripped is NetworkCapture:
+                if not is_typing_annotated(cls):
+                    raise ValueError(
+                        "NetworkCapture dependencies must be annotated, "
+                        "e.g. Annotated[NetworkCapture, network_capture([...list of filters...])]."
+                    )
+                zyte_api_meta["networkCapture"] = []
+                for f in cls.__metadata__[0]:  # type: ignore[attr-defined]
+                    zyte_api_meta["networkCapture"].append(_from_hashable(f))
+                continue
             if cls_stripped in {CustomAttributes, CustomAttributesValues}:
                 custom_attrs_input, custom_attrs_options = cls.__metadata__[0]  # type: ignore[attr-defined]
                 zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input)
@@ -358,6 +370,14 @@ async def __call__(
                 result = AnnotatedInstance(Actions(actions_result), cls.__metadata__)  # type: ignore[attr-defined]
                 results.append(result)
                 continue
+            if cls_stripped is NetworkCapture and is_typing_annotated(cls):
+                captured = [
+                    CapturedResponse.from_dict(item)
+                    for item in api_response.raw_api_response.get("networkCapture", [])
+                ]
+                result = AnnotatedInstance(NetworkCapture(captured), cls.__metadata__)  # type: ignore[attr-defined]
+                results.append(result)
+                continue
             if cls_stripped is CustomAttributes and is_typing_annotated(cls):
                 custom_attrs_result = api_response.raw_api_response["customAttributes"]
                 result = AnnotatedInstance(

diff --git a/tests/mockserver.py b/tests/mockserver.py
@@ -232,6 +232,24 @@ def render_POST(self, request):
                 results.append(result)
             response_data["actions"] = results  # type: ignore[assignment]
 
+        network_capture_filters = request_data.get("networkCapture")
+        if network_capture_filters:
+            captured = []
+            for f in network_capture_filters:
+                entry: dict = {
+                    "url": f"https://api.example.com/data?filter={f.get('value', '')}",
+                    "statusCode": 200,
+                    "headers": {"content-type": "application/json"},
+                    "filter": f,
+                    "interceptionStatus": "success",
+                }
+                if f.get("httpResponseBody"):
+                    entry["httpResponseBody"] = b64encode(
+                        b'{"captured": true}'
+                    ).decode()
+                captured.append(entry)
+            response_data["networkCapture"] = captured  # type: ignore[assignment]
+
         if request_data.get("product") is True:
             response_data["product"] = {
                 "url": response_data["url"],

diff --git a/tests/test_providers.py b/tests/test_providers.py
@@ -40,11 +40,14 @@
 
 from scrapy_zyte_api import (
     Actions,
+    CapturedResponse,
     ExtractFrom,
     Geolocation,
+    NetworkCapture,
     Screenshot,
     actions,
     custom_attrs,
+    network_capture,
 )
 from scrapy_zyte_api._params import _EXTRACT_KEYS
 from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler
@@ -1097,6 +1100,73 @@ def parse_(self, response: DummyResponse, page: ActionProductPage):  # type: ign
     )
 
 
+@deferred_f_from_coro_f
+async def test_provider_network_capture(mockserver):
+    @attrs.define
+    class NetworkCapturePage(BasePage):
+        product: Product
+        captured: Annotated[
+            NetworkCapture,
+            network_capture(
+                [
+                    {
+                        "filterType": "url",
+                        "value": "/api/",
+                        "matchType": "contains",
+                        "httpResponseBody": True,
+                    },
+                    {"filterType": "resourceType", "value": "xhr"},
+                ]
+            ),
+        ]
+
+    class NetworkCaptureSpider(ZyteAPISpider):
+        def parse_(self, response: DummyResponse, page: NetworkCapturePage):  # type: ignore[override]
+            yield {"captured": page.captured}
+
+    settings = deepcopy(SETTINGS)
+    settings["ZYTE_API_URL"] = mockserver.urljoin("/")
+    settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
+
+    item, *_ = await _crawl_single_item(NetworkCaptureSpider, HtmlResource, settings)
+    nc: NetworkCapture = item["captured"]
+    assert isinstance(nc, NetworkCapture)
+    assert len(nc.results) == 2
+
+    first = nc.results[0]
+    assert isinstance(first, CapturedResponse)
+    assert first.url == "https://api.example.com/data?filter=/api/"
+    assert first.status == 200
+    assert first.headers == {"content-type": "application/json"}
+    assert first.body == b'{"captured": true}'
+
+    second = nc.results[1]
+    assert isinstance(second, CapturedResponse)
+    assert second.url == "https://api.example.com/data?filter=xhr"
+    assert second.status == 200
+    assert second.body is None
+
+
+@deferred_f_from_coro_f
+async def test_provider_network_capture_unannotated(mockserver, caplog):
+    @attrs.define
+    class NetworkCapturePage(BasePage):
+        product: Product
+        captured: NetworkCapture
+
+    class NetworkCaptureSpider(ZyteAPISpider):
+        def parse_(self, response: DummyResponse, page: NetworkCapturePage):  # type: ignore[override]
+            pass
+
+    settings = deepcopy(SETTINGS)
+    settings["ZYTE_API_URL"] = mockserver.urljoin("/")
+    settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
+
+    item, *_ = await _crawl_single_item(NetworkCaptureSpider, HtmlResource, settings)
+    assert item is None
+    assert "NetworkCapture dependencies must be annotated" in caplog.text
+
+
 def test_item_keywords():
     assert set(_EXTRACT_KEYS) == set(_ITEM_KEYWORDS.values())