Skip to content

Commit 2813617

Browse files
committed
Raise default client timeout, add status_code property, fix integration tests
- Raise the ScrapeDoClient default timeout from httpx's 5s to 60s to accommodate proxy round-trips (browser rendering, geo-routing, fingerprinting); update the docstring accordingly.
- Add a ScrapeDoResponse.status_code property as a raw passthrough to the underlying httpx response, distinct from the envelope-aware target_status_code and scrape_do_status_code accessors.
- Fix two integration tests that violated the 5000ms timeout floor in RequestParameters (2000 -> 5000); the intent is preserved against httpbingo.org/delay/10.
- Update integration tests to use response.httpx_response.json(), since the wrapper does not proxy json().
- Add unit tests for the new status_code property and the new default timeout.
1 parent b15e74b commit 2813617

5 files changed

Lines changed: 69 additions & 17 deletions

File tree

src/scrape_do/client.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@
1717
BaseTransport,
1818
RequestError
1919
)
20-
from httpx._config import (
21-
DEFAULT_TIMEOUT_CONFIG,
22-
DEFAULT_LIMITS
23-
)
20+
from httpx._config import DEFAULT_LIMITS
2421
from httpx._types import (
2522
TimeoutTypes,
2623
CertTypes,
@@ -164,8 +161,10 @@ class ScrapeDoClient:
164161
authentication.
165162
http1 (bool): Enable HTTP/1.1 support.
166163
http2 (bool): Enable HTTP/2 multiplexing for higher concurrency.
167-
timeout (TimeoutTypes): The default timeout configuration applied to
168-
all network requests.
164+
timeout (TimeoutTypes): The default timeout (in seconds) applied to
165+
all network phases. Defaults to 60s, raised from httpx's 5s
166+
default to accommodate Scrape.do proxy round-trips
167+
(browser rendering, geo-routing, fingerprinting).
169168
limits (Limits): Configuration for maximum connection pool sizes.
170169
event_hooks (Optional[Mapping[str, list[Callable[..., Any]]]]): Custom
171170
hooks injected into the request/response lifecycle for logging or
@@ -186,7 +185,7 @@ def __init__(
186185
cert: Optional[CertTypes] = None,
187186
http1: bool = True,
188187
http2: bool = False,
189-
timeout: TimeoutTypes = DEFAULT_TIMEOUT_CONFIG,
188+
timeout: TimeoutTypes = 60.0,
190189
limits: Limits = DEFAULT_LIMITS,
191190
event_hooks: Optional[Mapping[str, list[Callable[..., Any]]]] = None,
192191
transport: Optional[BaseTransport] = None,

src/scrape_do/models/response.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,19 @@ def httpx_response(self) -> httpx.Response:
418418
"""
419419
return self._raw_response
420420

421+
@property
422+
def status_code(self) -> int:
423+
"""Convenience accessor for the underlying HTTPX response status code.
424+
425+
Equivalent to `response.httpx_response.status_code`. Distinct from
426+
`target_status_code` and `scrape_do_status_code`, which interpret the
427+
Scrape.do response envelope.
428+
429+
Returns:
430+
The HTTP status code of the response received from `api.scrape.do`.
431+
"""
432+
return self.httpx_response.status_code
433+
421434
@property
422435
def request(self) -> PreparedScrapeDoRequest:
423436
"""Exposes the original, validated request configuration.

tests/integration/test_client.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def test_proxy_timeout_logic(
109109
"""
110110
response = no_retry_sync_client.get(
111111
f"{HTTPBIN_BASE}/delay/10",
112-
timeout=2000
112+
timeout=5000
113113
)
114114

115115
# True proxy error
@@ -164,7 +164,7 @@ def test_transparent_proxy_timeout_logic(
164164
"""
165165
response = no_retry_sync_client.get(
166166
f"{HTTPBIN_BASE}/delay/10",
167-
timeout=2000,
167+
timeout=5000,
168168
transparent_response=True
169169
)
170170

@@ -201,7 +201,7 @@ def test_live_post_json_payload(
201201
assert response.status_code == 200
202202

203203
# Parse the echo response
204-
echoed_data = response.json()
204+
echoed_data = response.httpx_response.json()
205205

206206
# httpbin puts JSON payloads inside the 'json' key of its response
207207
assert echoed_data.get("json") == test_payload
@@ -246,7 +246,7 @@ def test_live_cookie_injection(
246246

247247
assert response.status_code == 200
248248

249-
echoed_data = response.json()
249+
echoed_data = response.httpx_response.json()
250250
returned_cookies = echoed_data.get("cookies", {})
251251

252252
assert returned_cookies.get("session_token") == "secret_123"
@@ -274,7 +274,7 @@ def test_live_session_stickiness_and_ip_validation(
274274
session_id=888,
275275
super=True
276276
)
277-
ip1 = resp1.json().get("origin")
277+
ip1 = resp1.httpx_response.json().get("origin")
278278
rid1 = resp1.rid
279279

280280
logger.info(f"Session 888 (Req 1) -> RID: {rid1} | IP: {ip1}")
@@ -285,7 +285,7 @@ def test_live_session_stickiness_and_ip_validation(
285285
session_id=888,
286286
super=True
287287
)
288-
ip2 = resp2.json().get("origin")
288+
ip2 = resp2.httpx_response.json().get("origin")
289289
rid2 = resp2.rid
290290

291291
logger.info(f"Session 888 (Req 2) -> RID: {rid2} | IP: {ip2}")
@@ -312,7 +312,7 @@ def test_live_session_isolation(
312312
session_id=101,
313313
super=True
314314
)
315-
ip_a = resp_a.json().get("origin")
315+
ip_a = resp_a.httpx_response.json().get("origin")
316316
rid_a = resp_a.rid
317317

318318
logger.info(f"Session 101 -> RID: {rid_a} | IP: {ip_a}")
@@ -322,7 +322,7 @@ def test_live_session_isolation(
322322
session_id=909,
323323
super=True
324324
)
325-
ip_b = resp_b.json().get("origin")
325+
ip_b = resp_b.httpx_response.json().get("origin")
326326
rid_b = resp_b.rid
327327

328328
logger.info(f"Session 909 -> RID: {rid_b} | IP: {ip_b}")
@@ -355,7 +355,7 @@ def test_live_session_exhaustion_and_rotation(
355355
session_id=session_id,
356356
super=True
357357
)
358-
initial_ip = resp_baseline.json().get("origin")
358+
initial_ip = resp_baseline.httpx_response.json().get("origin")
359359
initial_rid = resp_baseline.rid
360360

361361
logger.info(
@@ -376,7 +376,7 @@ def test_live_session_exhaustion_and_rotation(
376376
session_id=session_id,
377377
super=True
378378
)
379-
current_ip = resp_next.json().get("origin")
379+
current_ip = resp_next.httpx_response.json().get("origin")
380380
current_rid = resp_next.rid
381381

382382
logger.info((

tests/unit/models/test_response.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,34 @@ def test_raise_for_status_success(make_request, make_response):
287287

288288
resp = ScrapeDoResponse(req, resp).raise_for_status()
289289

290+
@staticmethod
291+
def test_status_code_passthrough(example_url, mock_json_payload):
292+
"""
293+
Ensures `status_code` is a raw passthrough to the underlying
294+
httpx.Response status code, distinct from envelope-aware accessors
295+
like `target_status_code`.
296+
"""
297+
req = PreparedScrapeDoRequest(
298+
api_params=RequestParameters(
299+
url=example_url,
300+
render=True,
301+
return_json=True
302+
)
303+
)
304+
# JSON mode pulls the target's reported status (envelope) and the
305+
# proxy's gateway status (raw httpx) apart.
306+
headers = {"scrape.do-initial-status-code": "200"}
307+
http_resp = httpx.Response(
308+
202, json=mock_json_payload, headers=headers
309+
)
310+
response = ScrapeDoResponse(request=req, response=http_resp)
311+
312+
# Raw outer status from api.scrape.do.
313+
assert response.status_code == 202
314+
# Envelope-aware: target reported 200 via JSON statusCode.
315+
assert response.target_status_code == 200
316+
assert response.scrape_do_status_code == 202
317+
290318

291319
class TestScrapeDoResponseSerialization:
292320

tests/unit/test_client.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ def test_httpx_client_default_values(self, mock_env_vars):
7676
assert not transport._pool._http2
7777
assert transport._pool._max_connections == 50
7878

79+
def test_default_timeout_is_60_seconds(self, mock_env_vars):
80+
"""
81+
Ensures the SDK's default timeout (60s across all phases) overrides
82+
httpx's 5s default to comfortably accommodate proxy round-trips.
83+
"""
84+
with ScrapeDoClient(api_token="test") as client:
85+
timeout = client._http_client.timeout
86+
assert timeout.connect == 60.0
87+
assert timeout.read == 60.0
88+
assert timeout.write == 60.0
89+
assert timeout.pool == 60.0
90+
7991
def test_explicit_close(self, mock_env_vars, mocker):
8092
"""
8193
Ensures calling client.close() delegates to the httpx.Client

0 commit comments

Comments
 (0)