Skip to content

Commit e6d0ea1

Browse files
committed
Read chunked HTTP responses.
Fix #1550.
1 parent 2abf77f commit e6d0ea1

File tree

6 files changed

+167
-79
lines changed

6 files changed

+167
-79
lines changed

src/websockets/client.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def parse(self) -> Generator[None]:
333333
self.logger.debug("< HTTP/1.1 %d %s", code, phrase)
334334
for key, value in response.headers.raw_items():
335335
self.logger.debug("< %s: %s", key, value)
336-
if response.body is not None:
336+
if response.body:
337337
self.logger.debug("< [body] (%d bytes)", len(response.body))
338338

339339
try:

src/websockets/http11.py

+89-56
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,14 @@ class Response:
185185
status_code: Response code.
186186
reason_phrase: Response reason.
187187
headers: Response headers.
188-
body: Response body, if any.
188+
body: Response body.
189189
190190
"""
191191

192192
status_code: int
193193
reason_phrase: str
194194
headers: Headers
195-
body: bytes | None = None
195+
body: bytes = b""
196196

197197
_exception: Exception | None = None
198198

@@ -266,36 +266,9 @@ def parse(
266266

267267
headers = yield from parse_headers(read_line)
268268

269-
# https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
270-
271-
if "Transfer-Encoding" in headers:
272-
raise NotImplementedError("transfer codings aren't supported")
273-
274-
# Since websockets only does GET requests (no HEAD, no CONNECT), all
275-
# responses except 1xx, 204, and 304 include a message body.
276-
if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
277-
body = None
278-
else:
279-
content_length: int | None
280-
try:
281-
# MultipleValuesError is sufficiently unlikely that we don't
282-
# attempt to handle it. Instead we document that its parent
283-
# class, LookupError, may be raised.
284-
raw_content_length = headers["Content-Length"]
285-
except KeyError:
286-
content_length = None
287-
else:
288-
content_length = int(raw_content_length)
289-
290-
if content_length is None:
291-
try:
292-
body = yield from read_to_eof(MAX_BODY_SIZE)
293-
except RuntimeError:
294-
raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")
295-
elif content_length > MAX_BODY_SIZE:
296-
raise SecurityError(f"body too large: {content_length} bytes")
297-
else:
298-
body = yield from read_exact(content_length)
269+
body = yield from read_body(
270+
status_code, headers, read_line, read_exact, read_to_eof
271+
)
299272

300273
return cls(status_code, reason, headers, body)
301274

@@ -308,11 +281,37 @@ def serialize(self) -> bytes:
308281
# we can keep this simple.
309282
response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
310283
response += self.headers.serialize()
311-
if self.body is not None:
312-
response += self.body
284+
response += self.body
313285
return response
314286

315287

288+
def parse_line(
289+
read_line: Callable[[int], Generator[None, None, bytes]],
290+
) -> Generator[None, None, bytes]:
291+
"""
292+
Parse a single line.
293+
294+
CRLF is stripped from the return value.
295+
296+
Args:
297+
read_line: Generator-based coroutine that reads a LF-terminated line
298+
or raises an exception if there isn't enough data.
299+
300+
Raises:
301+
EOFError: If the connection is closed without a CRLF.
302+
SecurityError: If the response exceeds a security limit.
303+
304+
"""
305+
try:
306+
line = yield from read_line(MAX_LINE_LENGTH)
307+
except RuntimeError:
308+
raise SecurityError("line too long")
309+
# Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
310+
if not line.endswith(b"\r\n"):
311+
raise EOFError("line without CRLF")
312+
return line[:-2]
313+
314+
316315
def parse_headers(
317316
read_line: Callable[[int], Generator[None, None, bytes]],
318317
) -> Generator[None, None, Headers]:
@@ -364,28 +363,62 @@ def parse_headers(
364363
return headers
365364

366365

367-
def parse_line(
366+
def read_body(
367+
status_code: int,
368+
headers: Headers,
368369
read_line: Callable[[int], Generator[None, None, bytes]],
370+
read_exact: Callable[[int], Generator[None, None, bytes]],
371+
read_to_eof: Callable[[int], Generator[None, None, bytes]],
369372
) -> Generator[None, None, bytes]:
370-
"""
371-
Parse a single line.
372-
373-
CRLF is stripped from the return value.
374-
375-
Args:
376-
read_line: Generator-based coroutine that reads a LF-terminated line
377-
or raises an exception if there isn't enough data.
378-
379-
Raises:
380-
EOFError: If the connection is closed without a CRLF.
381-
SecurityError: If the response exceeds a security limit.
373+
# https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
374+
375+
# Since websockets only does GET requests (no HEAD, no CONNECT), all
376+
# responses except 1xx, 204, and 304 include a message body.
377+
if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
378+
return b""
379+
380+
# MultipleValuesError is sufficiently unlikely that we don't attempt to
381+
# handle it when accessing headers. Instead we document that its parent
382+
# class, LookupError, may be raised.
383+
# Conversions from str to int are protected by sys.set_int_max_str_digits.
384+
385+
elif (coding := headers.get("Transfer-Encoding")) is not None:
386+
if coding != "chunked":
387+
raise NotImplementedError(f"transfer coding {coding} isn't supported")
388+
389+
body = b""
390+
while True:
391+
chunk_size_line = yield from parse_line(read_line)
392+
raw_chunk_size = chunk_size_line.split(b";", 1)[0]
393+
# Set a lower limit than default_max_str_digits; 1 EiB (15 hex digits) is plenty.
394+
if len(raw_chunk_size) > 15:
395+
str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
396+
raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
397+
chunk_size = int(raw_chunk_size, 16)
398+
if chunk_size == 0:
399+
break
400+
if len(body) + chunk_size > MAX_BODY_SIZE:
401+
raise SecurityError(
402+
f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
403+
)
404+
body += yield from read_exact(chunk_size)
405+
if (yield from read_exact(2)) != b"\r\n":
406+
raise ValueError("chunk without CRLF")
407+
# Read the trailer.
408+
yield from parse_headers(read_line)
409+
return body
410+
411+
elif (raw_content_length := headers.get("Content-Length")) is not None:
412+
# Set a lower limit than default_max_str_digits; 1 EB (18 decimal digits) is plenty.
413+
if len(raw_content_length) > 18:
414+
raise SecurityError(f"body too large: {raw_content_length} bytes")
415+
content_length = int(raw_content_length)
416+
if content_length > MAX_BODY_SIZE:
417+
raise SecurityError(f"body too large: {content_length} bytes")
418+
return (yield from read_exact(content_length))
382419

383-
"""
384-
try:
385-
line = yield from read_line(MAX_LINE_LENGTH)
386-
except RuntimeError:
387-
raise SecurityError("line too long")
388-
# Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
389-
if not line.endswith(b"\r\n"):
390-
raise EOFError("line without CRLF")
391-
return line[:-2]
420+
else:
421+
try:
422+
return (yield from read_to_eof(MAX_BODY_SIZE))
423+
except RuntimeError:
424+
raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")

src/websockets/server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ def send_response(self, response: Response) -> None:
525525
self.logger.debug("> HTTP/1.1 %d %s", code, phrase)
526526
for key, value in response.headers.raw_items():
527527
self.logger.debug("> %s: %s", key, value)
528-
if response.body is not None:
528+
if response.body:
529529
self.logger.debug("> [body] (%d bytes)", len(response.body))
530530

531531
self.writes.append(response.serialize())

tests/test_client.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def test_receive_successful_response(self, _generate_key):
204204
}
205205
),
206206
)
207-
self.assertIsNone(response.body)
207+
self.assertEqual(response.body, b"")
208208
self.assertIsNone(client.handshake_exc)
209209

210210
def test_receive_failed_response(self, _generate_key):

tests/test_http11.py

+74-19
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def test_parse_body(self):
8787
)
8888

8989
def test_parse_body_with_transfer_encoding(self):
90-
self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n")
90+
self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: compress\r\n\r\n")
9191
with self.assertRaises(NotImplementedError) as raised:
9292
next(self.parse())
9393
self.assertEqual(
@@ -151,7 +151,7 @@ def test_parse(self):
151151
self.assertEqual(response.status_code, 101)
152152
self.assertEqual(response.reason_phrase, "Switching Protocols")
153153
self.assertEqual(response.headers["Upgrade"], "websocket")
154-
self.assertIsNone(response.body)
154+
self.assertEqual(response.body, b"")
155155

156156
def test_parse_empty(self):
157157
self.reader.feed_eof()
@@ -215,22 +215,31 @@ def test_parse_invalid_header(self):
215215
"invalid HTTP header line: Oops",
216216
)
217217

218-
def test_parse_body_with_content_length(self):
219-
self.reader.feed_data(
220-
b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n"
221-
)
222-
response = self.assertGeneratorReturns(self.parse())
223-
self.assertEqual(response.body, b"Hello world!\n")
224-
225-
def test_parse_body_without_content_length(self):
218+
def test_parse_body(self):
226219
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\nHello world!\n")
227220
gen = self.parse()
228221
self.assertGeneratorRunning(gen)
229222
self.reader.feed_eof()
230223
response = self.assertGeneratorReturns(gen)
231224
self.assertEqual(response.body, b"Hello world!\n")
232225

233-
def test_parse_body_with_content_length_too_long(self):
226+
def test_parse_body_too_large(self):
227+
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577)
228+
with self.assertRaises(SecurityError) as raised:
229+
next(self.parse())
230+
self.assertEqual(
231+
str(raised.exception),
232+
"body too large: over 1048576 bytes",
233+
)
234+
235+
def test_parse_body_with_content_length(self):
236+
self.reader.feed_data(
237+
b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n"
238+
)
239+
response = self.assertGeneratorReturns(self.parse())
240+
self.assertEqual(response.body, b"Hello world!\n")
241+
242+
def test_parse_body_with_content_length_and_body_too_large(self):
234243
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nContent-Length: 1048577\r\n\r\n")
235244
with self.assertRaises(SecurityError) as raised:
236245
next(self.parse())
@@ -239,33 +248,79 @@ def test_parse_body_with_content_length_too_long(self):
239248
"body too large: 1048577 bytes",
240249
)
241250

242-
def test_parse_body_without_content_length_too_long(self):
243-
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577)
251+
def test_parse_body_with_content_length_and_body_way_too_large(self):
252+
self.reader.feed_data(
253+
b"HTTP/1.1 200 OK\r\nContent-Length: 1234567890123456789\r\n\r\n"
254+
)
244255
with self.assertRaises(SecurityError) as raised:
245256
next(self.parse())
246257
self.assertEqual(
247258
str(raised.exception),
248-
"body too large: over 1048576 bytes",
259+
"body too large: 1234567890123456789 bytes",
249260
)
250261

251-
def test_parse_body_with_transfer_encoding(self):
252-
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n")
262+
def test_parse_body_with_chunked_transfer_encoding(self):
263+
self.reader.feed_data(
264+
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
265+
b"6\r\nHello \r\n7\r\nworld!\n\r\n0\r\n\r\n"
266+
)
267+
response = self.assertGeneratorReturns(self.parse())
268+
self.assertEqual(response.body, b"Hello world!\n")
269+
270+
def test_parse_body_with_chunked_transfer_encoding_and_chunk_without_crlf(self):
271+
self.reader.feed_data(
272+
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
273+
b"6\r\nHello 7\r\nworld!\n0\r\n"
274+
)
275+
with self.assertRaises(ValueError) as raised:
276+
next(self.parse())
277+
self.assertEqual(
278+
str(raised.exception),
279+
"chunk without CRLF",
280+
)
281+
282+
def test_parse_body_with_chunked_transfer_encoding_and_chunk_too_large(self):
283+
self.reader.feed_data(
284+
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
285+
b"100000\r\n" + b"a" * 1048576 + b"\r\n1\r\na\r\n0\r\n\r\n"
286+
)
287+
with self.assertRaises(SecurityError) as raised:
288+
next(self.parse())
289+
self.assertEqual(
290+
str(raised.exception),
291+
"chunk too large: 1 bytes after 1048576 bytes",
292+
)
293+
294+
def test_parse_body_with_chunked_transfer_encoding_and_chunk_way_too_large(self):
295+
self.reader.feed_data(
296+
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
297+
b"1234567890ABCDEF\r\n\r\n"
298+
)
299+
with self.assertRaises(SecurityError) as raised:
300+
next(self.parse())
301+
self.assertEqual(
302+
str(raised.exception),
303+
"chunk too large: 0x1234567890ABCDEF bytes",
304+
)
305+
306+
def test_parse_body_with_unsupported_transfer_encoding(self):
307+
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: compress\r\n\r\n")
253308
with self.assertRaises(NotImplementedError) as raised:
254309
next(self.parse())
255310
self.assertEqual(
256311
str(raised.exception),
257-
"transfer codings aren't supported",
312+
"transfer coding compress isn't supported",
258313
)
259314

260315
def test_parse_body_no_content(self):
261316
self.reader.feed_data(b"HTTP/1.1 204 No Content\r\n\r\n")
262317
response = self.assertGeneratorReturns(self.parse())
263-
self.assertIsNone(response.body)
318+
self.assertEqual(response.body, b"")
264319

265320
def test_parse_body_not_modified(self):
266321
self.reader.feed_data(b"HTTP/1.1 304 Not Modified\r\n\r\n")
267322
response = self.assertGeneratorReturns(self.parse())
268-
self.assertIsNone(response.body)
323+
self.assertEqual(response.body, b"")
269324

270325
def test_serialize(self):
271326
# Example from the protocol overview in RFC 6455

tests/test_server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def test_accept_response(self, _formatdate):
281281
}
282282
),
283283
)
284-
self.assertIsNone(response.body)
284+
self.assertEqual(response.body, b"")
285285

286286
@patch("email.utils.formatdate", return_value=DATE)
287287
def test_reject_response(self, _formatdate):

0 commit comments

Comments
 (0)