Skip to content

Commit c4a8ab8

Browse files
committed
Spike HTTP(S) proxy implementation for sync client.
1 parent c7de9b1 commit c4a8ab8

File tree

7 files changed

+166
-8
lines changed

7 files changed

+166
-8
lines changed

docs/reference/features.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,11 @@ Client
166166
| Perform HTTP Digest Authentication |||||
167167
| (`#784`_) | | | | |
168168
+------------------------------------+--------+--------+--------+--------+
169-
| Connect via HTTP proxy (`#364`_) | | |||
169+
| Connect via HTTP proxy | | |||
170170
+------------------------------------+--------+--------+--------+--------+
171171
| Connect via SOCKS5 proxy |||||
172172
+------------------------------------+--------+--------+--------+--------+
173173

174-
.. _#364: https://github.com/python-websockets/websockets/issues/364
175174
.. _#784: https://github.com/python-websockets/websockets/issues/784
176175

177176
Known limitations

docs/topics/proxies.rst

+5
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h.
6464

6565
python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`)
6666
but does not support other authentication methods such as GSSAPI (:rfc:`1961`).
67+
68+
HTTP proxies
69+
------------
70+
71+
TODO

src/websockets/asyncio/client.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import os
66
import socket
7+
import ssl
78
import traceback
89
import urllib.parse
910
from collections.abc import AsyncIterator, Generator, Sequence
@@ -213,6 +214,9 @@ class connect:
213214
to :obj:`None` to disable the proxy or to the address of a proxy
214215
to override the system configuration. See the :doc:`proxy docs
215216
<../../topics/proxies>` for details.
217+
proxy_ssl: Configuration for enabling TLS on the proxy connection.
218+
proxy_server_hostname: Host name for the TLS handshake with the proxy.
219+
``proxy_server_hostname`` overrides the host name from ``proxy``.
216220
process_exception: When reconnecting automatically, tell whether an
217221
error is transient or fatal. The default behavior is defined by
218222
:func:`process_exception`. Refer to its documentation for details.
@@ -255,7 +259,7 @@ class connect:
255259
the TLS handshake.
256260
257261
* You can set ``host`` and ``port`` to connect to a different host and port
258-
from those found in ``uri``. This only changes the destination of the TCP
262+
from those found in ``uri``. This only changes the destination of the TCP
259263
connection. The host name from ``uri`` is still used in the TLS handshake
260264
for secure connections and in the ``Host`` header.
261265
@@ -285,6 +289,8 @@ def __init__(
285289
additional_headers: HeadersLike | None = None,
286290
user_agent_header: str | None = USER_AGENT,
287291
proxy: str | Literal[True] | None = True,
292+
proxy_ssl: ssl.SSLContext | None = None,
293+
proxy_server_hostname: str | None = None,
288294
process_exception: Callable[[Exception], Exception | None] = process_exception,
289295
# Timeouts
290296
open_timeout: float | None = 10,
@@ -634,6 +640,17 @@ async def connect_socks_proxy(
634640
raise ImportError("python-socks is required to use a SOCKS proxy")
635641

636642

643+
async def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    *,
    proxy_ssl: ssl.SSLContext | None = None,
    proxy_server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Placeholder for connecting through an HTTP(S) proxy in the asyncio client.

    The signature is in place so that ``connect_proxy()`` can dispatch
    ``http``/``https`` proxies here, but no argument is used yet and every
    call fails.

    Raises:
        NotImplementedError: Always.

    """
    message = "HTTP proxy support is not implemented"
    raise NotImplementedError(message)
652+
653+
637654
async def connect_proxy(
638655
proxy: Proxy,
639656
ws_uri: WebSocketURI,
@@ -643,5 +660,7 @@ async def connect_proxy(
643660
# parse_proxy() validates proxy.scheme.
644661
if proxy.scheme[:5] == "socks":
645662
return await connect_socks_proxy(proxy, ws_uri, **kwargs)
663+
elif proxy.scheme[:4] == "http":
664+
return await connect_http_proxy(proxy, ws_uri, **kwargs)
646665
else:
647666
raise AssertionError("unsupported proxy")

src/websockets/http11.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ def parse(
210210
read_line: Callable[[int], Generator[None, None, bytes]],
211211
read_exact: Callable[[int], Generator[None, None, bytes]],
212212
read_to_eof: Callable[[int], Generator[None, None, bytes]],
213+
include_body: bool = True,
213214
) -> Generator[None, None, Response]:
214215
"""
215216
Parse a WebSocket handshake response.
@@ -265,9 +266,12 @@ def parse(
265266

266267
headers = yield from parse_headers(read_line)
267268

268-
body = yield from read_body(
269-
status_code, headers, read_line, read_exact, read_to_eof
270-
)
269+
if include_body:
270+
body = yield from read_body(
271+
status_code, headers, read_line, read_exact, read_to_eof
272+
)
273+
else:
274+
body = b""
271275

272276
return cls(status_code, reason, headers, body)
273277

src/websockets/sync/client.py

+91-2
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
from typing import Any, Literal
99

1010
from ..client import ClientProtocol
11-
from ..datastructures import HeadersLike
11+
from ..datastructures import Headers, HeadersLike
12+
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus
1213
from ..extensions.base import ClientExtensionFactory
1314
from ..extensions.permessage_deflate import enable_client_permessage_deflate
14-
from ..headers import validate_subprotocols
15+
from ..headers import build_authorization_basic, build_host, validate_subprotocols
1516
from ..http11 import USER_AGENT, Response
1617
from ..protocol import CONNECTING, Event
18+
from ..streams import StreamReader
1719
from ..typing import LoggerLike, Origin, Subprotocol
1820
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
1921
from .connection import Connection
@@ -140,6 +142,8 @@ def connect(
140142
additional_headers: HeadersLike | None = None,
141143
user_agent_header: str | None = USER_AGENT,
142144
proxy: str | Literal[True] | None = True,
145+
proxy_ssl: ssl_module.SSLContext | None = None,
146+
proxy_server_hostname: str | None = None,
143147
# Timeouts
144148
open_timeout: float | None = 10,
145149
ping_interval: float | None = 20,
@@ -194,6 +198,9 @@ def connect(
194198
to :obj:`None` to disable the proxy or to the address of a proxy
195199
to override the system configuration. See the :doc:`proxy docs
196200
<../../topics/proxies>` for details.
201+
proxy_ssl: Configuration for enabling TLS on the proxy connection.
202+
proxy_server_hostname: Host name for the TLS handshake with the proxy.
203+
``proxy_server_hostname`` overrides the host name from ``proxy``.
197204
open_timeout: Timeout for opening the connection in seconds.
198205
:obj:`None` disables the timeout.
199206
ping_interval: Interval between keepalive pings in seconds.
@@ -287,6 +294,8 @@ def connect(
287294
parse_proxy(proxy),
288295
ws_uri,
289296
deadline,
297+
proxy_ssl=proxy_ssl,
298+
proxy_server_hostname=proxy_server_hostname,
290299
**kwargs,
291300
)
292301
else:
@@ -431,6 +440,84 @@ def connect_socks_proxy(
431440
raise ImportError("python-socks is required to use a SOCKS proxy")
432441

433442

443+
def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    *,
    proxy_ssl: ssl_module.SSLContext | None = None,
    proxy_server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Open a tunnel to ``ws_uri`` through an HTTP(S) proxy with ``CONNECT``.

    Args:
        proxy: Proxy to connect to. Its scheme selects plain TCP (``http``)
            or TLS (``https``) for the connection to the proxy itself.
        ws_uri: WebSocket URI whose host and port are the tunnel's target.
        deadline: Provides the timeout applied to each blocking socket
            operation via ``deadline.timeout()``.
        proxy_ssl: TLS configuration for an ``https`` proxy. A default
            context is created when omitted.
        proxy_server_hostname: Host name for the TLS handshake with the
            proxy. Defaults to ``proxy.host``.
        **kwargs: Passed to :func:`socket.create_connection`.

    Returns:
        The connected socket, left in blocking mode, once the proxy has
        answered the ``CONNECT`` request with a 2xx status.

    Raises:
        ValueError: If ``proxy_ssl`` is given for a proxy that isn't
            ``https``.
        InvalidProxyMessage: If reading or parsing the proxy's response
            fails for any reason, including the proxy closing the
            connection before sending a complete response.
        InvalidProxyStatus: If the proxy answers with a non-2xx status.

    """
    if proxy.scheme[:5] != "https" and proxy_ssl is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # Connect socket

    kwargs.setdefault("timeout", deadline.timeout())
    sock = socket.create_connection((proxy.host, proxy.port), **kwargs)

    # From here on the socket is ours: close it if any later step fails so
    # that an unsuccessful proxy handshake doesn't leak a file descriptor.
    try:
        # Initialize TLS wrapper and perform TLS handshake

        if proxy.scheme[:5] == "https":
            if proxy_ssl is None:
                proxy_ssl = ssl_module.create_default_context()
            if proxy_server_hostname is None:
                proxy_server_hostname = proxy.host
            sock.settimeout(deadline.timeout())
            sock = proxy_ssl.wrap_socket(sock, server_hostname=proxy_server_hostname)
            sock.settimeout(None)

        # Send CONNECT request to the proxy.

        proxy_headers = Headers()
        proxy_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
        if proxy.username is not None:
            assert proxy.password is not None  # enforced by parse_proxy
            proxy_headers["Proxy-Authorization"] = build_authorization_basic(
                proxy.username,
                proxy.password,
            )

        connect_host = build_host(
            ws_uri.host,
            ws_uri.port,
            ws_uri.secure,
            always_include_port=True,
        )
        # We cannot use the Request class because it supports only GET requests.
        proxy_request = f"CONNECT {connect_host} HTTP/1.1\r\n".encode()
        proxy_request += proxy_headers.serialize()
        sock.sendall(proxy_request)

        # Read response from the proxy.

        reader = StreamReader()
        # include_body=False: after a successful CONNECT, whatever follows
        # the headers belongs to the tunnel, not to the proxy's response.
        parser = Response.parse(
            reader.read_line,
            reader.read_exact,
            reader.read_to_eof,
            include_body=False,
        )
        try:
            while True:
                sock.settimeout(deadline.timeout())
                data = sock.recv(4096)
                if data:
                    reader.feed_data(data)
                else:
                    # recv() returning b"" means the proxy closed the
                    # connection. Signal EOF so the parser fails instead of
                    # being fed empty data in a busy loop.
                    reader.feed_eof()
                next(parser)
        except StopIteration as exc:
            # The parser generator returns the parsed response.
            response = exc.value
        except Exception as exc:
            raise InvalidProxyMessage(
                "did not receive a valid HTTP response from proxy"
            ) from exc
        finally:
            sock.settimeout(None)

        if not 200 <= response.status_code < 300:
            raise InvalidProxyStatus(response)

    except Exception:
        sock.close()
        raise

    return sock
519+
520+
434521
def connect_proxy(
435522
proxy: Proxy,
436523
ws_uri: WebSocketURI,
@@ -441,5 +528,7 @@ def connect_proxy(
441528
# parse_proxy() validates proxy.scheme.
442529
if proxy.scheme[:5] == "socks":
443530
return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs)
531+
elif proxy.scheme[:4] == "http":
532+
return connect_http_proxy(proxy, ws_uri, deadline, **kwargs)
444533
else:
445534
raise AssertionError("unsupported proxy")

tests/asyncio/test_client.py

+21
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,18 @@ async def socks_proxy(self, auth=None):
578578
with patch_environ({"socks_proxy": proxy_uri}):
579579
yield record_flows
580580

581+
@contextlib.asynccontextmanager
async def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and advertise it via ``https_proxy``.

    When ``auth`` is truthy, the proxy requires credentials and the
    advertised proxy URI carries them. Yields the object bound by
    ``async_proxy()``.

    """
    proxyauth = None
    proxy_uri = "http://localhost:8080"
    if auth:
        proxyauth = "hello:iloveyou"
        proxy_uri = "http://hello:iloveyou@localhost:8080"
    environ = {"https_proxy": proxy_uri}
    async with async_proxy(mode=["regular"], proxyauth=proxyauth) as record_flows:
        with patch_environ(environ):
            yield record_flows
592+
581593
async def test_socks_proxy(self):
582594
"""Client connects to server through a SOCKS5 proxy."""
583595
async with self.socks_proxy() as proxy:
@@ -602,6 +614,15 @@ async def test_authenticated_socks_proxy(self):
602614
self.assertEqual(client.protocol.state.name, "OPEN")
603615
self.assertEqual(len(proxy.get_flows()), 1)
604616

617+
# Expected to fail for now: the asyncio connect_http_proxy() is a stub that
# raises NotImplementedError.
@unittest.expectedFailure
618+
async def test_http_proxy(self):
619+
"""Client connects to server through an HTTP proxy."""
620+
async with self.http_proxy() as proxy:
621+
async with serve(*args) as server:
622+
async with connect(get_uri(server)) as client:
623+
self.assertEqual(client.protocol.state.name, "OPEN")
624+
self.assertEqual(len(proxy.get_flows()), 1)
625+
605626
async def test_explicit_proxy(self):
606627
"""Client connects to server through a proxy set explicitly."""
607628
async with async_proxy(mode=["socks5"]) as proxy:

tests/sync/test_client.py

+21
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,19 @@ def socks_proxy(self, auth=None):
320320
with patch_environ({"socks_proxy": proxy_uri}):
321321
yield record_flows
322322

323+
@contextlib.contextmanager
def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and advertise it via ``https_proxy``.

    When ``auth`` is truthy, the proxy requires credentials and the
    advertised proxy URI carries them. Yields the object bound by
    ``sync_proxy()``.

    """
    proxyauth = None
    proxy_uri = "http://localhost:8080"
    if auth:
        proxyauth = "hello:iloveyou"
        proxy_uri = "http://hello:iloveyou@localhost:8080"
    environ = {"https_proxy": proxy_uri}
    with sync_proxy(mode=["regular"], proxyauth=proxyauth) as record_flows:
        with patch_environ(environ):
            yield record_flows
335+
323336
def test_socks_proxy(self):
324337
"""Client connects to server through a SOCKS5 proxy."""
325338
with self.socks_proxy() as proxy:
@@ -344,6 +357,14 @@ def test_authenticated_socks_proxy(self):
344357
self.assertEqual(client.protocol.state.name, "OPEN")
345358
self.assertEqual(len(proxy.get_flows()), 1)
346359

360+
def test_http_proxy(self):
361+
"""Client connects to server through an HTTP proxy."""
362+
with self.http_proxy() as proxy:
363+
with run_server() as server:
364+
with connect(get_uri(server)) as client:
365+
self.assertEqual(client.protocol.state.name, "OPEN")
366+
self.assertEqual(len(proxy.get_flows()), 1)
367+
347368
def test_explicit_proxy(self):
348369
"""Client connects to server through a proxy set explicitly."""
349370
with sync_proxy(mode=["socks5"]) as proxy:

0 commit comments

Comments
 (0)