Skip to content

Commit d9a77f0

Browse files
committed
Spike HTTP(S) proxy implementation for sync client.
1 parent 513073b commit d9a77f0

File tree

7 files changed

+191
-15
lines changed

7 files changed

+191
-15
lines changed

docs/reference/features.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,11 @@ Client
166166
| Perform HTTP Digest Authentication |||||
167167
| (`#784`_) | | | | |
168168
+------------------------------------+--------+--------+--------+--------+
169-
| Connect via HTTP proxy (`#364`_) | | |||
169+
| Connect via HTTP proxy | | |||
170170
+------------------------------------+--------+--------+--------+--------+
171171
| Connect via SOCKS5 proxy |||||
172172
+------------------------------------+--------+--------+--------+--------+
173173

174-
.. _#364: https://github.com/python-websockets/websockets/issues/364
175174
.. _#784: https://github.com/python-websockets/websockets/issues/784
176175

177176
Known limitations

docs/topics/proxies.rst

+5
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h.
6464

6565
python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`)
6666
but does not support other authentication methods such as GSSAPI (:rfc:`1961`).
67+
68+
HTTP proxies
69+
------------
70+
71+
TODO

src/websockets/asyncio/client.py

+45-7
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ class connect:
257257
the TLS handshake.
258258
259259
* You can set ``host`` and ``port`` to connect to a different host and port
260-
from those found in ``uri``. This only changes the destination of the TCP
260+
from those found in ``uri``. This only changes the destination of the TCP
261261
connection. The host name from ``uri`` is still used in the TLS handshake
262262
for secure connections and in the ``Host`` header.
263263
@@ -266,6 +266,23 @@ class connect:
266266
:meth:`~asyncio.loop.create_connection` method) to create a suitable
267267
client socket and customize it.
268268
269+
When using a proxy, :meth:`~asyncio.loop.create_connection` is called twice:
270+
first to connect to the proxy, then to connect to the WebSocket server via
271+
the proxy. In that case:
272+
273+
* Prefix keyword arguments with ``proxy_`` for configuring TLS between the
274+
client and an HTTPS proxy: ``proxy_ssl``, ``proxy_server_hostname``,
275+
``proxy_ssl_keylog_callback``, and ``proxy_ssl_version``.
276+
* Use the standard keyword arguments for configuring TLS between the proxy
277+
and the WebSocket server.
278+
* Other keyword arguments are used only for connecting to the proxy. The
279+
socket connected to the proxy is then passed in the ``sock`` argument when
280+
connecting to the WebSocket server.
281+
282+
To pass different arguments to the two calls, prefix settings for connecting
283+
to the proxy with ``proxy_``. For example, you can set ``proxy_ssl`` or
284+
``proxy_server_hostname`` to configure TLS between the client and an HTTPS proxy.
285+
269286
Raises:
270287
InvalidURI: If ``uri`` isn't a valid WebSocket URI.
271288
InvalidProxy: If ``proxy`` isn't a valid proxy.
@@ -383,12 +400,19 @@ def factory() -> ClientConnection:
383400
if kwargs.pop("unix", False):
384401
_, connection = await loop.create_unix_connection(factory, **kwargs)
385402
elif proxy is not None:
386-
kwargs["sock"] = await connect_proxy(
387-
parse_proxy(proxy),
388-
ws_uri,
389-
local_addr=kwargs.pop("local_addr", None),
390-
)
391-
_, connection = await loop.create_connection(factory, **kwargs)
403+
# Split keyword arguments for connecting to the proxy or the server.
404+
all_kwargs, proxy_kwargs, kwargs = kwargs, {}, {}
405+
for key, value in all_kwargs.items():
406+
if key.startswith("ssl") or key == "server_hostname":
407+
kwargs[key] = value
408+
elif key.startswith("proxy_"):
409+
proxy_kwargs[key[6:]] = value
410+
else:
411+
proxy_kwargs[key] = value
412+
# Connect to the proxy.
413+
sock = await connect_proxy(parse_proxy(proxy), ws_uri, **proxy_kwargs)
414+
# Connect to the server via the proxy.
415+
_, connection = await loop.create_connection(factory, sock=sock, **kwargs)
392416
else:
393417
if kwargs.get("sock") is None:
394418
kwargs.setdefault("host", ws_uri.host)
@@ -645,6 +669,18 @@ async def connect_socks_proxy(
645669
raise ImportError("python-socks is required to use a SOCKS proxy")
646670

647671

672+
async def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    **kwargs: Any,
) -> socket.socket:
    """
    Placeholder for connecting through an HTTP(S) proxy with asyncio.

    Args:
        proxy: Proxy to connect to; only its ``scheme`` is inspected here.
        ws_uri: URI of the WebSocket server; currently unused.
        kwargs: Keyword arguments for the proxy connection; only ``ssl`` is
            inspected here.

    Raises:
        ValueError: If ``ssl`` is set while the proxy scheme isn't ``https``.
        NotImplementedError: In every other case.

    """
    uses_tls = proxy.scheme == "https"
    if not uses_tls and kwargs.get("ssl") is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # TODO: Implement HTTP proxy support.
    raise NotImplementedError("HTTP proxy support is not implemented")
682+
683+
648684
async def connect_proxy(
649685
proxy: Proxy,
650686
ws_uri: WebSocketURI,
@@ -654,5 +690,7 @@ async def connect_proxy(
654690
# parse_proxy() validates proxy.scheme.
655691
if proxy.scheme[:5] == "socks":
656692
return await connect_socks_proxy(proxy, ws_uri, **kwargs)
693+
elif proxy.scheme[:4] == "http":
694+
return await connect_http_proxy(proxy, ws_uri, **kwargs)
657695
else:
658696
raise AssertionError("unsupported proxy")

src/websockets/http11.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ def parse(
210210
read_line: Callable[[int], Generator[None, None, bytes]],
211211
read_exact: Callable[[int], Generator[None, None, bytes]],
212212
read_to_eof: Callable[[int], Generator[None, None, bytes]],
213+
include_body: bool = True,
213214
) -> Generator[None, None, Response]:
214215
"""
215216
Parse a WebSocket handshake response.
@@ -265,9 +266,12 @@ def parse(
265266

266267
headers = yield from parse_headers(read_line)
267268

268-
body = yield from read_body(
269-
status_code, headers, read_line, read_exact, read_to_eof
270-
)
269+
if include_body:
270+
body = yield from read_body(
271+
status_code, headers, read_line, read_exact, read_to_eof
272+
)
273+
else:
274+
body = b""
271275

272276
return cls(status_code, reason, headers, body)
273277

src/websockets/sync/client.py

+91-3
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
from typing import Any, Literal
99

1010
from ..client import ClientProtocol
11-
from ..datastructures import HeadersLike
12-
from ..exceptions import ProxyError
11+
from ..datastructures import Headers, HeadersLike
12+
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus, ProxyError
1313
from ..extensions.base import ClientExtensionFactory
1414
from ..extensions.permessage_deflate import enable_client_permessage_deflate
15-
from ..headers import validate_subprotocols
15+
from ..headers import build_authorization_basic, build_host, validate_subprotocols
1616
from ..http11 import USER_AGENT, Response
1717
from ..protocol import CONNECTING, Event
18+
from ..streams import StreamReader
1819
from ..typing import LoggerLike, Origin, Subprotocol
1920
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
2021
from .connection import Connection
@@ -141,6 +142,8 @@ def connect(
141142
additional_headers: HeadersLike | None = None,
142143
user_agent_header: str | None = USER_AGENT,
143144
proxy: str | Literal[True] | None = True,
145+
proxy_ssl: ssl_module.SSLContext | None = None,
146+
proxy_server_hostname: str | None = None,
144147
# Timeouts
145148
open_timeout: float | None = 10,
146149
ping_interval: float | None = 20,
@@ -195,6 +198,9 @@ def connect(
195198
to :obj:`None` to disable the proxy or to the address of a proxy
196199
to override the system configuration. See the :doc:`proxy docs
197200
<../../topics/proxies>` for details.
201+
proxy_ssl: Configuration for enabling TLS on the proxy connection.
202+
proxy_server_hostname: Host name for the TLS handshake with the proxy.
203+
``proxy_server_hostname`` overrides the host name from ``proxy``.
198204
open_timeout: Timeout for opening the connection in seconds.
199205
:obj:`None` disables the timeout.
200206
ping_interval: Interval between keepalive pings in seconds.
@@ -291,6 +297,8 @@ def connect(
291297
# websockets is consistent with the socket module while
292298
# python_socks is consistent across implementations.
293299
local_addr=kwargs.pop("source_address", None),
300+
ssl=proxy_ssl,
301+
server_hostname=proxy_server_hostname,
294302
)
295303
else:
296304
kwargs.setdefault("timeout", deadline.timeout())
@@ -441,6 +449,84 @@ def connect_socks_proxy(
441449
raise ImportError("python-socks is required to use a SOCKS proxy")
442450

443451

452+
def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    *,
    ssl: ssl_module.SSLContext | None = None,
    server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Open a tunnel to the WebSocket server through an HTTP(S) proxy.

    Connect to the proxy, perform a TLS handshake when the proxy scheme is
    ``https``, send a ``CONNECT`` request for the host and port of ``ws_uri``,
    and check that the proxy replies with a 2xx status.

    Args:
        proxy: Proxy to connect to.
        ws_uri: URI of the WebSocket server to reach through the proxy.
        deadline: Deadline for the whole operation; ``deadline.timeout()``
            provides the timeout for each blocking socket operation.
        ssl: TLS context for the connection to an ``https://`` proxy. A
            default context is created when it is :obj:`None`.
        server_hostname: Host name for the TLS handshake with the proxy.
            Defaults to ``proxy.host``.
        kwargs: Other keyword arguments are passed to
            :func:`socket.create_connection`.

    Returns:
        Socket connected to the proxy, with its timeout reset to :obj:`None`.

    Raises:
        ValueError: If ``ssl`` is set while the proxy scheme isn't ``https``.
        InvalidProxyMessage: If the proxy doesn't send a valid HTTP response,
            including when it closes the connection before responding.
        InvalidProxyStatus: If the proxy responds with a non-2xx status.

    """
    if proxy.scheme != "https" and ssl is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # Connect socket

    kwargs.setdefault("timeout", deadline.timeout())
    sock = socket.create_connection((proxy.host, proxy.port), **kwargs)

    # From this point on, the socket must be closed if anything fails;
    # otherwise it leaks because the caller never receives it.
    try:
        # Initialize TLS wrapper and perform TLS handshake

        if proxy.scheme == "https":
            if ssl is None:
                ssl = ssl_module.create_default_context()
            if server_hostname is None:
                server_hostname = proxy.host
            sock.settimeout(deadline.timeout())
            sock = ssl.wrap_socket(sock, server_hostname=server_hostname)
            sock.settimeout(None)

        # Send CONNECT request to the proxy.

        proxy_headers = Headers()
        proxy_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
        if proxy.username is not None:
            assert proxy.password is not None  # enforced by parse_proxy
            proxy_headers["Proxy-Authorization"] = build_authorization_basic(
                proxy.username,
                proxy.password,
            )

        connect_host = build_host(
            ws_uri.host,
            ws_uri.port,
            ws_uri.secure,
            always_include_port=True,
        )
        # We cannot use the Request class because it supports only GET requests.
        proxy_request = f"CONNECT {connect_host} HTTP/1.1\r\n".encode()
        proxy_request += proxy_headers.serialize()
        sock.sendall(proxy_request)

        # Read response from the proxy.

        reader = StreamReader()
        parser = Response.parse(
            reader.read_line,
            reader.read_exact,
            reader.read_to_eof,
            include_body=False,
        )
        try:
            while True:
                sock.settimeout(deadline.timeout())
                data = sock.recv(4096)
                if data:
                    reader.feed_data(data)
                else:
                    # recv() returns b"" when the proxy closes the connection.
                    # Signal EOF so the parser fails right away instead of
                    # spinning on empty reads until the deadline.
                    reader.feed_eof()
                next(parser)
        except StopIteration as exc:
            response = exc.value
        except Exception as exc:
            raise InvalidProxyMessage(
                "did not receive a valid HTTP response from proxy"
            ) from exc
        finally:
            sock.settimeout(None)
        if not 200 <= response.status_code < 300:
            raise InvalidProxyStatus(response)

    except Exception:
        sock.close()
        raise

    return sock
528+
529+
444530
def connect_proxy(
445531
proxy: Proxy,
446532
ws_uri: WebSocketURI,
@@ -451,5 +537,7 @@ def connect_proxy(
451537
# parse_proxy() validates proxy.scheme.
452538
if proxy.scheme[:5] == "socks":
453539
return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs)
540+
elif proxy.scheme[:4] == "http":
541+
return connect_http_proxy(proxy, ws_uri, deadline, **kwargs)
454542
else:
455543
raise AssertionError("unsupported proxy")

tests/asyncio/test_client.py

+21
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,18 @@ async def socks_proxy(self, auth=None):
574574
with patch_environ({"socks_proxy": proxy_uri}):
575575
yield record_flows
576576

577+
@contextlib.asynccontextmanager
async def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and point ``https_proxy`` at it.

    When ``auth`` is truthy, the proxy is started with the credentials
    ``hello:iloveyou`` and the proxy URI embeds the same credentials.
    Yields whatever ``async_proxy`` provides for inspecting proxied flows.

    """
    credentials, address = (
        ("hello:iloveyou", "http://hello:iloveyou@localhost:8080")
        if auth
        else (None, "http://localhost:8080")
    )
    async with async_proxy(mode=["regular"], proxyauth=credentials) as flows:
        with patch_environ({"https_proxy": address}):
            yield flows
588+
577589
async def test_socks_proxy(self):
578590
"""Client connects to server through a SOCKS5 proxy."""
579591
async with self.socks_proxy() as proxy:
@@ -646,6 +658,15 @@ async def test_socks_proxy_connection_timeout(self):
646658
"timed out during handshake",
647659
)
648660

661+
# NOTE(review): presumably an expected failure because the asyncio
# connect_http_proxy() still raises NotImplementedError — confirm.
@unittest.expectedFailure
662+
async def test_http_proxy(self):
663+
"""Client connects to server through an HTTP proxy."""
664+
async with self.http_proxy() as proxy:
665+
async with serve(*args) as server:
666+
async with connect(get_uri(server)) as client:
667+
self.assertEqual(client.protocol.state.name, "OPEN")
668+
self.assertEqual(len(proxy.get_flows()), 1)
669+
649670
async def test_explicit_proxy(self):
650671
"""Client connects to server through a proxy set explicitly."""
651672
async with async_proxy(mode=["socks5@51080"]) as proxy:

tests/sync/test_client.py

+21
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,19 @@ def socks_proxy(self, auth=None):
316316
with patch_environ({"socks_proxy": proxy_uri}):
317317
yield record_flows
318318

319+
@contextlib.contextmanager
def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and point ``https_proxy`` at it.

    When ``auth`` is truthy, the proxy is started with the credentials
    ``hello:iloveyou`` and the proxy URI embeds the same credentials.
    Yields whatever ``sync_proxy`` provides for inspecting proxied flows.

    """
    credentials, address = (
        ("hello:iloveyou", "http://hello:iloveyou@localhost:8080")
        if auth
        else (None, "http://localhost:8080")
    )
    with sync_proxy(mode=["regular"], proxyauth=credentials) as flows, patch_environ(
        {"https_proxy": address}
    ):
        yield flows
331+
319332
def test_socks_proxy(self):
320333
"""Client connects to server through a SOCKS5 proxy."""
321334
with self.socks_proxy() as proxy:
@@ -388,6 +401,14 @@ def test_socks_proxy_timeout(self):
388401
# Don't test str(raised.exception) because we don't control it.
389402
self.assertIsInstance(raised.exception, SocksProxyTimeoutError)
390403

404+
def test_http_proxy(self):
405+
"""Client connects to server through an HTTP proxy."""
406+
with self.http_proxy() as proxy:
407+
with run_server() as server:
408+
with connect(get_uri(server)) as client:
409+
self.assertEqual(client.protocol.state.name, "OPEN")
410+
self.assertEqual(len(proxy.get_flows()), 1)
411+
391412
def test_explicit_proxy(self):
392413
"""Client connects to server through a proxy set explicitly."""
393414
with sync_proxy(mode=["socks5@51080"]) as proxy:

0 commit comments

Comments
 (0)