Skip to content

Commit 10df15d

Browse files
committed
Spike HTTP(S) proxy implementation for sync client.
1 parent cf87762 commit 10df15d

File tree

8 files changed

+294
-24
lines changed

8 files changed

+294
-24
lines changed

docs/reference/features.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,11 @@ Client
166166
| Perform HTTP Digest Authentication |||||
167167
| (`#784`_) | | | | |
168168
+------------------------------------+--------+--------+--------+--------+
169-
| Connect via HTTP proxy (`#364`_) | | |||
169+
| Connect via HTTP proxy | | |||
170170
+------------------------------------+--------+--------+--------+--------+
171171
| Connect via SOCKS5 proxy |||||
172172
+------------------------------------+--------+--------+--------+--------+
173173

174-
.. _#364: https://github.com/python-websockets/websockets/issues/364
175174
.. _#784: https://github.com/python-websockets/websockets/issues/784
176175

177176
Known limitations

docs/topics/proxies.rst

+5
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h.
6464

6565
python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`)
6666
but does not support other authentication methods such as GSSAPI (:rfc:`1961`).
67+
68+
HTTP proxies
69+
------------
70+
71+
TODO

src/websockets/asyncio/client.py

+137-11
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,24 @@
88
import urllib.parse
99
from collections.abc import AsyncIterator, Generator, Sequence
1010
from types import TracebackType
11-
from typing import Any, Callable, Literal
11+
from typing import Any, Callable, Literal, cast
1212

1313
from ..client import ClientProtocol, backoff
14-
from ..datastructures import HeadersLike
15-
from ..exceptions import InvalidMessage, InvalidStatus, ProxyError, SecurityError
14+
from ..datastructures import Headers, HeadersLike
15+
from ..exceptions import (
16+
InvalidMessage,
17+
InvalidProxyMessage,
18+
InvalidProxyStatus,
19+
InvalidStatus,
20+
ProxyError,
21+
SecurityError,
22+
)
1623
from ..extensions.base import ClientExtensionFactory
1724
from ..extensions.permessage_deflate import enable_client_permessage_deflate
18-
from ..headers import validate_subprotocols
25+
from ..headers import build_authorization_basic, build_host, validate_subprotocols
1926
from ..http11 import USER_AGENT, Response
2027
from ..protocol import CONNECTING, Event
28+
from ..streams import StreamReader
2129
from ..typing import LoggerLike, Origin, Subprotocol
2230
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
2331
from .compatibility import TimeoutError, asyncio_timeout
@@ -257,7 +265,7 @@ class connect:
257265
the TLS handshake.
258266
259267
* You can set ``host`` and ``port`` to connect to a different host and port
260-
from those found in ``uri``. This only changes the destination of the TCP
268+
from those found in ``uri``. This only changes the destination of the TCP
261269
connection. The host name from ``uri`` is still used in the TLS handshake
262270
for secure connections and in the ``Host`` header.
263271
@@ -266,6 +274,23 @@ class connect:
266274
:meth:`~asyncio.loop.create_connection` method) to create a suitable
267275
client socket and customize it.
268276
277+
When using a proxy, :meth:`~asyncio.loop.create_connection` is called twice:
278+
first to connect to the proxy, then to connect to the WebSocket server via
279+
the proxy. In that case:
280+
281+
* Prefix keyword arguments with ``proxy_`` for configuring TLS between the
282+
client and an HTTPS proxy: ``proxy_ssl``, ``proxy_server_hostname``,
283+
``proxy_ssl_keylog_callback``, and ``proxy_ssl_version``.
284+
* Use the standard keyword arguments for configuring TLS between the proxy
285+
and the WebSocket server.
286+
* Other keyword arguments are used only for connecting to the proxy. The
287+
socket connected to the proxy is then passed in the ``sock`` argument when
288+
connecting to the WebSocket server.
289+
290+
To pass different arguments to the two calls, prefix settings for connecting
291+
to the proxy with ``proxy_``. For example, you can set ``proxy_ssl`` or
292+
``proxy_server_hostname`` to configure TLS between the client and an HTTPS proxy.
293+
269294
Raises:
270295
InvalidURI: If ``uri`` isn't a valid WebSocket URI.
271296
InvalidProxy: If ``proxy`` isn't a valid proxy.
@@ -383,12 +408,19 @@ def factory() -> ClientConnection:
383408
if kwargs.pop("unix", False):
384409
_, connection = await loop.create_unix_connection(factory, **kwargs)
385410
elif proxy is not None:
386-
kwargs["sock"] = await connect_proxy(
387-
parse_proxy(proxy),
388-
ws_uri,
389-
local_addr=kwargs.pop("local_addr", None),
390-
)
391-
_, connection = await loop.create_connection(factory, **kwargs)
411+
# Split keyword arguments for connecting to the proxy or the server.
412+
all_kwargs, proxy_kwargs, kwargs = kwargs, {}, {}
413+
for key, value in all_kwargs.items():
414+
if key.startswith("ssl") or key == "server_hostname":
415+
kwargs[key] = value
416+
elif key.startswith("proxy_"):
417+
proxy_kwargs[key[6:]] = value
418+
else:
419+
proxy_kwargs[key] = value
420+
# Connect to the proxy.
421+
sock = await connect_proxy(parse_proxy(proxy), ws_uri, **proxy_kwargs)
422+
# Connect to the server via the proxy.
423+
_, connection = await loop.create_connection(factory, sock=sock, **kwargs)
392424
else:
393425
if kwargs.get("sock") is None:
394426
kwargs.setdefault("host", ws_uri.host)
@@ -645,6 +677,98 @@ async def connect_socks_proxy(
645677
raise ImportError("python-socks is required to use a SOCKS proxy")
646678

647679

680+
def prepare_connect_request(proxy: Proxy, ws_uri: WebSocketURI) -> bytes:
    """
    Serialize the HTTP ``CONNECT`` request sent to an HTTP(S) proxy.

    Args:
        proxy: Proxy to go through; when it carries a username, the
            credentials are sent in a ``Proxy-Authorization`` header.
        ws_uri: URI of the WebSocket server to reach via the proxy.

    Returns:
        The request line followed by the serialized headers.

    """
    # The request target always spells out the port; the Host header is
    # built without forcing it.
    target = build_host(
        ws_uri.host,
        ws_uri.port,
        ws_uri.secure,
        always_include_port=True,
    )
    request_headers = Headers()
    request_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
    if proxy.username is not None:
        assert proxy.password is not None  # enforced by parse_proxy()
        credentials = build_authorization_basic(proxy.username, proxy.password)
        request_headers["Proxy-Authorization"] = credentials
    # We cannot use the Request class because it supports only GET requests.
    request_line = f"CONNECT {target} HTTP/1.1\r\n".encode()
    return request_line + request_headers.serialize()
691+
692+
693+
class HTTPProxyConnection(asyncio.Protocol):
    """
    asyncio protocol that asks an HTTP(S) proxy to open a tunnel.

    When the connection is made, it writes a ``CONNECT`` request for
    ``ws_uri``. Incoming bytes are fed to an HTTP response parser. The
    outcome is published on the ``response`` future: the parsed response
    for a 2xx status, :exc:`InvalidProxyStatus` for any other status,
    :exc:`InvalidProxyMessage` when parsing fails, or the transport error
    reported to :meth:`connection_lost`.

    The future is settled at most once; events arriving after that are
    ignored instead of raising from within an event loop callback.

    Args:
        ws_uri: URI of the WebSocket server to reach via the proxy.
        proxy: Proxy that the transport is connected to.

    """

    def __init__(self, ws_uri: WebSocketURI, proxy: Proxy):
        self.ws_uri = ws_uri
        self.proxy = proxy

        self.reader = StreamReader()
        # The body isn't read: only the status line and headers matter here.
        self.parser = Response.parse(
            self.reader.read_line,
            self.reader.read_exact,
            self.reader.read_to_eof,
            include_body=False,
        )

        loop = asyncio.get_running_loop()
        self.response: asyncio.Future[Response] = loop.create_future()

    def run_parser(self) -> None:
        """Advance the parser and settle ``response`` once parsing ends."""
        if self.response.done():
            # The parser generator is exhausted once the outcome is known.
            # Resuming it would raise StopIteration without a value and
            # settling the future again would raise InvalidStateError.
            return
        try:
            next(self.parser)
        except StopIteration as exc:
            response = exc.value
            if 200 <= response.status_code < 300:
                self.response.set_result(response)
            else:
                self.response.set_exception(InvalidProxyStatus(response))
        except Exception as exc:
            proxy_exc = InvalidProxyMessage(
                "did not receive a valid HTTP response from proxy"
            )
            proxy_exc.__cause__ = exc
            self.response.set_exception(proxy_exc)

    def connection_made(self, transport: asyncio.BaseTransport) -> None:
        """Remember the transport and send the ``CONNECT`` request."""
        transport = cast(asyncio.Transport, transport)
        self.transport = transport
        self.transport.write(prepare_connect_request(self.proxy, self.ws_uri))

    def data_received(self, data: bytes) -> None:
        """Feed received bytes to the parser."""
        self.reader.feed_data(data)
        self.run_parser()

    def eof_received(self) -> None:
        """Signal the end of the stream to the parser."""
        self.reader.feed_eof()
        self.run_parser()

    def connection_lost(self, exc: Exception | None) -> None:
        """Propagate a transport error unless an outcome is already known."""
        self.reader.feed_eof()
        if exc is not None and not self.response.done():
            self.response.set_exception(exc)
743+
744+
async def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    **kwargs: Any,
) -> socket.socket:
    """
    Connect to an HTTP(S) proxy and open a tunnel to ``ws_uri``.

    Args:
        proxy: Proxy to connect to.
        ws_uri: URI of the WebSocket server to reach via the proxy.
        **kwargs: Passed to the event loop's ``create_connection()`` for the
            connection to the proxy.

    Returns:
        A duplicate of the socket connected to the proxy.

    Raises:
        ValueError: If ``ssl`` is set while the proxy scheme isn't ``https``.

    """
    ssl_requested = kwargs.get("ssl") is not None
    if ssl_requested and proxy.scheme != "https":
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    def protocol_factory() -> HTTPProxyConnection:
        return HTTPProxyConnection(ws_uri, proxy)

    loop = asyncio.get_running_loop()
    transport, protocol = await loop.create_connection(
        protocol_factory,
        proxy.host,
        proxy.port,
        **kwargs,
    )

    try:
        # This raises exceptions if the connection to the proxy fails.
        await protocol.response

        # We need to extract the socket from the transport, or else asyncio raises
        # RuntimeError: File descriptor ... is used by transport ...
        # To achieve this, we duplicate the socket then close the transport.
        proxy_sock = transport.get_extra_info("socket")
        duplicate = socket.fromfd(
            proxy_sock.fileno(),
            proxy_sock.family,
            proxy_sock.type,
            proxy_sock.proto,
        )
        return duplicate
    finally:
        transport.close()
770+
771+
648772
async def connect_proxy(
649773
proxy: Proxy,
650774
ws_uri: WebSocketURI,
@@ -654,5 +778,7 @@ async def connect_proxy(
654778
# parse_proxy() validates proxy.scheme.
655779
if proxy.scheme[:5] == "socks":
656780
return await connect_socks_proxy(proxy, ws_uri, **kwargs)
781+
elif proxy.scheme[:4] == "http":
782+
return await connect_http_proxy(proxy, ws_uri, **kwargs)
657783
else:
658784
raise AssertionError("unsupported proxy")

src/websockets/http11.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ def parse(
210210
read_line: Callable[[int], Generator[None, None, bytes]],
211211
read_exact: Callable[[int], Generator[None, None, bytes]],
212212
read_to_eof: Callable[[int], Generator[None, None, bytes]],
213+
include_body: bool = True,
213214
) -> Generator[None, None, Response]:
214215
"""
215216
Parse a WebSocket handshake response.
@@ -265,9 +266,12 @@ def parse(
265266

266267
headers = yield from parse_headers(read_line)
267268

268-
body = yield from read_body(
269-
status_code, headers, read_line, read_exact, read_to_eof
270-
)
269+
if include_body:
270+
body = yield from read_body(
271+
status_code, headers, read_line, read_exact, read_to_eof
272+
)
273+
else:
274+
body = b""
271275

272276
return cls(status_code, reason, headers, body)
273277

src/websockets/sync/client.py

+96-7
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,17 @@
55
import threading
66
import warnings
77
from collections.abc import Sequence
8-
from typing import Any, Literal
8+
from typing import Any, Literal, cast
99

1010
from ..client import ClientProtocol
11-
from ..datastructures import HeadersLike
12-
from ..exceptions import ProxyError
11+
from ..datastructures import Headers, HeadersLike
12+
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus, ProxyError
1313
from ..extensions.base import ClientExtensionFactory
1414
from ..extensions.permessage_deflate import enable_client_permessage_deflate
15-
from ..headers import validate_subprotocols
15+
from ..headers import build_authorization_basic, build_host, validate_subprotocols
1616
from ..http11 import USER_AGENT, Response
1717
from ..protocol import CONNECTING, Event
18+
from ..streams import StreamReader
1819
from ..typing import LoggerLike, Origin, Subprotocol
1920
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
2021
from .connection import Connection
@@ -141,6 +142,8 @@ def connect(
141142
additional_headers: HeadersLike | None = None,
142143
user_agent_header: str | None = USER_AGENT,
143144
proxy: str | Literal[True] | None = True,
145+
proxy_ssl: ssl_module.SSLContext | None = None,
146+
proxy_server_hostname: str | None = None,
144147
# Timeouts
145148
open_timeout: float | None = 10,
146149
ping_interval: float | None = 20,
@@ -195,6 +198,9 @@ def connect(
195198
to :obj:`None` to disable the proxy or to the address of a proxy
196199
to override the system configuration. See the :doc:`proxy docs
197200
<../../topics/proxies>` for details.
201+
proxy_ssl: Configuration for enabling TLS on the proxy connection.
202+
proxy_server_hostname: Host name for the TLS handshake with the proxy.
203+
``proxy_server_hostname`` overrides the host name from ``proxy``.
198204
open_timeout: Timeout for opening the connection in seconds.
199205
:obj:`None` disables the timeout.
200206
ping_interval: Interval between keepalive pings in seconds.
@@ -288,9 +294,9 @@ def connect(
288294
parse_proxy(proxy),
289295
ws_uri,
290296
deadline,
291-
# websockets is consistent with the socket module while
292-
# python_socks is consistent across implementations.
293-
local_addr=kwargs.pop("source_address", None),
297+
source_address=kwargs.pop("source_address", None),
298+
ssl=proxy_ssl,
299+
server_hostname=proxy_server_hostname,
294300
)
295301
else:
296302
kwargs.setdefault("timeout", deadline.timeout())
@@ -441,6 +447,83 @@ def connect_socks_proxy(
441447
raise ImportError("python-socks is required to use a SOCKS proxy")
442448

443449

450+
def prepare_connect_request(proxy: Proxy, ws_uri: WebSocketURI) -> bytes:
    """
    Serialize the HTTP ``CONNECT`` request sent to an HTTP(S) proxy.

    Args:
        proxy: Proxy to go through; when it carries a username, the
            credentials are sent in a ``Proxy-Authorization`` header.
        ws_uri: URI of the WebSocket server to reach via the proxy.

    Returns:
        The request line followed by the serialized headers.

    """
    # The request target always spells out the port; the Host header is
    # built without forcing it.
    target = build_host(
        ws_uri.host,
        ws_uri.port,
        ws_uri.secure,
        always_include_port=True,
    )
    request_headers = Headers()
    request_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
    if proxy.username is not None:
        assert proxy.password is not None  # enforced by parse_proxy()
        credentials = build_authorization_basic(proxy.username, proxy.password)
        request_headers["Proxy-Authorization"] = credentials
    # We cannot use the Request class because it supports only GET requests.
    request_line = f"CONNECT {target} HTTP/1.1\r\n".encode()
    return request_line + request_headers.serialize()
461+
462+
463+
def read_connect_response(sock: socket.socket, deadline: Deadline) -> Response:
    """
    Read and validate the proxy's response to a ``CONNECT`` request.

    Args:
        sock: Socket connected to the proxy, after the request was sent.
        deadline: Provides the timeout applied before each read.

    Returns:
        The parsed response when its status code is 2xx.

    Raises:
        InvalidProxyStatus: If the status code isn't 2xx.
        InvalidProxyMessage: If reading or parsing the response fails; the
            underlying exception is attached as its cause.

    """
    reader = StreamReader()
    # The body isn't read: only the status line and headers matter here.
    parser = Response.parse(
        reader.read_line,
        reader.read_exact,
        reader.read_to_eof,
        include_body=False,
    )
    try:
        while True:
            sock.settimeout(deadline.timeout())
            data = sock.recv(4096)
            if data:
                reader.feed_data(data)
            else:
                # recv() returns b"" once the proxy has closed the
                # connection. Tell the reader about it so the parser can
                # stop waiting, instead of spinning on empty reads.
                reader.feed_eof()
            next(parser)
    except StopIteration as exc:
        response = cast(Response, exc.value)
        if 200 <= response.status_code < 300:
            return response
        else:
            raise InvalidProxyStatus(response)
    except Exception as exc:
        raise InvalidProxyMessage(
            "did not receive a valid HTTP response from proxy"
        ) from exc
    finally:
        # Restore blocking mode regardless of the outcome.
        sock.settimeout(None)
488+
489+
490+
def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    *,
    ssl: ssl_module.SSLContext | None = None,
    server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Connect to an HTTP(S) proxy and open a tunnel to ``ws_uri``.

    Args:
        proxy: Proxy to connect to.
        ws_uri: URI of the WebSocket server to reach via the proxy.
        deadline: Provides the timeouts for connecting, for the TLS
            handshake, and for reading the proxy's response.
        ssl: TLS configuration for an ``https`` proxy; a default context is
            created when omitted.
        server_hostname: Host name for the TLS handshake with the proxy;
            defaults to the proxy's host.
        **kwargs: Passed to :func:`socket.create_connection`.

    Returns:
        The socket connected to the proxy, once the tunnel is established.

    Raises:
        ValueError: If ``ssl`` is set while the proxy scheme isn't ``https``.
        InvalidProxyStatus: If the proxy rejects the ``CONNECT`` request.
        InvalidProxyMessage: If the proxy's response cannot be read.

    """
    if proxy.scheme != "https" and ssl is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # Connect socket

    kwargs.setdefault("timeout", deadline.timeout())
    sock = socket.create_connection((proxy.host, proxy.port), **kwargs)

    # From here on, the socket must be closed if anything fails; otherwise
    # it would leak because the caller never receives it.
    try:
        # Initialize TLS wrapper and perform TLS handshake

        if proxy.scheme == "https":
            if ssl is None:
                ssl = ssl_module.create_default_context()
            if server_hostname is None:
                server_hostname = proxy.host
            sock.settimeout(deadline.timeout())
            sock = ssl.wrap_socket(sock, server_hostname=server_hostname)
            sock.settimeout(None)

        # Send CONNECT request to the proxy and read response.

        sock.sendall(prepare_connect_request(proxy, ws_uri))
        # This raises exceptions if the connection to the proxy fails.
        read_connect_response(sock, deadline)
    except BaseException:
        sock.close()
        raise

    return sock
525+
526+
444527
def connect_proxy(
445528
proxy: Proxy,
446529
ws_uri: WebSocketURI,
@@ -450,6 +533,12 @@ def connect_proxy(
450533
"""Connect via a proxy and return the socket."""
451534
# parse_proxy() validates proxy.scheme.
452535
if proxy.scheme[:5] == "socks":
536+
# websockets is consistent with the socket module while
537+
# python_socks is consistent across implementations.
538+
# It will translate local_addr back to source_address.
539+
kwargs["local_addr"] = kwargs.pop("source_address", None)
453540
return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs)
541+
elif proxy.scheme[:4] == "http":
542+
return connect_http_proxy(proxy, ws_uri, deadline, **kwargs)
454543
else:
455544
raise AssertionError("unsupported proxy")

0 commit comments

Comments
 (0)