From 4e1be225151f976d47b518d458ec3325379a4df7 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sat, 25 Jan 2025 13:20:54 +0100 Subject: [PATCH 1/2] Add helpers to locate proxy for client connections. --- docs/reference/exceptions.rst | 10 +- src/websockets/__init__.py | 9 ++ src/websockets/exceptions.py | 42 +++++++- src/websockets/uri.py | 128 +++++++++++++++++++++++- tests/test_exceptions.py | 12 +++ tests/test_uri.py | 182 ++++++++++++++++++++++++++++++++-- tests/utils.py | 16 +++ 7 files changed, 382 insertions(+), 17 deletions(-) diff --git a/docs/reference/exceptions.rst b/docs/reference/exceptions.rst index d6b7f0f57..e0c2efdd1 100644 --- a/docs/reference/exceptions.rst +++ b/docs/reference/exceptions.rst @@ -28,14 +28,20 @@ also reported by :func:`~websockets.asyncio.server.serve` in logs. .. autoexception:: InvalidURI -.. autoexception:: InvalidHandshake +.. autoexception:: InvalidProxy -.. autoexception:: InvalidMessage +.. autoexception:: InvalidHandshake .. autoexception:: SecurityError +.. autoexception:: InvalidMessage + .. autoexception:: InvalidStatus +.. autoexception:: InvalidProxyMessage + +.. autoexception:: InvalidProxyStatus + .. autoexception:: InvalidHeader .. 
autoexception:: InvalidHeaderFormat diff --git a/src/websockets/__init__.py b/src/websockets/__init__.py index 1d0abe5cd..8bf282a73 100644 --- a/src/websockets/__init__.py +++ b/src/websockets/__init__.py @@ -39,6 +39,9 @@ "InvalidOrigin", "InvalidParameterName", "InvalidParameterValue", + "InvalidProxy", + "InvalidProxyMessage", + "InvalidProxyStatus", "InvalidState", "InvalidStatus", "InvalidUpgrade", @@ -99,6 +102,9 @@ InvalidOrigin, InvalidParameterName, InvalidParameterValue, + InvalidProxy, + InvalidProxyMessage, + InvalidProxyStatus, InvalidState, InvalidStatus, InvalidUpgrade, @@ -157,6 +163,9 @@ "InvalidOrigin": ".exceptions", "InvalidParameterName": ".exceptions", "InvalidParameterValue": ".exceptions", + "InvalidProxy": ".exceptions", + "InvalidProxyMessage": ".exceptions", + "InvalidProxyStatus": ".exceptions", "InvalidState": ".exceptions", "InvalidStatus": ".exceptions", "InvalidUpgrade": ".exceptions", diff --git a/src/websockets/exceptions.py b/src/websockets/exceptions.py index 73b24debf..e70aac92e 100644 --- a/src/websockets/exceptions.py +++ b/src/websockets/exceptions.py @@ -6,11 +6,14 @@ * :exc:`ConnectionClosedOK` * :exc:`ConnectionClosedError` * :exc:`InvalidURI` + * :exc:`InvalidProxy` * :exc:`InvalidHandshake` * :exc:`SecurityError` * :exc:`InvalidMessage` * :exc:`InvalidStatus` * :exc:`InvalidStatusCode` (legacy) + * :exc:`InvalidProxyMessage` + * :exc:`InvalidProxyStatus` * :exc:`InvalidHeader` * :exc:`InvalidHeaderFormat` * :exc:`InvalidHeaderValue` @@ -42,13 +45,16 @@ "ConnectionClosedOK", "ConnectionClosedError", "InvalidURI", + "InvalidProxy", "InvalidHandshake", "SecurityError", + "InvalidMessage", "InvalidStatus", + "InvalidProxyMessage", + "InvalidProxyStatus", "InvalidHeader", "InvalidHeaderFormat", "InvalidHeaderValue", - "InvalidMessage", "InvalidOrigin", "InvalidUpgrade", "NegotiationError", @@ -169,6 +175,20 @@ def __str__(self) -> str: return f"{self.uri} isn't a valid URI: {self.msg}" +class InvalidProxy(WebSocketException): 
+ """ + Raised when connecting via a proxy that isn't valid. + + """ + + def __init__(self, proxy: str, msg: str) -> None: + self.proxy = proxy + self.msg = msg + + def __str__(self) -> str: + return f"{self.proxy} isn't a valid proxy: {self.msg}" + + class InvalidHandshake(WebSocketException): """ Base class for exceptions raised when the opening handshake fails. @@ -208,6 +228,26 @@ def __str__(self) -> str: ) +class InvalidProxyMessage(InvalidHandshake): + """ + Raised when a proxy response is malformed. + + """ + + +class InvalidProxyStatus(InvalidHandshake): + """ + Raised when a proxy rejects the connection. + + """ + + def __init__(self, response: http11.Response) -> None: + self.response = response + + def __str__(self) -> str: + return f"proxy rejected connection: HTTP {self.response.status_code:d}" + + class InvalidHeader(InvalidHandshake): """ Raised when an HTTP header doesn't have a valid format or value. diff --git a/src/websockets/uri.py b/src/websockets/uri.py index 16bb3f1c1..b925b99b5 100644 --- a/src/websockets/uri.py +++ b/src/websockets/uri.py @@ -2,13 +2,18 @@ import dataclasses import urllib.parse +import urllib.request -from .exceptions import InvalidURI +from .exceptions import InvalidProxy, InvalidURI __all__ = ["parse_uri", "WebSocketURI"] +# All characters from the gen-delims and sub-delims sets in RFC 3987. +DELIMS = ":/?#[]@!$&'()*+,;=" + + @dataclasses.dataclass class WebSocketURI: """ @@ -53,10 +58,6 @@ def user_info(self) -> tuple[str, str] | None: return (self.username, self.password) -# All characters from the gen-delims and sub-delims sets in RFC 3987. -DELIMS = ":/?#[]@!$&'()*+,;=" - - def parse_uri(uri: str) -> WebSocketURI: """ Parse and validate a WebSocket URI. @@ -105,3 +106,120 @@ def parse_uri(uri: str) -> WebSocketURI: password = urllib.parse.quote(password, safe=DELIMS) return WebSocketURI(secure, host, port, path, query, username, password) + + +@dataclasses.dataclass +class Proxy: + """ + Proxy. 
+ + Attributes: + scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``, + ``"https"``, or ``"http"``. + host: Normalized to lower case. + port: Always set even if it's the default. + username: Available when the proxy address contains `User Information`_. + password: Available when the proxy address contains `User Information`_. + + .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1 + + """ + + scheme: str + host: str + port: int + username: str | None = None + password: str | None = None + + @property + def user_info(self) -> tuple[str, str] | None: + if self.username is None: + return None + assert self.password is not None + return (self.username, self.password) + + +def parse_proxy(proxy: str) -> Proxy: + """ + Parse and validate a proxy. + + Args: + proxy: proxy. + + Returns: + Parsed proxy. + + Raises: + InvalidProxy: If ``proxy`` isn't a valid proxy. + + """ + parsed = urllib.parse.urlparse(proxy) + if parsed.scheme not in ["socks5h", "socks5", "socks4a", "socks4", "https", "http"]: + raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported") + if parsed.hostname is None: + raise InvalidProxy(proxy, "hostname isn't provided") + if parsed.path not in ["", "/"]: + raise InvalidProxy(proxy, "path is meaningless") + if parsed.query != "": + raise InvalidProxy(proxy, "query is meaningless") + if parsed.fragment != "": + raise InvalidProxy(proxy, "fragment is meaningless") + + scheme = parsed.scheme + host = parsed.hostname + port = parsed.port or (443 if parsed.scheme == "https" else 80) + username = parsed.username + password = parsed.password + # urllib.parse.urlparse accepts URLs with a username but without a + # password. This doesn't make sense for HTTP Basic Auth credentials. + if username is not None and password is None: + raise InvalidProxy(proxy, "username provided without password") + + try: + proxy.encode("ascii") + except UnicodeEncodeError: + # Input contains non-ASCII characters. 
+ # It must be an IRI. Convert it to a URI. + host = host.encode("idna").decode() + if username is not None: + assert password is not None + username = urllib.parse.quote(username, safe=DELIMS) + password = urllib.parse.quote(password, safe=DELIMS) + + return Proxy(scheme, host, port, username, password) + + +def get_proxy(uri: WebSocketURI) -> str | None: + """ + Return the proxy to use for connecting to the given WebSocket URI, if any. + + """ + if urllib.request.proxy_bypass(f"{uri.host}:{uri.port}"): + return None + + # According to the _Proxy Usage_ section of RFC 6455, use a SOCKS5 proxy if + # available, else favor the proxy for HTTPS connections over the proxy for + # HTTP connections. + + # The priority of a proxy for WebSocket connections is unspecified. We give + # it the highest priority. This makes it easy to configure a specific proxy + # for websockets. + + # getproxies() may return SOCKS proxies as {"socks": "http://host:port"} or + # as {"https": "socks5h://host:port"} depending on whether they're declared + # in the operating system or in environment variables. 
+ + proxies = urllib.request.getproxies() + if uri.secure: + schemes = ["wss", "socks", "https"] + else: + schemes = ["ws", "socks", "https", "http"] + + for scheme in schemes: + proxy = proxies.get(scheme) + if proxy is not None: + if scheme == "socks" and proxy.startswith("http://"): + proxy = "socks5h://" + proxy[7:] + return proxy + else: + return None diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index e0518b0e0..8b437ab5e 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -83,6 +83,10 @@ def test_str(self): InvalidURI("|", "not at all!"), "| isn't a valid URI: not at all!", ), + ( + InvalidProxy("|", "not at all!"), + "| isn't a valid proxy: not at all!", + ), ( InvalidHandshake("invalid request"), "invalid request", @@ -99,6 +103,14 @@ def test_str(self): InvalidStatus(Response(401, "Unauthorized", Headers())), "server rejected WebSocket connection: HTTP 401", ), + ( + InvalidProxyMessage("malformed HTTP message"), + "malformed HTTP message", + ), + ( + InvalidProxyStatus(Response(401, "Unauthorized", Headers())), + "proxy rejected connection: HTTP 401", + ), ( InvalidHeader("Name"), "missing Name header", diff --git a/tests/test_uri.py b/tests/test_uri.py index 8acc01c18..35b51fa58 100644 --- a/tests/test_uri.py +++ b/tests/test_uri.py @@ -1,7 +1,10 @@ import unittest -from websockets.exceptions import InvalidURI +from websockets.exceptions import InvalidProxy, InvalidURI from websockets.uri import * +from websockets.uri import Proxy, get_proxy, parse_proxy + +from .utils import patch_environ VALID_URIS = [ @@ -59,38 +62,199 @@ "ws:///path", ] -RESOURCE_NAMES = [ +URIS_WITH_RESOURCE_NAMES = [ ("ws://localhost/", "/"), ("ws://localhost", "/"), ("ws://localhost/path?query", "/path?query"), ("ws://høst/πass?qùéry", "/%CF%80ass?q%C3%B9%C3%A9ry"), ] -USER_INFOS = [ +URIS_WITH_USER_INFO = [ ("ws://localhost/", None), ("ws://user:pass@localhost/", ("user", "pass")), ("ws://üser:påss@høst/", ("%C3%BCser", "p%C3%A5ss")), ] 
+VALID_PROXIES = [ + ( + "http://proxy:8080", + Proxy("http", "proxy", 8080, None, None), + ), + ( + "https://proxy:8080", + Proxy("https", "proxy", 8080, None, None), + ), + ( + "http://proxy", + Proxy("http", "proxy", 80, None, None), + ), + ( + "http://proxy:8080/", + Proxy("http", "proxy", 8080, None, None), + ), + ( + "http://PROXY:8080", + Proxy("http", "proxy", 8080, None, None), + ), + ( + "http://user:pass@proxy:8080", + Proxy("http", "proxy", 8080, "user", "pass"), + ), + ( + "http://høst:8080/", + Proxy("http", "xn--hst-0na", 8080, None, None), + ), + ( + "http://üser:påss@høst:8080", + Proxy("http", "xn--hst-0na", 8080, "%C3%BCser", "p%C3%A5ss"), + ), +] + +INVALID_PROXIES = [ + "ws://proxy:8080", + "wss://proxy:8080", + "http://proxy:8080/path", + "http://proxy:8080/?query", + "http://proxy:8080/#fragment", + "http://user@proxy", + "http:///", +] + +PROXIES_WITH_USER_INFO = [ + ("http://proxy", None), + ("http://user:pass@proxy", ("user", "pass")), + ("http://üser:påss@høst", ("%C3%BCser", "p%C3%A5ss")), +] + +PROXY_ENVS = [ + ( + {"ws_proxy": "http://proxy:8080"}, + "ws://example.com/", + "http://proxy:8080", + ), + ( + {"ws_proxy": "http://proxy:8080"}, + "wss://example.com/", + None, + ), + ( + {"wss_proxy": "http://proxy:8080"}, + "ws://example.com/", + None, + ), + ( + {"wss_proxy": "http://proxy:8080"}, + "wss://example.com/", + "http://proxy:8080", + ), + ( + {"http_proxy": "http://proxy:8080"}, + "ws://example.com/", + "http://proxy:8080", + ), + ( + {"http_proxy": "http://proxy:8080"}, + "wss://example.com/", + None, + ), + ( + {"https_proxy": "http://proxy:8080"}, + "ws://example.com/", + "http://proxy:8080", + ), + ( + {"https_proxy": "http://proxy:8080"}, + "wss://example.com/", + "http://proxy:8080", + ), + ( + {"socks_proxy": "http://proxy:1080"}, + "ws://example.com/", + "socks5h://proxy:1080", + ), + ( + {"socks_proxy": "http://proxy:1080"}, + "wss://example.com/", + "socks5h://proxy:1080", + ), + ( + {"ws_proxy": "http://proxy1:8080", 
"wss_proxy": "http://proxy2:8080"}, + "ws://example.com/", + "http://proxy1:8080", + ), + ( + {"ws_proxy": "http://proxy1:8080", "wss_proxy": "http://proxy2:8080"}, + "wss://example.com/", + "http://proxy2:8080", + ), + ( + {"http_proxy": "http://proxy1:8080", "https_proxy": "http://proxy2:8080"}, + "ws://example.com/", + "http://proxy2:8080", + ), + ( + {"http_proxy": "http://proxy1:8080", "https_proxy": "http://proxy2:8080"}, + "wss://example.com/", + "http://proxy2:8080", + ), + ( + {"https_proxy": "http://proxy:8080", "socks_proxy": "http://proxy:1080"}, + "ws://example.com/", + "socks5h://proxy:1080", + ), + ( + {"https_proxy": "http://proxy:8080", "socks_proxy": "http://proxy:1080"}, + "wss://example.com/", + "socks5h://proxy:1080", + ), + ( + {"socks_proxy": "http://proxy:1080", "no_proxy": ".local"}, + "ws://example.local/", + None, + ), +] + class URITests(unittest.TestCase): - def test_success(self): + def test_parse_valid_uris(self): for uri, parsed in VALID_URIS: with self.subTest(uri=uri): self.assertEqual(parse_uri(uri), parsed) - def test_error(self): + def test_parse_invalid_uris(self): for uri in INVALID_URIS: with self.subTest(uri=uri): with self.assertRaises(InvalidURI): parse_uri(uri) - def test_resource_name(self): - for uri, resource_name in RESOURCE_NAMES: + def test_parse_resource_name(self): + for uri, resource_name in URIS_WITH_RESOURCE_NAMES: with self.subTest(uri=uri): self.assertEqual(parse_uri(uri).resource_name, resource_name) - def test_user_info(self): - for uri, user_info in USER_INFOS: + def test_parse_user_info(self): + for uri, user_info in URIS_WITH_USER_INFO: with self.subTest(uri=uri): self.assertEqual(parse_uri(uri).user_info, user_info) + + def test_parse_valid_proxies(self): + for proxy, parsed in VALID_PROXIES: + with self.subTest(proxy=proxy): + self.assertEqual(parse_proxy(proxy), parsed) + + def test_parse_invalid_proxies(self): + for proxy in INVALID_PROXIES: + with self.subTest(proxy=proxy): + with 
self.assertRaises(InvalidProxy): + parse_proxy(proxy) + + def test_parse_proxy_user_info(self): + for proxy, user_info in PROXIES_WITH_USER_INFO: + with self.subTest(proxy=proxy): + self.assertEqual(parse_proxy(proxy).user_info, user_info) + + def test_get_proxy(self): + for environ, uri, proxy in PROXY_ENVS: + with patch_environ(environ): + with self.subTest(environ=environ, uri=uri): + self.assertEqual(get_proxy(parse_uri(uri)), proxy) diff --git a/tests/utils.py b/tests/utils.py index 77d020726..f68a447b1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -139,6 +139,22 @@ def assertNoLogs(self, logger=None, level=None): self.assertEqual(logs.output, [f"{level_name}:{logger}:dummy"]) +@contextlib.contextmanager +def patch_environ(environ): + backup = {} + for key, value in environ.items(): + backup[key] = os.environ.get(key) + os.environ[key] = value + try: + yield + finally: + for key, value in backup.items(): + if value is None: + del os.environ[key] + else: # pragma: no cover + os.environ[key] = value + + @contextlib.contextmanager def temp_unix_socket_path(): with tempfile.TemporaryDirectory() as temp_dir: From fac7f977082ea412ca9a05099ce658be4a9c2a8c Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sat, 25 Jan 2025 17:18:27 +0100 Subject: [PATCH 2/2] Add support for SOCKS proxies. Fix #475. 
--- docs/project/changelog.rst | 19 ++++++- docs/reference/features.rst | 3 +- docs/topics/index.rst | 1 + docs/topics/proxies.rst | 66 +++++++++++++++++++++++ src/websockets/asyncio/client.py | 64 +++++++++++++++++++++-- src/websockets/sync/client.py | 68 ++++++++++++++++++++++-- src/websockets/version.py | 2 +- tests/asyncio/test_client.py | 83 ++++++++++++++++++++++++++++- tests/proxy.py | 89 ++++++++++++++++++++++++++++++++ tests/requirements.txt | 2 + tests/sync/test_client.py | 80 +++++++++++++++++++++++++++- tox.ini | 21 ++++++-- 12 files changed, 479 insertions(+), 19 deletions(-) create mode 100644 docs/topics/proxies.rst create mode 100644 tests/proxy.py create mode 100644 tests/requirements.txt diff --git a/docs/project/changelog.rst b/docs/project/changelog.rst index 7f341d942..2a429b43e 100644 --- a/docs/project/changelog.rst +++ b/docs/project/changelog.rst @@ -25,13 +25,28 @@ fixing regressions shortly after a release. Only documented APIs are public. Undocumented, private APIs may change without notice. -.. _14.3: +.. _15.0: -14.3 +15.0 ---- *In development* +Backwards-incompatible changes +.............................. + +.. admonition:: Client connections use SOCKS proxies automatically. + :class: important + + If a proxy is configured in the operating system or with an environment + variable, websockets uses it automatically when connecting to a server. + This feature requires installing the third-party library `python-socks`_. + + If you want to disable the proxy, add ``proxy=None`` when calling + :func:`~asyncio.client.connect`. See :doc:`../topics/proxies` for details. + + .. _python-socks: https://github.com/romis2012/python-socks + New features ............ 
diff --git a/docs/reference/features.rst b/docs/reference/features.rst index 6ba42f66b..eaecd02a9 100644 --- a/docs/reference/features.rst +++ b/docs/reference/features.rst @@ -168,11 +168,10 @@ Client +------------------------------------+--------+--------+--------+--------+ | Connect via HTTP proxy (`#364`_) | ❌ | ❌ | — | ❌ | +------------------------------------+--------+--------+--------+--------+ - | Connect via SOCKS5 proxy (`#475`_) | ❌ | ❌ | — | ❌ | + | Connect via SOCKS5 proxy | ✅ | ✅ | — | ❌ | +------------------------------------+--------+--------+--------+--------+ .. _#364: https://github.com/python-websockets/websockets/issues/364 -.. _#475: https://github.com/python-websockets/websockets/issues/475 .. _#784: https://github.com/python-websockets/websockets/issues/784 Known limitations diff --git a/docs/topics/index.rst b/docs/topics/index.rst index 616753c6c..a08d487c9 100644 --- a/docs/topics/index.rst +++ b/docs/topics/index.rst @@ -15,3 +15,4 @@ Get a deeper understanding of how websockets is built and why. memory security performance + proxies diff --git a/docs/topics/proxies.rst b/docs/topics/proxies.rst new file mode 100644 index 000000000..fd3ae78b6 --- /dev/null +++ b/docs/topics/proxies.rst @@ -0,0 +1,66 @@ +Proxies +======= + +.. currentmodule:: websockets + +If a proxy is configured in the operating system or with an environment +variable, websockets uses it automatically when connecting to a server. + +Configuration +------------- + +First, if the server is in the proxy bypass list of the operating system or in +the ``no_proxy`` environment variable, websockets connects directly. + +Then, it looks for a proxy in the following locations: + +1. The ``wss_proxy`` or ``ws_proxy`` environment variables for ``wss://`` and + ``ws://`` connections respectively. They allow configuring a specific proxy + for WebSocket connections. +2. A SOCKS proxy configured in the operating system. +3. 
An HTTP proxy configured in the operating system or in the ``https_proxy`` + environment variable, for both ``wss://`` and ``ws://`` connections. +4. An HTTP proxy configured in the operating system or in the ``http_proxy`` + environment variable, only for ``ws://`` connections. + +Finally, if no proxy is found, websockets connects directly. + +While environment variables are case-insensitive, the lower-case spelling is the +most common, for `historical reasons`_, and recommended. + +.. _historical reasons: https://unix.stackexchange.com/questions/212894/ + +.. admonition:: Any environment variable can configure a SOCKS proxy or an HTTP proxy. + :class: tip + + For example, ``https_proxy=socks5h://proxy:1080/`` configures a SOCKS proxy + for all WebSocket connections. Likewise, ``wss_proxy=http://proxy:8080/`` + configures an HTTP proxy only for ``wss://`` connections. + +.. admonition:: What if websockets doesn't select the right proxy? + :class: hint + + websockets relies on :func:`~urllib.request.getproxies()` to read the proxy + configuration. Check that it returns what you expect. If it doesn't, review + your proxy configuration. + +You can override the default configuration and configure a proxy explicitly with +the ``proxy`` argument of :func:`~asyncio.client.connect`. Set ``proxy=None`` to +disable the proxy. + +SOCKS proxies +------------- + +Connecting through a SOCKS proxy requires installing the third-party library +`python-socks`_:: + + $ pip install python-socks\[asyncio\] + +.. _python-socks: https://github.com/romis2012/python-socks + +python-socks supports SOCKS4, SOCKS4a, SOCKS5, and SOCKS5h. The protocol version +is configured in the address of the proxy e.g. ``socks5h://proxy:1080/``. When a +SOCKS proxy is configured in the operating system, websockets uses SOCKS5h. + +python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`) +but does not support other authentication methods such as GSSAPI (:rfc:`1961`). 
diff --git a/src/websockets/asyncio/client.py b/src/websockets/asyncio/client.py index bde0beeea..f76095ead 100644 --- a/src/websockets/asyncio/client.py +++ b/src/websockets/asyncio/client.py @@ -7,7 +7,7 @@ import urllib.parse from collections.abc import AsyncIterator, Generator, Sequence from types import TracebackType -from typing import Any, Callable +from typing import Any, Callable, Literal from ..client import ClientProtocol, backoff from ..datastructures import HeadersLike @@ -18,7 +18,7 @@ from ..http11 import USER_AGENT, Response from ..protocol import CONNECTING, Event from ..typing import LoggerLike, Origin, Subprotocol -from ..uri import WebSocketURI, parse_uri +from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri from .compatibility import TimeoutError, asyncio_timeout from .connection import Connection @@ -208,6 +208,10 @@ class connect: user_agent_header: Value of the ``User-Agent`` request header. It defaults to ``"Python/x.y.z websockets/X.Y"``. Setting it to :obj:`None` removes the header. + proxy: If a proxy is configured, it is used by default. Set ``proxy`` + to :obj:`None` to disable the proxy or to the address of a proxy + to override the system configuration. See the :doc:`proxy docs + <../../topics/proxies>` for details. process_exception: When reconnecting automatically, tell whether an error is transient or fatal. The default behavior is defined by :func:`process_exception`. Refer to its documentation for details. 
@@ -279,6 +283,7 @@ def __init__( # HTTP additional_headers: HeadersLike | None = None, user_agent_header: str | None = USER_AGENT, + proxy: str | Literal[True] | None = True, process_exception: Callable[[Exception], Exception | None] = process_exception, # Timeouts open_timeout: float | None = 10, @@ -333,6 +338,7 @@ def protocol_factory(uri: WebSocketURI) -> ClientConnection: ) return connection + self.proxy = proxy self.protocol_factory = protocol_factory self.handshake_args = ( additional_headers, @@ -346,9 +352,20 @@ def protocol_factory(uri: WebSocketURI) -> ClientConnection: async def create_connection(self) -> ClientConnection: """Create TCP or Unix connection.""" loop = asyncio.get_running_loop() + kwargs = self.connection_kwargs.copy() ws_uri = parse_uri(self.uri) - kwargs = self.connection_kwargs.copy() + + proxy = self.proxy + proxy_uri: Proxy | None = None + if kwargs.get("unix", False): + proxy = None + if kwargs.get("sock") is not None: + proxy = None + if proxy is True: + proxy = get_proxy(ws_uri) + if proxy is not None: + proxy_uri = parse_proxy(proxy) def factory() -> ClientConnection: return self.protocol_factory(ws_uri) @@ -365,6 +382,47 @@ def factory() -> ClientConnection: if kwargs.pop("unix", False): _, connection = await loop.create_unix_connection(factory, **kwargs) else: + if proxy_uri is not None: + if proxy_uri.scheme[:5] == "socks": + try: + from python_socks import ProxyType + from python_socks.async_.asyncio import Proxy + except ImportError: + raise ImportError( + "python-socks is required to use a SOCKS proxy" + ) + if proxy_uri.scheme == "socks5h": + proxy_type = ProxyType.SOCKS5 + rdns = True + elif proxy_uri.scheme == "socks5": + proxy_type = ProxyType.SOCKS5 + rdns = False + # We use mitmproxy for testing and it doesn't support SOCKS4. 
+ elif proxy_uri.scheme == "socks4a": # pragma: no cover + proxy_type = ProxyType.SOCKS4 + rdns = True + elif proxy_uri.scheme == "socks4": # pragma: no cover + proxy_type = ProxyType.SOCKS4 + rdns = False + # Proxy types are enforced in parse_proxy(). + else: + raise AssertionError("unsupported SOCKS proxy") + socks_proxy = Proxy( + proxy_type, + proxy_uri.host, + proxy_uri.port, + proxy_uri.username, + proxy_uri.password, + rdns, + ) + kwargs["sock"] = await socks_proxy.connect( + ws_uri.host, + ws_uri.port, + local_addr=kwargs.pop("local_addr", None), + ) + # Proxy types are enforced in parse_proxy(). + else: + raise AssertionError("unsupported proxy") if kwargs.get("sock") is None: kwargs.setdefault("host", ws_uri.host) kwargs.setdefault("port", ws_uri.port) diff --git a/src/websockets/sync/client.py b/src/websockets/sync/client.py index da2b88591..96f62edab 100644 --- a/src/websockets/sync/client.py +++ b/src/websockets/sync/client.py @@ -5,7 +5,7 @@ import threading import warnings from collections.abc import Sequence -from typing import Any +from typing import Any, Literal from ..client import ClientProtocol from ..datastructures import HeadersLike @@ -15,7 +15,7 @@ from ..http11 import USER_AGENT, Response from ..protocol import CONNECTING, Event from ..typing import LoggerLike, Origin, Subprotocol -from ..uri import parse_uri +from ..uri import Proxy, get_proxy, parse_proxy, parse_uri from .connection import Connection from .utils import Deadline @@ -139,6 +139,7 @@ def connect( # HTTP additional_headers: HeadersLike | None = None, user_agent_header: str | None = USER_AGENT, + proxy: str | Literal[True] | None = True, # Timeouts open_timeout: float | None = 10, ping_interval: float | None = 20, @@ -189,6 +190,10 @@ def connect( user_agent_header: Value of the ``User-Agent`` request header. It defaults to ``"Python/x.y.z websockets/X.Y"``. Setting it to :obj:`None` removes the header. + proxy: If a proxy is configured, it is used by default. 
Set ``proxy`` + to :obj:`None` to disable the proxy or to the address of a proxy + to override the system configuration. See the :doc:`proxy docs + <../../topics/proxies>` for details. open_timeout: Timeout for opening the connection in seconds. :obj:`None` disables the timeout. ping_interval: Interval between keepalive pings in seconds. @@ -253,6 +258,16 @@ def connect( elif compression is not None: raise ValueError(f"unsupported compression: {compression}") + proxy_uri: Proxy | None = None + if unix: + proxy = None + if sock is not None: + proxy = None + if proxy is True: + proxy = get_proxy(ws_uri) + if proxy is not None: + proxy_uri = parse_proxy(proxy) + # Calculate timeouts on the TCP, TLS, and WebSocket handshakes. # The TCP and TLS timeouts must be set on the socket, then removed # to avoid conflicting with the WebSocket timeout in handshake(). @@ -271,8 +286,53 @@ def connect( assert path is not None # mypy cannot figure this out sock.connect(path) else: - kwargs.setdefault("timeout", deadline.timeout()) - sock = socket.create_connection((ws_uri.host, ws_uri.port), **kwargs) + if proxy_uri is not None: + if proxy_uri.scheme[:5] == "socks": + try: + from python_socks import ProxyType + from python_socks.sync import Proxy + except ImportError: + raise ImportError( + "python-socks is required to use a SOCKS proxy" + ) + if proxy_uri.scheme == "socks5h": + proxy_type = ProxyType.SOCKS5 + rdns = True + elif proxy_uri.scheme == "socks5": + proxy_type = ProxyType.SOCKS5 + rdns = False + # We use mitmproxy for testing and it doesn't support SOCKS4. + elif proxy_uri.scheme == "socks4a": # pragma: no cover + proxy_type = ProxyType.SOCKS4 + rdns = True + elif proxy_uri.scheme == "socks4": # pragma: no cover + proxy_type = ProxyType.SOCKS4 + rdns = False + # Proxy types are enforced in parse_proxy(). 
+ else: + raise AssertionError("unsupported SOCKS proxy") + socks_proxy = Proxy( + proxy_type, + proxy_uri.host, + proxy_uri.port, + proxy_uri.username, + proxy_uri.password, + rdns, + ) + sock = socks_proxy.connect( + ws_uri.host, + ws_uri.port, + timeout=deadline.timeout(), + local_addr=kwargs.pop("local_addr", None), + ) + # Proxy types are enforced in parse_proxy(). + else: + raise AssertionError("unsupported proxy") + else: + kwargs.setdefault("timeout", deadline.timeout()) + sock = socket.create_connection( + (ws_uri.host, ws_uri.port), **kwargs + ) sock.settimeout(None) # Disable Nagle algorithm diff --git a/src/websockets/version.py b/src/websockets/version.py index ca9a9115b..611e7d238 100644 --- a/src/websockets/version.py +++ b/src/websockets/version.py @@ -20,7 +20,7 @@ released = False -tag = version = commit = "14.3" +tag = version = commit = "15.0" if not released: # pragma: no cover diff --git a/tests/asyncio/test_client.py b/tests/asyncio/test_client.py index f05bfc699..cb2b8ede6 100644 --- a/tests/asyncio/test_client.py +++ b/tests/asyncio/test_client.py @@ -4,6 +4,7 @@ import logging import socket import ssl +import sys import unittest from websockets.asyncio.client import * @@ -13,13 +14,21 @@ from websockets.exceptions import ( InvalidHandshake, InvalidMessage, + InvalidProxy, InvalidStatus, InvalidURI, SecurityError, ) from websockets.extensions.permessage_deflate import PerMessageDeflate -from ..utils import CLIENT_CONTEXT, MS, SERVER_CONTEXT, temp_unix_socket_path +from ..proxy import async_proxy +from ..utils import ( + CLIENT_CONTEXT, + MS, + SERVER_CONTEXT, + patch_environ, + temp_unix_socket_path, +) from .server import args, get_host_port, get_uri, handler @@ -555,6 +564,78 @@ def redirect(connection, request): ) +@unittest.skipUnless("mitmproxy" in sys.modules, "mitmproxy not installed") +class ProxyClientTests(unittest.IsolatedAsyncioTestCase): + @contextlib.asynccontextmanager + async def socks_proxy(self, auth=None): + if auth: + 
proxyauth = "hello:iloveyou" + proxy_uri = "http://hello:iloveyou@localhost:1080" + else: + proxyauth = None + proxy_uri = "http://localhost:1080" + async with async_proxy(mode=["socks5"], proxyauth=proxyauth) as record_flows: + with patch_environ({"socks_proxy": proxy_uri}): + yield record_flows + + async def test_socks_proxy(self): + """Client connects to server through a SOCKS5 proxy.""" + async with self.socks_proxy() as proxy: + async with serve(*args) as server: + async with connect(get_uri(server)) as client: + self.assertEqual(client.protocol.state.name, "OPEN") + self.assertEqual(len(proxy.get_flows()), 1) + + async def test_secure_socks_proxy(self): + """Client connects to server securely through a SOCKS5 proxy.""" + async with self.socks_proxy() as proxy: + async with serve(*args, ssl=SERVER_CONTEXT) as server: + async with connect(get_uri(server), ssl=CLIENT_CONTEXT) as client: + self.assertEqual(client.protocol.state.name, "OPEN") + self.assertEqual(len(proxy.get_flows()), 1) + + async def test_authenticated_socks_proxy(self): + """Client connects to server through an authenticated SOCKS5 proxy.""" + async with self.socks_proxy(auth=True) as proxy: + async with serve(*args) as server: + async with connect(get_uri(server)) as client: + self.assertEqual(client.protocol.state.name, "OPEN") + self.assertEqual(len(proxy.get_flows()), 1) + + async def test_explicit_proxy(self): + """Client connects to server through a proxy set explicitly.""" + async with async_proxy(mode=["socks5"]) as proxy: + async with serve(*args) as server: + async with connect( + get_uri(server), + # Take this opportunity to test socks5 instead of socks5h. 
import asyncio
import contextlib
import pathlib
import threading
import warnings


# Ignore DeprecationWarning raised from the mitmproxy, passlib, and pyasn1
# modules (presumably needed because the test suite runs with
# -W error::DeprecationWarning).
warnings.filterwarnings("ignore", category=DeprecationWarning, module="mitmproxy")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="passlib")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="pyasn1")

# mitmproxy is optional. When it's missing, this module still imports but the
# proxy helpers below cannot run; test classes are expected to skip themselves
# by checking whether "mitmproxy" is in sys.modules.
try:
    from mitmproxy.addons import core, next_layer, proxyauth, proxyserver, tlsconfig
    from mitmproxy.master import Master
    from mitmproxy.options import Options
except ImportError:
    pass


class RecordFlows:
    """
    Addon that signals when the proxy is up and records WebSocket flows.

    It must be instantiated while an event loop is running because ``ready``
    is a future attached to the running loop.

    """

    def __init__(self):
        # Resolved by running(); async_proxy() awaits it before yielding.
        self.ready = asyncio.get_running_loop().create_future()
        # Filled by websocket_start() and drained by get_flows().
        self.flows = []

    def running(self):
        """Mark the proxy as ready (mitmproxy ``running`` event hook)."""
        self.ready.set_result(None)

    def websocket_start(self, flow):
        """Record a flow (mitmproxy ``websocket_start`` event hook)."""
        self.flows.append(flow)

    def get_flows(self):
        """Return the flows recorded since the previous call and forget them."""
        flows, self.flows[:] = self.flows[:], []
        return flows


@contextlib.asynccontextmanager
async def async_proxy(mode, **config):
    """
    Run a mitmproxy instance in the current event loop.

    Args:
        mode: Value of the ``mode`` option given to mitmproxy's ``Options``.
        config: Additional mitmproxy options, e.g. ``proxyauth``.

    Yields:
        The :class:`RecordFlows` addon attached to the proxy, once the proxy
        reported that it's running.

    """
    options = Options(mode=mode)
    master = Master(options)
    record_flows = RecordFlows()
    master.addons.add(
        core.Core(),
        proxyauth.ProxyAuth(),
        proxyserver.Proxyserver(),
        next_layer.NextLayer(),
        tlsconfig.TlsConfig(),
        record_flows,
    )
    config.update(
        # Use our test certificate for TLS between client and proxy
        # and disable TLS verification between proxy and upstream.
        certs=[str(pathlib.Path(__file__).with_name("test_localhost.pem"))],
        ssl_insecure=True,
    )
    options.update(**config)

    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage collected mid-run.
    run_task = asyncio.create_task(master.run())
    try:
        await record_flows.ready
        yield record_flows
    finally:
        for server in master.addons.get("proxyserver").servers:
            await server.stop()
        master.shutdown()
        # Wait for the proxy to finish shutting down; this also surfaces any
        # exception raised by master.run() instead of silently dropping it.
        await run_task


@contextlib.contextmanager
def sync_proxy(mode, **config):
    """
    Run a mitmproxy instance in a background thread with its own event loop.

    Args:
        mode: Forwarded to :func:`async_proxy`.
        config: Forwarded to :func:`async_proxy`.

    Yields:
        The :class:`RecordFlows` addon attached to the proxy.

    Raises:
        RuntimeError: If the proxy fails before it's ready; the original
            exception is available as ``__cause__``.

    """
    loop = None
    test_done = None
    record_flows = None
    startup_error = None
    proxy_ready = threading.Event()

    async def proxy_coroutine():
        nonlocal loop, test_done, record_flows, startup_error
        loop = asyncio.get_running_loop()
        test_done = loop.create_future()
        try:
            async with async_proxy(mode, **config) as record_flows:
                proxy_ready.set()
                await test_done
        except BaseException as exc:
            if proxy_ready.is_set():
                # Failure after startup: let it propagate in this thread.
                raise
            # Failure during startup: hand the exception to the calling
            # thread and unblock it, else it would wait forever.
            startup_error = exc
            proxy_ready.set()

    proxy_thread = threading.Thread(target=asyncio.run, args=(proxy_coroutine(),))
    proxy_thread.start()
    proxy_ready.wait()
    if startup_error is not None:
        proxy_thread.join()
        raise RuntimeError("proxy failed to start") from startup_error
    try:
        yield record_flows
    finally:
        # The future belongs to the proxy thread's loop; resolve it from there.
        loop.call_soon_threadsafe(test_done.set_result, None)
        proxy_thread.join()
736a84c98..2f62dd34d 100644 --- a/tests/sync/test_client.py +++ b/tests/sync/test_client.py @@ -1,8 +1,10 @@ +import contextlib import http import logging import socket import socketserver import ssl +import sys import threading import time import unittest @@ -10,17 +12,20 @@ from websockets.exceptions import ( InvalidHandshake, InvalidMessage, + InvalidProxy, InvalidStatus, InvalidURI, ) from websockets.extensions.permessage_deflate import PerMessageDeflate from websockets.sync.client import * +from ..proxy import sync_proxy from ..utils import ( CLIENT_CONTEXT, MS, SERVER_CONTEXT, DeprecationTestCase, + patch_environ, temp_unix_socket_path, ) from .server import get_uri, run_server, run_unix_server @@ -37,7 +42,7 @@ def test_existing_socket(self): """Client connects using a pre-existing socket.""" with run_server() as server: with socket.create_connection(server.socket.getsockname()) as sock: - # Use a non-existing domain to ensure we connect to the right socket. + # Use a non-existing domain to ensure we connect to sock. 
@unittest.skipUnless("mitmproxy" in sys.modules, "mitmproxy not installed")
class ProxyClientTests(unittest.TestCase):
    """Connect the threaded client to a server through a local SOCKS5 proxy."""

    @contextlib.contextmanager
    def socks_proxy(self, auth=None):
        """
        Start a SOCKS5 proxy and set the ``socks_proxy`` environment variable.

        When ``auth`` is true, the proxy is started with credentials and the
        proxy URI carries the same credentials. Yields the object provided by
        ``sync_proxy``, which the tests query with ``get_flows()``.

        """
        proxyauth, proxy_uri = (
            ("hello:iloveyou", "http://hello:iloveyou@localhost:1080")
            if auth
            else (None, "http://localhost:1080")
        )
        with contextlib.ExitStack() as stack:
            recorder = stack.enter_context(
                sync_proxy(mode=["socks5"], proxyauth=proxyauth)
            )
            stack.enter_context(patch_environ({"socks_proxy": proxy_uri}))
            yield recorder

    def test_socks_proxy(self):
        """Client reaches the server via the SOCKS5 proxy set in the environment."""
        with self.socks_proxy() as recorder, run_server() as server:
            with connect(get_uri(server)) as client:
                self.assertEqual(client.protocol.state.name, "OPEN")
        self.assertEqual(len(recorder.get_flows()), 1)

    def test_secure_socks_proxy(self):
        """Client reaches a TLS server via the SOCKS5 proxy."""
        with self.socks_proxy() as recorder, run_server(ssl=SERVER_CONTEXT) as server:
            with connect(get_uri(server), ssl=CLIENT_CONTEXT) as client:
                self.assertEqual(client.protocol.state.name, "OPEN")
        self.assertEqual(len(recorder.get_flows()), 1)

    def test_authenticated_socks_proxy(self):
        """Client reaches the server via a SOCKS5 proxy requiring credentials."""
        with self.socks_proxy(auth=True) as recorder, run_server() as server:
            with connect(get_uri(server)) as client:
                self.assertEqual(client.protocol.state.name, "OPEN")
        self.assertEqual(len(recorder.get_flows()), 1)

    def test_explicit_proxy(self):
        """Client reaches the server via a proxy given in the ``proxy`` argument."""
        with sync_proxy(mode=["socks5"]) as recorder, run_server() as server:
            # Take this opportunity to test socks5 instead of socks5h.
            with connect(get_uri(server), proxy="socks5://localhost:1080") as client:
                self.assertEqual(client.protocol.state.name, "OPEN")
        self.assertEqual(len(recorder.get_flows()), 1)

    def test_ignore_proxy_with_existing_socket(self):
        """Client uses a pre-existing socket and no flow goes through the proxy."""
        with self.socks_proxy() as recorder, run_server() as server:
            address = server.socket.getsockname()
            with socket.create_connection(address) as sock:
                # Use a non-existing domain to ensure we connect to sock.
                with connect("ws://invalid/", sock=sock) as client:
                    self.assertEqual(client.protocol.state.name, "OPEN")
        self.assertEqual(len(recorder.get_flows()), 0)

    def test_unsupported_proxy(self):
        """Client raises InvalidProxy when the proxy scheme isn't supported."""
        environ = {"ws_proxy": "other://localhost:1080"}
        with patch_environ(environ), self.assertRaises(InvalidProxy) as raised:
            with connect("ws://example.com/"):
                self.fail("did not raise")
        self.assertEqual(
            str(raised.exception),
            "other://localhost:1080 isn't a valid proxy: scheme other isn't supported",
        )
src tests -deps = ruff +deps = + ruff [testenv:mypy] commands = mypy --strict src -deps = mypy +deps = + mypy + python-socks