Skip to content

Commit 4e1be22

Browse files
committed
Add helpers to locate proxy for client connections.
1 parent 2d515c8 commit 4e1be22

File tree

7 files changed

+382
-17
lines changed

7 files changed

+382
-17
lines changed

docs/reference/exceptions.rst

+8-2
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,20 @@ also reported by :func:`~websockets.asyncio.server.serve` in logs.
2828

2929
.. autoexception:: InvalidURI
3030

31-
.. autoexception:: InvalidHandshake
31+
.. autoexception:: InvalidProxy
3232

33-
.. autoexception:: InvalidMessage
33+
.. autoexception:: InvalidHandshake
3434

3535
.. autoexception:: SecurityError
3636

37+
.. autoexception:: InvalidMessage
38+
3739
.. autoexception:: InvalidStatus
3840

41+
.. autoexception:: InvalidProxyMessage
42+
43+
.. autoexception:: InvalidProxyStatus
44+
3945
.. autoexception:: InvalidHeader
4046

4147
.. autoexception:: InvalidHeaderFormat

src/websockets/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
"InvalidOrigin",
4040
"InvalidParameterName",
4141
"InvalidParameterValue",
42+
"InvalidProxy",
43+
"InvalidProxyMessage",
44+
"InvalidProxyStatus",
4245
"InvalidState",
4346
"InvalidStatus",
4447
"InvalidUpgrade",
@@ -99,6 +102,9 @@
99102
InvalidOrigin,
100103
InvalidParameterName,
101104
InvalidParameterValue,
105+
InvalidProxy,
106+
InvalidProxyMessage,
107+
InvalidProxyStatus,
102108
InvalidState,
103109
InvalidStatus,
104110
InvalidUpgrade,
@@ -157,6 +163,9 @@
157163
"InvalidOrigin": ".exceptions",
158164
"InvalidParameterName": ".exceptions",
159165
"InvalidParameterValue": ".exceptions",
166+
"InvalidProxy": ".exceptions",
167+
"InvalidProxyMessage": ".exceptions",
168+
"InvalidProxyStatus": ".exceptions",
160169
"InvalidState": ".exceptions",
161170
"InvalidStatus": ".exceptions",
162171
"InvalidUpgrade": ".exceptions",

src/websockets/exceptions.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@
66
* :exc:`ConnectionClosedOK`
77
* :exc:`ConnectionClosedError`
88
* :exc:`InvalidURI`
9+
* :exc:`InvalidProxy`
910
* :exc:`InvalidHandshake`
1011
* :exc:`SecurityError`
1112
* :exc:`InvalidMessage`
1213
* :exc:`InvalidStatus`
1314
* :exc:`InvalidStatusCode` (legacy)
15+
* :exc:`InvalidProxyMessage`
16+
* :exc:`InvalidProxyStatus`
1417
* :exc:`InvalidHeader`
1518
* :exc:`InvalidHeaderFormat`
1619
* :exc:`InvalidHeaderValue`
@@ -42,13 +45,16 @@
4245
"ConnectionClosedOK",
4346
"ConnectionClosedError",
4447
"InvalidURI",
48+
"InvalidProxy",
4549
"InvalidHandshake",
4650
"SecurityError",
51+
"InvalidMessage",
4752
"InvalidStatus",
53+
"InvalidProxyMessage",
54+
"InvalidProxyStatus",
4855
"InvalidHeader",
4956
"InvalidHeaderFormat",
5057
"InvalidHeaderValue",
51-
"InvalidMessage",
5258
"InvalidOrigin",
5359
"InvalidUpgrade",
5460
"NegotiationError",
@@ -169,6 +175,20 @@ def __str__(self) -> str:
169175
return f"{self.uri} isn't a valid URI: {self.msg}"
170176

171177

178+
class InvalidProxy(WebSocketException):
    """
    Raised when the proxy selected for a connection isn't valid.

    Attributes:
        proxy: Proxy address that was rejected.
        msg: Explanation of why it was rejected.

    """

    def __init__(self, proxy: str, msg: str) -> None:
        # Keep both pieces separately so callers can inspect them;
        # __str__ combines them into the human-readable message.
        self.proxy, self.msg = proxy, msg

    def __str__(self) -> str:
        return f"{self.proxy} isn't a valid proxy: {self.msg}"
190+
191+
172192
class InvalidHandshake(WebSocketException):
173193
"""
174194
Base class for exceptions raised when the opening handshake fails.
@@ -208,6 +228,26 @@ def __str__(self) -> str:
208228
)
209229

210230

231+
class InvalidProxyMessage(InvalidHandshake):
    """
    Raised when the response received from a proxy is malformed.

    """
236+
237+
238+
class InvalidProxyStatus(InvalidHandshake):
    """
    Raised when a proxy rejects the connection.

    Attributes:
        response: Response whose status code is reported in the message.

    """

    def __init__(self, response: http11.Response) -> None:
        self.response = response

    def __str__(self) -> str:
        # ":d" formats the status code as a plain decimal integer.
        return f"proxy rejected connection: HTTP {self.response.status_code:d}"
249+
250+
211251
class InvalidHeader(InvalidHandshake):
212252
"""
213253
Raised when an HTTP header doesn't have a valid format or value.

src/websockets/uri.py

+123-5
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,18 @@
22

33
import dataclasses
44
import urllib.parse
5+
import urllib.request
56

6-
from .exceptions import InvalidURI
7+
from .exceptions import InvalidProxy, InvalidURI
78

89

910
__all__ = ["parse_uri", "WebSocketURI"]
1011

1112

13+
# All characters from the gen-delims and sub-delims sets in RFC 3987.
14+
DELIMS = ":/?#[]@!$&'()*+,;="
15+
16+
1217
@dataclasses.dataclass
1318
class WebSocketURI:
1419
"""
@@ -53,10 +58,6 @@ def user_info(self) -> tuple[str, str] | None:
5358
return (self.username, self.password)
5459

5560

56-
# All characters from the gen-delims and sub-delims sets in RFC 3987.
57-
DELIMS = ":/?#[]@!$&'()*+,;="
58-
59-
6061
def parse_uri(uri: str) -> WebSocketURI:
6162
"""
6263
Parse and validate a WebSocket URI.
@@ -105,3 +106,120 @@ def parse_uri(uri: str) -> WebSocketURI:
105106
password = urllib.parse.quote(password, safe=DELIMS)
106107

107108
return WebSocketURI(secure, host, port, path, query, username, password)
109+
110+
111+
@dataclasses.dataclass
class Proxy:
    """
    Parsed proxy address.

    Attributes:
        scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``,
            ``"https"``, or ``"http"``.
        host: Normalized to lower case.
        port: Always set even if it's the default.
        username: Available when the proxy address contains `User Information`_.
        password: Available when the proxy address contains `User Information`_.

    .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1

    """

    scheme: str
    host: str
    port: int
    username: str | None = None
    password: str | None = None

    @property
    def user_info(self) -> tuple[str, str] | None:
        """Return ``(username, password)``, or ``None`` when there is no username."""
        if self.username is not None:
            # A username is expected to always come with a password.
            assert self.password is not None
            return (self.username, self.password)
        return None
140+
141+
142+
def parse_proxy(proxy: str) -> Proxy:
    """
    Parse and validate a proxy.

    Args:
        proxy: Proxy address, for example ``"http://user:pass@host:8080"``.

    Returns:
        Parsed proxy.

    Raises:
        InvalidProxy: If ``proxy`` isn't a valid proxy.

    """
    # urlparse raises ValueError on some malformed inputs, e.g. unbalanced
    # IPv6 brackets. Report them as InvalidProxy, as documented.
    try:
        parsed = urllib.parse.urlparse(proxy)
    except ValueError as exc:
        raise InvalidProxy(proxy, str(exc)) from exc

    if parsed.scheme not in [
        "socks5h",
        "socks5",
        "socks4a",
        "socks4",
        "https",
        "http",
    ]:
        raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported")
    if parsed.hostname is None:
        raise InvalidProxy(proxy, "hostname isn't provided")
    if parsed.path not in ["", "/"]:
        raise InvalidProxy(proxy, "path is meaningless")
    if parsed.query != "":
        raise InvalidProxy(proxy, "query is meaningless")
    if parsed.fragment != "":
        raise InvalidProxy(proxy, "fragment is meaningless")

    scheme = parsed.scheme
    host = parsed.hostname

    # Accessing .port raises ValueError when the port isn't numeric or is out
    # of range; convert that to InvalidProxy too.
    try:
        explicit_port = parsed.port
    except ValueError as exc:
        raise InvalidProxy(proxy, "port isn't valid") from exc
    # NOTE(review): SOCKS schemes also fall back to port 80 here; confirm
    # that this default is intended.
    port = explicit_port or (443 if scheme == "https" else 80)

    username = parsed.username
    password = parsed.password
    # urllib.parse.urlparse accepts URLs with a username but without a
    # password. This doesn't make sense for HTTP Basic Auth credentials.
    if username is not None and password is None:
        raise InvalidProxy(proxy, "username provided without password")

    if not proxy.isascii():
        # Input contains non-ASCII characters.
        # It must be an IRI. Convert it to a URI.
        try:
            host = host.encode("idna").decode()
        except UnicodeError as exc:
            # The idna codec rejects empty or overlong labels, for instance.
            raise InvalidProxy(proxy, "hostname isn't valid") from exc
        if username is not None:
            assert password is not None
            username = urllib.parse.quote(username, safe=DELIMS)
            password = urllib.parse.quote(password, safe=DELIMS)

    return Proxy(scheme, host, port, username, password)
190+
191+
192+
def get_proxy(uri: WebSocketURI) -> str | None:
    """
    Return the proxy to use for connecting to the given WebSocket URI, if any.

    Args:
        uri: WebSocket URI of the server to connect to.

    Returns:
        Proxy address, or ``None`` when the destination bypasses proxies or
        when no suitable proxy is configured.

    """
    destination = f"{uri.host}:{uri.port}"
    if urllib.request.proxy_bypass(destination):
        return None

    # Lookup order, highest priority first:
    #
    # * The priority of a proxy declared specifically for WebSocket
    #   connections is unspecified. It goes first, which makes it easy to
    #   configure a specific proxy for websockets.
    # * Then, according to the _Proxy Usage_ section of RFC 6455, a SOCKS5
    #   proxy if available, else the proxy for HTTPS connections in
    #   preference to the proxy for HTTP connections.
    if uri.secure:
        candidates = ["wss", "socks", "https"]
    else:
        candidates = ["ws", "socks", "https", "http"]

    configured = urllib.request.getproxies()
    for key in candidates:
        address = configured.get(key)
        if address is None:
            continue
        # getproxies() may return SOCKS proxies as {"socks": "http://host:port"}
        # or as {"https": "socks5h://host:port"} depending on whether they're
        # declared in the operating system or in environment variables.
        # Rewrite the former to an explicit SOCKS scheme.
        if key == "socks" and address.startswith("http://"):
            address = "socks5h://" + address[len("http://") :]
        return address

    return None

tests/test_exceptions.py

+12
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ def test_str(self):
8383
InvalidURI("|", "not at all!"),
8484
"| isn't a valid URI: not at all!",
8585
),
86+
(
87+
InvalidProxy("|", "not at all!"),
88+
"| isn't a valid proxy: not at all!",
89+
),
8690
(
8791
InvalidHandshake("invalid request"),
8892
"invalid request",
@@ -99,6 +103,14 @@ def test_str(self):
99103
InvalidStatus(Response(401, "Unauthorized", Headers())),
100104
"server rejected WebSocket connection: HTTP 401",
101105
),
106+
(
107+
InvalidProxyMessage("malformed HTTP message"),
108+
"malformed HTTP message",
109+
),
110+
(
111+
InvalidProxyStatus(Response(401, "Unauthorized", Headers())),
112+
"proxy rejected connection: HTTP 401",
113+
),
102114
(
103115
InvalidHeader("Name"),
104116
"missing Name header",

0 commit comments

Comments
 (0)