|
9 | 9 | */
|
10 | 10 | package org.truffleruby.core.regexp;
|
11 | 11 |
|
12 |
| -import java.nio.charset.UnsupportedCharsetException; |
13 |
| - |
14 | 12 | import com.oracle.truffle.api.CompilerDirectives;
|
15 | 13 | import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
|
16 | 14 | import com.oracle.truffle.api.interop.InteropLibrary;
|
@@ -155,12 +153,12 @@ private static Object compileTRegex(RubyContext context, RubyRegexp regexp, bool
|
155 | 153 | var tstring = tstringBuilder.toTString();
|
156 | 154 | try {
|
157 | 155 | processedRegexpSource = TStringUtils.toJavaStringOrThrow(tstring, tstringBuilder.getRubyEncoding());
|
158 |
| - } catch (CannotConvertBinaryRubyStringToJavaString | UnsupportedCharsetException e) { |
159 |
| - // Some strings cannot be converted to Java strings, e.g. strings with the |
160 |
| - // BINARY encoding containing characters higher than 127. |
161 |
| - // Also, some charsets might not be supported on the JVM and therefore |
162 |
| - // a conversion to j.l.String might be impossible. |
163 |
| - return null; |
| 156 | + } catch (CannotConvertBinaryRubyStringToJavaString e) { |
| 157 | + // A BINARY regexp with non-US-ASCII bytes: pass it as "raw bytes" (reinterpreted as ISO-8859-1, one char per byte) instead. |
| 158 | + // TRegex knows how to interpret those bytes correctly as we pass the encoding name as well. |
| 159 | + var latin1string = tstring.forceEncodingUncached(Encodings.BINARY.tencoding, |
| 160 | + Encodings.ISO_8859_1.tencoding); |
| 161 | + processedRegexpSource = TStringUtils.toJavaStringOrThrow(latin1string, Encodings.ISO_8859_1); |
164 | 162 | }
|
165 | 163 |
|
166 | 164 | String flags = optionsToFlags(regexp.options, atStart);
|
|
0 commit comments