9
9
*/
10
10
package org .truffleruby .core .regexp ;
11
11
12
- import java .nio .charset .UnsupportedCharsetException ;
13
-
14
12
import com .oracle .truffle .api .CompilerDirectives ;
15
13
import com .oracle .truffle .api .CompilerDirectives .TruffleBoundary ;
16
14
import com .oracle .truffle .api .interop .InteropLibrary ;
@@ -139,6 +137,11 @@ public static String toTRegexEncoding(RubyEncoding encoding) {
139
137
140
138
@ TruffleBoundary
141
139
private static Object compileTRegex (RubyContext context , RubyRegexp regexp , boolean atStart , RubyEncoding enc ) {
140
+ String tRegexEncoding = TRegexCache .toTRegexEncoding (enc );
141
+ if (tRegexEncoding == null ) {
142
+ return null ;
143
+ }
144
+
142
145
String processedRegexpSource ;
143
146
RubyEncoding [] fixedEnc = new RubyEncoding []{ null };
144
147
final TStringBuilder tstringBuilder ;
@@ -155,21 +158,16 @@ private static Object compileTRegex(RubyContext context, RubyRegexp regexp, bool
155
158
var tstring = tstringBuilder .toTString ();
156
159
try {
157
160
processedRegexpSource = TStringUtils .toJavaStringOrThrow (tstring , tstringBuilder .getRubyEncoding ());
158
- } catch (CannotConvertBinaryRubyStringToJavaString | UnsupportedCharsetException e ) {
159
- // Some strings cannot be converted to Java strings, e.g. strings with the
160
- // BINARY encoding containing characters higher than 127.
161
- // Also, some charsets might not be supported on the JVM and therefore
162
- // a conversion to j.l.String might be impossible.
163
- return null ;
161
+ } catch (CannotConvertBinaryRubyStringToJavaString e ) {
162
+ // This is a BINARY regexp containing non-US-ASCII bytes, so pass its source as "raw bytes" instead.
163
+ // TRegex knows how to interpret those bytes correctly because we pass the encoding name as well.
164
+ var latin1string = tstring . forceEncodingUncached ( Encodings . BINARY . tencoding ,
165
+ Encodings . ISO_8859_1 . tencoding );
166
+ processedRegexpSource = TStringUtils . toJavaStringOrThrow ( latin1string , Encodings . ISO_8859_1 ) ;
164
167
}
165
168
166
169
String flags = optionsToFlags (regexp .options , atStart );
167
170
168
- String tRegexEncoding = TRegexCache .toTRegexEncoding (enc );
169
- if (tRegexEncoding == null ) {
170
- return null ;
171
- }
172
-
173
171
String ignoreAtomicGroups = context .getOptions ().TRUFFLE_REGEX_IGNORE_ATOMIC_GROUPS
174
172
? ",IgnoreAtomicGroups=true"
175
173
: "" ;
0 commit comments