Skip to content

Commit 5420d51

Browse files
committed
fix(str): remove support for unneeded word boundaries
1 parent 9823128 commit 5420d51

File tree

5 files changed

+45
-9
lines changed

5 files changed

+45
-9
lines changed

CHANGES.md

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Unreleased
1414
* Fix [Re.Pcre.split]. Regression introduced in 1.12 and a previous bug with
1515
[Re.Pcre.split] (#538).
1616

17+
* Avoid parsing unnecessary patterns supported `Re.Emacs` in `Re.Str` (#..)
18+
1719
1.13.1 (30-Sep-2024)
1820
--------------------
1921

lib/emacs.ml

+13-8
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ let by_code f c c' =
3131
Char.chr (f c c')
3232
;;
3333

34-
let parse s =
34+
let parse ~emacs_only s =
3535
let buf = Parse_buffer.create s in
3636
let accept = Parse_buffer.accept buf in
3737
let eos () = Parse_buffer.eos buf in
@@ -72,19 +72,19 @@ let parse s =
7272
let r = regexp () in
7373
if not (Parse_buffer.accept_s buf {|\)|}) then raise Parse_error;
7474
Re.group r)
75-
else if accept '`'
75+
else if emacs_only && accept '`'
7676
then Re.bos
77-
else if accept '\''
77+
else if emacs_only && accept '\''
7878
then Re.eos
7979
else if accept '='
8080
then Re.start
8181
else if accept 'b'
8282
then Re.alt [ Re.bow; Re.eow ]
83-
else if accept 'B'
83+
else if emacs_only && accept 'B'
8484
then Re.not_boundary
85-
else if accept '<'
85+
else if emacs_only && accept '<'
8686
then Re.bow
87-
else if accept '>'
87+
else if emacs_only && accept '>'
8888
then Re.eow
8989
else if accept 'w'
9090
then Re.alt [ Re.alnum; Re.char '_' ]
@@ -95,7 +95,7 @@ let parse s =
9595
match get () with
9696
| ('*' | '+' | '?' | '[' | ']' | '.' | '^' | '$' | '\\') as c -> Re.char c
9797
| '0' .. '9' -> raise Not_supported
98-
| _ -> raise Parse_error)
98+
| c -> if emacs_only then raise Parse_error else Re.char c)
9999
else (
100100
if eos () then raise Parse_error;
101101
match get () with
@@ -125,7 +125,12 @@ let parse s =
125125
;;
126126

127127
let re ?(case = true) s =
128-
let r = parse s in
128+
let r = parse s ~emacs_only:true in
129+
if case then r else Re.no_case r
130+
;;
131+
132+
let re_no_emacs ~case s =
133+
let r = parse s ~emacs_only:false in
129134
if case then r else Re.no_case r
130135
;;
131136

lib/emacs.mli

+2
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,5 @@ val compile : Core.t -> Core.re
3737

3838
(** Same as [Core.compile] *)
3939
val compile_pat : ?case:bool -> string -> Core.re
40+
41+
val re_no_emacs : case:bool -> string -> Core.t

lib/str.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ type regexp =
3030
}
3131

3232
let compile_regexp s c =
33-
let re = Emacs.re ~case:(not c) s in
33+
let re = Emacs.re_no_emacs ~case:(not c) s in
3434
{ mtch = lazy (Compile.compile (Ast.seq [ Ast.start; re ]))
3535
; srch = lazy (Compile.compile re)
3636
}

lib_test/str/test_str.ml

+27
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ module Test_matches (R : Str_intf) = struct
2727
with
2828
| Not_found -> None
2929
;;
30+
31+
let eq_match' ?(pos = 0) ?(case = true) r s =
32+
let pat = if case then R.regexp r else R.regexp_case_fold r in
33+
try
34+
ignore (R.string_match pat s pos);
35+
Some (groups ())
36+
with
37+
| Not_found -> None
38+
;;
3039
end
3140

3241
module T_str = Test_matches (Str)
@@ -42,6 +51,16 @@ let eq_match ?pos ?case r s =
4251
()
4352
;;
4453

54+
let eq_match' ?pos ?case r s =
55+
expect_equal_app
56+
~msg:(str_printer s)
57+
~printer:(opt_printer (list_printer ofs_printer))
58+
(fun () -> T_str.eq_match' ?pos ?case r s)
59+
()
60+
(fun () -> T_re.eq_match' ?pos ?case r s)
61+
()
62+
;;
63+
4564
let split_result_conv =
4665
List.map (function
4766
| Str.Delim x -> Re.Str.Delim x
@@ -206,6 +225,14 @@ let _ =
206225
eq_match "[^0-9a-z]+" "A:Z+";
207226
eq_match "[^0-9a-z]+" "0";
208227
eq_match "[^0-9a-z]+" "a");
228+
(* Word modifiers *)
229+
expect_pass "word boundaries" (fun () ->
230+
eq_match' "\\bfoo" "foo";
231+
eq_match' "\\<foo" "foo";
232+
eq_match' "foo\\>" "foo";
233+
eq_match' "z\\Bfoo" "zfoo";
234+
eq_match' "\\`foo" "foo";
235+
eq_match' "foo\\'" "foo");
209236
(* Case modifiers *)
210237
expect_pass "no_case" (fun () ->
211238
eq_match ~case:false "abc" "abc";

0 commit comments

Comments
 (0)