1
1
#include " pch.h"
2
2
#include " match.hpp"
3
3
#include < functional>
4
- #include < IbWinCppLib/WinCppLib.hpp>
5
4
6
5
char32_t read_char32 (const char8_t * str, int * length) {
7
6
char c = str[0 ];
@@ -15,31 +14,72 @@ char32_t read_char32(const char8_t* str, int* length) {
15
14
16
15
/// Compiles a NUL-terminated UTF-8 pattern into a single heap-allocated Pattern.
///
/// The allocation holds the Pattern header plus room for the pattern decoded to
/// UTF-32 (terminating 0 included) and for a copy of the original UTF-8 bytes
/// (terminator excluded). The decoded form is written through `pat->pattern()`
/// and the raw bytes through `pat->pattern_u8()`; both accessors are declared
/// elsewhere (presumably they address the trailing storage -- confirm in match.hpp).
///
/// @param pattern       NUL-terminated UTF-8 pattern text.
/// @param flags         Stored verbatim in the returned Pattern.
/// @param pinyin_flags  Stored as a pointer (not copied) in the returned Pattern.
/// @return The compiled Pattern, or nullptr if the allocation fails. The block
///         comes from HeapAlloc on the process heap.
Pattern* compile(const char8_t* pattern, PatternFlag::Value flags, std::vector<pinyin::PinyinFlagValue>* pinyin_flags) {
    // First pass: count code points (plus the terminator) and UTF-8 bytes.
    size_t length = 1;  // '\0'
    size_t length_u8 = 0;
    {
        const char8_t* p = pattern;
        int char_len;
        for (char32_t c = read_char32(p, &char_len); c; c = read_char32(p += char_len, &char_len)) {
            length++;
            length_u8 += char_len;
        }
    }

    // Pattern* pat = ib::Addr(new ib::Byte[sizeof Pattern + length * sizeof(char32_t)]);
    Pattern* pat = ib::Addr(HeapAlloc(GetProcessHeap(), 0,
        sizeof(Pattern) + length * sizeof(char32_t) + length_u8 * sizeof(char8_t)));
    if (!pat) {
        // HeapAlloc with flags == 0 reports failure by returning NULL; do not
        // write through a null pointer.
        return nullptr;
    }

    pat->flags = flags;
    pat->pinyin_flags = pinyin_flags;

    pat->pattern_len = length - 1;  // code points, terminator excluded
    pat->pattern_u8_len = length_u8;

    // Second pass: decode into the UTF-32 buffer; the final iteration stores
    // the terminating 0.
    const char8_t* p = pattern;
    int char_len;
    for (size_t i = 0; i < length; i++) {
        pat->pattern()[i] = read_char32(p, &char_len);
        p += char_len;
    }

    // Keep the original UTF-8 bytes as well (no terminator is copied).
    memcpy(pat->pattern_u8(), pattern, length_u8);

    return pat;
}
39
46
40
47
int exec (Pattern* pattern, const char8_t * subject, int length, size_t nmatch, int pmatch[], PatternFlag::Value flags)
41
48
{
42
49
const char8_t * subject_end = subject + length;
50
+
51
+ // plain text match
52
+ bool plain = true ;
53
+ {
54
+ const char8_t * s = subject;
55
+ int char_len;
56
+ for (char32_t c = read_char32 (s, &char_len); s != subject_end; c = read_char32 (s += char_len, &char_len)) {
57
+ if (c >= 0x3007 ) {
58
+ plain = false ;
59
+ break ;
60
+ }
61
+ }
62
+ }
63
+ if (plain) {
64
+ std::u8string_view sv (subject, length);
65
+ std::u8string_view pt = pattern->pattern_u8_sv ();
66
+ auto it = std::search (sv.begin (), sv.end (), pt.begin (), pt.end (),
67
+ [](char8_t c1, char8_t c2) {
68
+ return std::toupper (c1) == std::toupper (c2);
69
+ });
70
+
71
+ if (it == sv.end ()) {
72
+ return -1 ;
73
+ } else {
74
+ if (nmatch) {
75
+ pmatch[0 ] = it - sv.begin ();
76
+ pmatch[1 ] = it - sv.begin () + pt.size ();
77
+ return 1 ;
78
+ } else {
79
+ return 0 ;
80
+ }
81
+ }
82
+ }
43
83
44
84
// DFA?
45
85
auto char_match = [pattern](char32_t c, const char32_t * pat) -> std::vector<size_t > {
@@ -85,7 +125,7 @@ int exec(Pattern* pattern, const char8_t* subject, int length, size_t nmatch, in
85
125
const char8_t * sub = subject;
86
126
int char_len;
87
127
while (sub != subject_end) {
88
- if (const char8_t * s = subject_match (sub, pattern->pattern )) {
128
+ if (const char8_t * s = subject_match (sub, pattern->pattern () )) {
89
129
if (nmatch) {
90
130
pmatch[0 ] = sub - subject;
91
131
pmatch[1 ] = s - subject;
0 commit comments