Skip to content

Commit 78705b5

Browse files
committed
Search: Add plain text match (#13)
1 parent fcd67e9 commit 78705b5

File tree

2 files changed

+60
-6
lines changed

2 files changed

+60
-6
lines changed

Hijacker/match.cpp

Lines changed: 45 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
#include "pch.h"
22
#include "match.hpp"
33
#include <functional>
4-
#include <IbWinCppLib/WinCppLib.hpp>
54

65
char32_t read_char32(const char8_t* str, int* length) {
76
char c = str[0];
@@ -15,31 +14,72 @@ char32_t read_char32(const char8_t* str, int* length) {
1514

1615
/// Compiles a NUL-terminated UTF-8 pattern into a single heap block.
///
/// The block holds the Pattern header, followed by the pattern decoded to
/// char32_t (including the terminating 0), followed by a copy of the
/// original UTF-8 bytes (without a terminator).
///
/// @param pattern       NUL-terminated UTF-8 pattern text.
/// @param flags         Stored unchanged in Pattern::flags.
/// @param pinyin_flags  Stored unchanged in Pattern::pinyin_flags (the pointer
///                      is kept, the vector is not copied).
/// @return The new Pattern, allocated with HeapAlloc on the process heap, or
///         nullptr if the allocation fails.
Pattern* compile(const char8_t* pattern, PatternFlag::Value flags, std::vector<pinyin::PinyinFlagValue>* pinyin_flags) {
    // First pass: measure the pattern in code points and in UTF-8 bytes.
    size_t length = 1;  // '\0'
    size_t length_u8 = 0;
    {
        const char8_t* p = pattern;
        int char_len;
        for (char32_t c = read_char32(p, &char_len); c; c = read_char32(p += char_len, &char_len)) {
            length++;
            length_u8 += char_len;
        }
    }

    // One allocation for header + UTF-32 text + UTF-8 text.
    const size_t total_bytes = sizeof(Pattern) + length * sizeof(char32_t) + length_u8 * sizeof(char8_t);
    Pattern* pat = ib::Addr(HeapAlloc(GetProcessHeap(), 0, total_bytes));
    if (!pat) {
        // HeapAlloc reports failure by returning NULL; do not write through it.
        return nullptr;
    }

    pat->flags = flags;
    pat->pinyin_flags = pinyin_flags;

    // The lengths must be set before pattern_u8() is called, because that
    // accessor derives its offset from pattern_len.
    pat->pattern_len = static_cast<unsigned int>(length - 1);
    pat->pattern_u8_len = static_cast<unsigned int>(length_u8);

    // Second pass: decode every code point, terminator included.
    const char8_t* p = pattern;
    int char_len;
    for (size_t i = 0; i < length; i++) {
        pat->pattern()[i] = read_char32(p, &char_len);
        p += char_len;
    }

    memcpy(pat->pattern_u8(), pattern, length_u8);

    return pat;
}
3946

4047
int exec(Pattern* pattern, const char8_t* subject, int length, size_t nmatch, int pmatch[], PatternFlag::Value flags)
4148
{
4249
const char8_t* subject_end = subject + length;
50+
51+
// plain text match
52+
bool plain = true;
53+
{
54+
const char8_t* s = subject;
55+
int char_len;
56+
for (char32_t c = read_char32(s, &char_len); s != subject_end; c = read_char32(s += char_len, &char_len)) {
57+
if (c >= 0x3007) {
58+
plain = false;
59+
break;
60+
}
61+
}
62+
}
63+
if (plain) {
64+
std::u8string_view sv(subject, length);
65+
std::u8string_view pt = pattern->pattern_u8_sv();
66+
auto it = std::search(sv.begin(), sv.end(), pt.begin(), pt.end(),
67+
[](char8_t c1, char8_t c2) {
68+
return std::toupper(c1) == std::toupper(c2);
69+
});
70+
71+
if (it == sv.end()) {
72+
return -1;
73+
} else {
74+
if (nmatch) {
75+
pmatch[0] = it - sv.begin();
76+
pmatch[1] = it - sv.begin() + pt.size();
77+
return 1;
78+
} else {
79+
return 0;
80+
}
81+
}
82+
}
4383

4484
// DFA?
4585
auto char_match = [pattern](char32_t c, const char32_t* pat) -> std::vector<size_t> {
@@ -85,7 +125,7 @@ int exec(Pattern* pattern, const char8_t* subject, int length, size_t nmatch, in
85125
const char8_t* sub = subject;
86126
int char_len;
87127
while (sub != subject_end) {
88-
if (const char8_t* s = subject_match(sub, pattern->pattern)) {
128+
if (const char8_t* s = subject_match(sub, pattern->pattern())) {
89129
if (nmatch) {
90130
pmatch[0] = sub - subject;
91131
pmatch[1] = s - subject;

Hijacker/match.hpp

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#pragma once
22
#include <vector>
3+
#include <IbWinCppLib/WinCppLib.hpp>
34
#define IB_PINYIN_ENCODING 32
45
#include <IbPinyinLib/Pinyin.hpp>
56

@@ -14,7 +15,20 @@ struct PatternFlag {
1415
struct Pattern {
1516
PatternFlag::Value flags;
1617
std::vector<pinyin::PinyinFlagValue>* pinyin_flags;
17-
char32_t pattern[];
18+
unsigned int pattern_len;
19+
unsigned int pattern_u8_len;
20+
//char32_t pattern[];
21+
//char8_t pattern_u8[];
22+
23+
char32_t* pattern() {
24+
return ib::Addr(this) + sizeof(Pattern);
25+
}
26+
char8_t* pattern_u8() {
27+
return ib::Addr(this) + sizeof(Pattern) + (pattern_len + 1) * sizeof(char32_t);
28+
}
29+
std::u8string_view pattern_u8_sv() {
30+
return { pattern_u8(), pattern_u8_len };
31+
}
1832
};
1933

2034
Pattern* compile(const char8_t* pattern, PatternFlag::Value flags, std::vector<pinyin::PinyinFlagValue>* pinyin_flags);

0 commit comments

Comments
 (0)