File tree 13 files changed +877
-77
lines changed
13 files changed +877
-77
lines changed Original file line number Diff line number Diff line change 1
1
[package ]
2
2
name = " rustrict"
3
3
authors = [" Finn Bear" ]
4
- version = " 0.7.34 "
4
+ version = " 0.7.35 "
5
5
edition = " 2021"
6
6
license = " MIT OR Apache-2.0"
7
7
repository = " https://github.com/finnbear/rustrict/"
Original file line number Diff line number Diff line change @@ -177,7 +177,7 @@ is used as a dataset. Positive accuracy is the percentage of profanity detected
177
177
178
178
| Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time |
179
179
| -------| ----------| -------------------| -------------------| ------|
180
- | [ rustrict] ( https://crates.io/crates/rustrict ) | 80.00% | 93.98 % | 76.52 % | 9s |
180
+ | [ rustrict] ( https://crates.io/crates/rustrict ) | 80.00% | 94.01 % | 76.50 % | 9s |
181
181
| [ censor] ( https://crates.io/crates/censor ) | 76.16% | 72.76% | 77.01% | 23s |
182
182
| [ stfu] ( https://crates.io/crates/stfu ) | 91.74% | 77.69% | 95.25% | 45s |
183
183
| [ profane-rs] ( https://crates.io/crates/profane-rs ) | 80.47% | 73.79% | 82.14% | 52s |
Original file line number Diff line number Diff line change @@ -566,7 +566,7 @@ impl<I: Iterator<Item = char>> Iterator for Censor<I> {
566
566
// space.
567
567
// ( and ) are for ignoring appositive phrases.
568
568
// Checking node.last is to collapse multiple spaces into one
569
- let new_space = matches ! ( c, ' ' | '.' | ',' | ':' | ';' | '…' | '(' | ')' )
569
+ let new_space = matches ! ( c, ' ' | '.' | ',' | ':' | ';' | '…' | '(' | ')' | '_' | '-' )
570
570
&& m. node . last != Some ( ' ' ) ;
571
571
let new_repetition: bool = !new_space && c == m. last ;
572
572
let new_skip = !new_space && skippable && !ignore_sep && !new_repetition;
Original file line number Diff line number Diff line change @@ -47,7 +47,7 @@ fn main() {
47
47
if let Some ( c) = char:: from_u32 ( u) {
48
48
let max_width = match c {
49
49
'🐿' => 20 ,
50
- '𒐫' => 40 ,
50
+ '𒐫' => 80 ,
51
51
'𒈙' => 35 ,
52
52
'༺' | '༻' => 25 ,
53
53
_ => {
Original file line number Diff line number Diff line change @@ -87,6 +87,8 @@ faggetaboutit
87
87
farming xp
88
88
fatty acid
89
89
fatty food
90
+ femboys are awesome
91
+ femboys are cool
90
92
few secs
91
93
ffa game
92
94
fire cracker
@@ -99,6 +101,7 @@ freakin
99
101
fuchs dystrophy
100
102
fugia
101
103
gaya
104
+ gg german
102
105
ght, its
103
106
glhf
104
107
graham cracker
@@ -196,6 +199,7 @@ pc master race
196
199
pegging the
197
200
plss
198
201
plsss
202
+ plz stop
199
203
plzz
200
204
plzzz
201
205
pocock
Original file line number Diff line number Diff line change @@ -1049,6 +1049,8 @@ arco on
1049
1049
arco vary
1050
1050
arco ward
1051
1051
arctocephalus
1052
+ are africans
1053
+ are asians
1052
1054
areas hole
1053
1055
ared skins
1054
1056
arena holes
@@ -3490,6 +3492,7 @@ buzz ext
3490
3492
buzz hilt
3491
3493
buzz hit
3492
3494
buzz lut
3495
+ buzz off
3493
3496
buzz perm
3494
3497
bytes cumulative
3495
3498
bytes ext
@@ -3529,6 +3532,7 @@ caliphate
3529
3532
cam girl
3530
3533
camel tox
3531
3534
campoo
3535
+ can't it
3532
3536
canal
3533
3537
canberra appeal
3534
3538
canberra appear
@@ -5270,6 +5274,7 @@ directions lut
5270
5274
directions perm
5271
5275
directions seeks
5272
5276
dirty juan
5277
+ dirty muslim
5273
5278
disco jones
5274
5279
disco on
5275
5280
disco vary
@@ -6570,6 +6575,8 @@ felt chuck
6570
6575
felt church
6571
6576
felt xhtml
6572
6577
females squirting
6578
+ femboys are awesome
6579
+ femboys are cool
6573
6580
fend yourself
6574
6581
fennig
6575
6582
fers cumulative
@@ -7203,6 +7210,7 @@ geyan
7203
7210
geyerite
7204
7211
geylies
7205
7212
geyser
7213
+ gg ger
7206
7214
ghastful
7207
7215
ghettoized
7208
7216
ghettoizes
@@ -12187,6 +12195,8 @@ nu destin
12187
12195
nu destroy
12188
12196
nu destruct
12189
12197
nu ger
12198
+ nuke iran
12199
+ nuke israel
12190
12200
nurses cumulative
12191
12201
nurses ext
12192
12202
nurses hilt
@@ -13567,6 +13577,7 @@ plumbaginaceae
13567
13577
plumbaginaceous
13568
13578
plumbum
13569
13579
plumigerous
13580
+ plz stop
13570
13581
plzz
13571
13582
pmsg
13572
13583
pn lips
@@ -13581,6 +13592,7 @@ pockets perm
13581
13592
pockets seeks
13582
13593
pocock
13583
13594
pogeys
13595
+ poggers
13584
13596
pogonips
13585
13597
points cumulative
13586
13598
points ext
@@ -15285,6 +15297,20 @@ rico ward
15285
15297
rid dicke
15286
15298
rid licking
15287
15299
rid ongoing
15300
+ ride mea
15301
+ ride mech
15302
+ ride med
15303
+ ride mee
15304
+ ride meg
15305
+ ride mel
15306
+ ride mem
15307
+ ride men
15308
+ ride mer
15309
+ ride mes
15310
+ ride met
15311
+ ride mexica
15312
+ ride mexico
15313
+ ride meyer
15288
15314
riders cumulative
15289
15315
riders ext
15290
15316
riders hilt
@@ -17056,6 +17082,7 @@ spleening
17056
17082
spleninii
17057
17083
splice
17058
17084
splicing
17085
+ splix
17059
17086
sponsible peer
17060
17087
spoorn
17061
17088
sporadic
@@ -18770,6 +18797,7 @@ twattle
18770
18797
twattling
18771
18798
tweenies
18772
18799
tweesht
18800
+ tweezer
18773
18801
twigger
18774
18802
twilit
18775
18803
twilt
@@ -19447,6 +19475,7 @@ wan kr
19447
19475
wan kurt
19448
19476
wan kuwait
19449
19477
wan ky
19478
+ wang ker
19450
19479
wantwit
19451
19480
wap anti
19452
19481
wap peru
Original file line number Diff line number Diff line change @@ -88,9 +88,10 @@ pub fn is_whitespace(c: char) -> bool {
88
88
// https://www.compart.com/en/unicode/U+FFA0
89
89
c. is_whitespace ( )
90
90
|| c. is_other ( )
91
+ || c. is_format ( )
91
92
|| matches ! (
92
93
c,
93
- '\u{115F}' | '\u{1160}' | '\u{2800}' | '\u{3164}' | '\u{FFA0}'
94
+ '\u{115F}' | '\u{1160}' | '\u{2800}' | '\u{3164}' | '\u{FFA0}' | '\u{FFFC}'
94
95
)
95
96
}
96
97
@@ -113,7 +114,7 @@ mod tests {
113
114
// Special cases.
114
115
assert_eq ! (
115
116
crate :: trim_whitespace(
116
- "\u{0488} \u{1160} \u{0489} \u{1160} \u{0488} \u{1160} \u{0489} abc\u{0488} \u{0489} "
117
+ "\u{FFF9} \u{FFFA} \u{FFFB} \u{FFFC} \u{ 0488}\u{1160} \u{0489} \u{1160} \u{0488} \u{1160} \u{0489} abc\u{0488} \u{0489} "
117
118
) ,
118
119
"abc\u{0488} \u{0489} "
119
120
)
You can’t perform that action at this time.
0 commit comments