Skip to content

Commit e8cee91

Browse files
authored
Merge pull request #1885 from ikedas/issue-1884 by ikedas
WWSympa: Invalid UTF-8 sequences in input may trigger crashing (#1884)
2 parents a1126e6 + 3a1bbc7 commit e8cee91

File tree

7 files changed

+55
-56
lines changed

7 files changed

+55
-56
lines changed

.github/workflows/make-check.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
--verbose --no-interactive
6262
--with-develop
6363
--with-feature=Data::Password --with-feature=ldap
64-
--with-feature=safe-unicode --with-feature=smime
64+
--with-feature=smime
6565
--with-feature=soap --with-feature=sqlite
6666
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
6767
- name: Run tests

.gitlab-ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
- . ~/bashrc
88
- coverage-install
99
- coverage-setup
10-
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
10+
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
1111
- autoreconf -i
1212
- ./configure
1313
- cd src; make; cd ..

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ before_install:
2525

2626
install:
2727
- cpan-install --coverage
28-
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
28+
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
2929

3030
before_script:
3131
- coverage-setup

cpanfile

+11-11
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29';
150150
# Used to get Unix time from local time
151151
requires 'Time::Local', '>= 1.23';
152152

153+
# Normalizes file names represented by Unicode.
154+
# Note: Perl 5.8.1 bundles version 0.23.
155+
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
156+
requires 'Unicode::Normalize', '>= 1.03';
157+
158+
# Sanitizes inputs with Unicode text.
159+
requires 'Unicode::UTF8', '>= 0.58';
160+
153161
# Used to create URI containing non URI-canonical characters.
154162
# Note: '3.28' is the version included in URI-1.35.
155163
requires 'URI::Escape', '>= 3.28';
@@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65';
190198
# This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation.
191199
recommends 'Net::SMTP';
192200

193-
# Normalizes file names represented by Unicode
194-
# Note: Perl 5.8.1 bundles version 0.23.
195-
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
196-
recommends 'Unicode::Normalize', '>= 1.03';
197-
198-
recommends 'Unicode::UTF8', '>= 0.60';
199-
200201
### Features
201202
##
202203

@@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
324325
};
325326

326327
feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
327-
# Note: Perl 5.8.1 bundles version 0.23.
328-
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
329-
requires 'Unicode::Normalize', '>= 1.03';
330-
requires 'Unicode::UTF8', '>= 0.60';
328+
# Note: These became required (>=6.2.73b).
329+
#requires 'Unicode::Normalize', '>= 1.03';
330+
#requires 'Unicode::UTF8', '>= 0.58';
331331
};
332332

333333
on 'test' => sub {

src/cgi/wwsympa.fcgi.in

+26-12
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use IO::File qw();
3939
use MIME::EncWords;
4040
use MIME::Lite::HTML;
4141
use POSIX qw();
42+
use Unicode::UTF8;
4243
use URI;
4344
use Data::Dumper; # tentative
4445

@@ -1057,6 +1058,15 @@ while ($query = Sympa::WWW::FastCGI->new) {
10571058

10581059
## Get params in a hash
10591060
%in = $query->Vars;
1061+
while (my ($k, $v) = each %in) {
1062+
next if ref $v;
1063+
next if Encode::is_utf8($v);
1064+
unless (Unicode::UTF8::valid_utf8($v)) {
1065+
$log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored',
1066+
$k, sprintf("\\x%*v02X", "\\x", $v));
1067+
delete $in{$k};
1068+
}
1069+
}
10601070

10611071
# Determine robot.
10621072
$robot = $ENV{SYMPA_DOMAIN};
@@ -1868,24 +1878,28 @@ sub _split_params {
18681878
}
18691879

18701880
if (@params) {
1871-
$in{'action'} = $params[0];
1881+
$in{'action'} = shift @params;
18721882
my @args = @{$action_args{$in{'action'}} // $action_args{'default'}};
18731883

1874-
my $i = 1;
18751884
foreach my $p (@args) {
1876-
my $pname;
1877-
## More than 1 param
1885+
my ($k, $v);
18781886
if ($p =~ /^\@(\w+)$/) {
1879-
$pname = $1;
1880-
$in{$pname} = join '/', @params[$i .. $#params];
1881-
$in{$pname} .= '/' if $ending_slash;
1882-
last;
1887+
$k = $1;
1888+
$v = join '/', @params;
1889+
$v .= '/' if $ending_slash;
18831890
} else {
1884-
$pname = $p;
1885-
$in{$pname} = $params[$i];
1891+
$k = $p;
1892+
$v = shift @params;
1893+
}
1894+
$in{$k} = $v;
1895+
1896+
unless (Encode::is_utf8($v) or Unicode::UTF8::valid_utf8($v)) {
1897+
$log->syslog('err',
1898+
'Parameter in invalid UTF-8 %s="%s": Ignored',
1899+
$k, sprintf("\\x%*v02X", "\\x", $v));
1900+
delete $in{$k};
18861901
}
1887-
wwslog('debug', 'Incoming parameter: %s=%s', $pname, $in{$pname});
1888-
$i++;
1902+
last if 0 == index $p, '@';
18891903
}
18901904
}
18911905
}

src/lib/Sympa/Tools/Text.pm

+7-17
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ use MIME::EncWords;
3939
use Text::LineFold;
4040
use Unicode::GCString;
4141
use URI::Escape qw();
42-
BEGIN { eval 'use Unicode::Normalize qw()'; }
43-
BEGIN { eval 'use Unicode::UTF8 qw()'; }
42+
use Unicode::Normalize qw();
43+
use Unicode::UTF8;
4444

4545
use Sympa::Language;
4646
use Sympa::Regexps;
@@ -141,15 +141,11 @@ sub canonic_text {
141141
my $utext;
142142
if (Encode::is_utf8($text)) {
143143
$utext = $text;
144-
} elsif ($Unicode::UTF8::VERSION) {
144+
} else {
145145
no warnings 'utf8';
146146
$utext = Unicode::UTF8::decode_utf8($text);
147-
} else {
148-
$utext = Encode::decode_utf8($text);
149-
}
150-
if ($Unicode::Normalize::VERSION) {
151-
$utext = Unicode::Normalize::normalize('NFC', $utext);
152147
}
148+
$utext = Unicode::Normalize::normalize('NFC', $utext);
153149

154150
# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
155151
# and EIMS:
@@ -313,13 +309,8 @@ sub guessed_to_utf8 {
313309
and length $text
314310
and $text =~ /[^\x00-\x7F]/;
315311

316-
my $utf8;
317-
if ($Unicode::UTF8::VERSION) {
318-
$utf8 = Unicode::UTF8::decode_utf8($text)
319-
if Unicode::UTF8::valid_utf8($text);
320-
} else {
321-
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
322-
}
312+
my $utf8 = Unicode::UTF8::decode_utf8($text)
313+
if Unicode::UTF8::valid_utf8($text);
323314
unless (defined $utf8) {
324315
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
325316
$utf8 =
@@ -332,8 +323,7 @@ sub guessed_to_utf8 {
332323
}
333324

334325
# Apply NFC: e.g. for modified-NFD by Mac OS X.
335-
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
336-
if $Unicode::Normalize::VERSION;
326+
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);
337327

338328
return Encode::encode_utf8($utf8);
339329
}

t/Tools_Text.t

+8-13
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
4646
# ToDo: foldcase()
4747
# ToDo: wrap_text()
4848

49-
SKIP: {
50-
skip 'Unicode::Normalize and Unicode::UTF8 required.'
51-
unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;
52-
53-
# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
54-
is Sympa::Tools::Text::canonic_text(
55-
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
56-
),
57-
Encode::encode_utf8(
58-
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
59-
),
60-
'canonic_text';
61-
}
49+
# Invalid code points: U+D800 (surrogate), U+10FFFE (noncharacter), U+110000 and U+200000 (beyond U+10FFFF)
50+
is Sympa::Tools::Text::canonic_text(
51+
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
52+
),
53+
Encode::encode_utf8(
54+
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
55+
),
56+
'canonic_text';
6257

6358
done_testing();

0 commit comments

Comments
 (0)