Skip to content

Commit 03d5692

Browse files
committed
Fix bug in HTML parser that could cause missing text fragments when there was no head/body tag (#6713)
1 parent 1078d8d commit 03d5692

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ CHANGELOG Roundcube Webmail
1212
- Elastic: Fix folders list scrolling on touch devices (#6706)
1313
- Elastic: Fix non-working pretty selects in Chrome browser (#6705)
1414
- Managesieve: Fix bug where global includes were requested for vacation (#6716)
15+
- Fix bug in HTML parser that could cause missing text fragments when there was no head/body tag (#6713)
1516
- Fix bug where HTML messages with an xml:namespace tag were not rendered (#6697)
1617
- Fix TinyMCE download location (#6694)
1718
- Fix so "Open in new window" consistently displays "external window" interface (#6659)

program/lib/Roundcube/rcube_washtml.php

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ public function wash($html)
562562
if (!$this->is_xml && class_exists('Masterminds\HTML5')) {
563563
try {
564564
$html5 = new Masterminds\HTML5();
565-
$node = $html5->loadHTML($html);
565+
$node = $html5->loadHTML($this->fix_html5($html));
566566
}
567567
catch (Exception $e) {
568568
// ignore, fallback to DOMDocument
@@ -778,6 +778,28 @@ public static function fix_broken_lists(&$html)
778778
}
779779
}
780780

781+
/**
 * Cleanup and workarounds on input to Masterminds/HTML5
 *
 * The HTML5 parser could lose text fragments when the input had neither
 * a <head> nor a <body> tag (#6713). In that case an opening <body> tag
 * is injected: right after the opening <html> tag when one exists,
 * otherwise together with a new <html> tag at the very beginning.
 *
 * @param string $html HTML content
 *
 * @return string HTML content, unchanged when it already has <head> or <body>
 */
protected function fix_html5($html)
{
    // HTML5 requires <head> or <body> (#6713)
    // https://github.com/Masterminds/html5-php/issues/166
    // The tag name has to be followed by whitespace, "/" or ">",
    // otherwise e.g. <header> would be mistaken for <head>.
    if (preg_match('/<(head|body)[\s\/>]/i', $html)) {
        return $html;
    }

    // Find a complete opening <html ...> tag (attributes are optional)
    if (preg_match('/<html(?:[\s\/][^>]*)?>/i', $html, $matches, PREG_OFFSET_CAPTURE)) {
        // Offset just past the closing ">" so that <body> lands after
        // the tag and not inside of it
        $pos = $matches[0][1] + strlen($matches[0][0]);

        return substr_replace($html, '<body>', $pos, 0);
    }

    // No usable <html> tag at all, prepend both wrappers
    return '<html><body>' . $html;
}
802+
781803
/**
782804
* Explode css style value
783805
*/

tests/Framework/Washtml.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,4 +444,32 @@ function test_xml_namespace()
444444
$this->assertNotContains('&lt;?xml:namespace"', $washed);
445445
$this->assertSame($washed, '<p></p>');
446446
}
447+
448+
/**
 * Washing must keep the text of documents that lack some or all
 * of the main HTML hierarchy tags (#6713)
 */
function test_missing_tags()
{
    $washer = new rcube_washtml();

    // Inputs with missing <html>, <head> and/or <body> tags
    $samples = [
        '<head></head>First line<br />Second line',
        'First line<br />Second line',
        '<html>First line<br />Second line</html>',
        '<html><head></head>First line<br />Second line</html>',
    ];

    // The same washer instance is reused for every sample
    foreach ($samples as $sample) {
        $this->assertContains('First line', $washer->wash($sample));
    }
}
447475
}

0 commit comments

Comments
 (0)