Skip to content

Commit dbce3e8

Browse files
committed
1 parent a3edfe5 commit dbce3e8

File tree

2 files changed

+61
-10
lines changed

2 files changed

+61
-10
lines changed

src/HTML5/Parser/DOMTreeBuilder.php

+15-10
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,12 @@ public function startTag($name, $attributes = array(), $selfClosing = false)
322322
break;
323323
}
324324

325+
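// Case when no <body> exists: per the 'Anything else' rule of the "after head" insertion mode (spec linked below), a <body> start tag is implied before any other element.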
326+
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
327+
if ($this->insertMode === static::IM_AFTER_HEAD && 'head' !== $name && 'body' !== $name) {
328+
$this->startTag('body');
329+
}
330+
325331
// Special case handling for SVG.
326332
if ($this->insertMode === static::IM_IN_SVG) {
327333
$lname = Elements::normalizeSvgElement($lname);
@@ -548,21 +554,20 @@ public function comment($cdata)
548554

549555
/**
 * Handle a run of character data.
 *
 * While the insert mode is still before IM_IN_HEAD, text consisting only of
 * the characters " \t\n\r\f" is ignored. Any other text implies the omitted
 * tags: a <head> is started and ended, a <body> is started, and the text is
 * then appended to the current node.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
 *
 * @param string $data The character data to insert.
 */
public function text($data)
{
    if ($this->insertMode < static::IM_IN_HEAD) {
        // Compare against '' rather than using empty(): empty('0') is true
        // in PHP, so a lone "0" would otherwise be discarded as whitespace.
        if ('' === trim($data, " \t\n\r\f")) {
            return;
        }

        // Non-whitespace text before <head>: synthesize the omitted tags.
        $this->startTag('head');
        $this->endTag('head');
        $this->startTag('body');
    }

    $node = $this->doc->createTextNode($data);
    $this->current->appendChild($node);
}

test/HTML5/Html5Test.php

+46
Original file line numberDiff line numberDiff line change
@@ -480,4 +480,50 @@ public function testCDATA()
480480
$res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
481481
$this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
482482
}
483+
484+
/**
 * Regression test for issue #166: documents with omitted tags must
 * serialize with the implied elements present.
 *
 * @param string $input    Markup handed to the parser.
 * @param string $expected Literal fragment that must occur in the output.
 *
 * @dataProvider tagOmissionProvider
 */
public function testTagOmission($input, $expected)
{
    $rendered = $this->html5->saveHTML($this->html5->loadHTML($input));

    // Quote the expectation so it is matched literally, not as a pattern.
    $pattern = sprintf('|%s|', preg_quote($expected, '|'));

    $this->assertRegExp($pattern, $rendered);
}
500+
501+
/**
 * Tag omission test cases.
 *
 * Each case is a pair: the input markup and a literal fragment expected
 * to appear in the serialized output.
 *
 * @return string[][]
 */
public function tagOmissionProvider()
{
    return array(
        // Text directly inside <html>: <head> and <body> are implied.
        array(
            '<html>Hello, This is a test.<br />Does it work this time?</html>',
            '<html><head></head><body>Hello, This is a test.<br>Does it work this time?</body></html>',
        ),
        // test whitespace (\n)
        array(
            '<!DOCTYPE html>
<html>
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
<body>
<br>
</body>
</html>',
            '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
<body>
<br>
</body>'
        ),
    );
}
483529
}

0 commit comments

Comments
 (0)