Skip to content

Commit 4d14e8c

Browse files
bytestreamgoetas
authored and committed
1 parent 05cc730 commit 4d14e8c

File tree

2 files changed

+61
-10
lines changed

2 files changed

+61
-10
lines changed

src/HTML5/Parser/DOMTreeBuilder.php

+15-10
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,12 @@ public function startTag($name, $attributes = array(), $selfClosing = false)
322322
break;
323323
}
324324

325+
// Case when no <body> exists yet: open one implicitly, per the 'Anything else' rule of the spec section linked below.
326+
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
327+
if ($this->insertMode === static::IM_AFTER_HEAD && 'head' !== $name && 'body' !== $name) {
328+
$this->startTag('body');
329+
}
330+
325331
// Special case handling for SVG.
326332
if ($this->insertMode === static::IM_IN_SVG) {
327333
$lname = Elements::normalizeSvgElement($lname);
@@ -556,21 +562,20 @@ public function comment($cdata)
556562

557563
public function text($data)
558564
{
559-
// XXX: Hmmm.... should we really be this strict?
565+
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
560566
if ($this->insertMode < static::IM_IN_HEAD) {
561567
// Per '8.2.5.4.3 The "before head" insertion mode' the characters
562-
// " \t\n\r\f" should be ignored but no mention of a parse error. This is
563-
// practical as most documents contain these characters. Other text is not
564-
// expected here so recording a parse error is necessary.
568+
// " \t\n\r\f" should be ignored.
565569
$dataTmp = trim($data, " \t\n\r\f");
566-
if (!empty($dataTmp)) {
567-
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
568-
$this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
570+
if (! empty($dataTmp)) {
571+
$this->startTag('head');
572+
$this->endTag('head');
573+
$this->startTag('body');
574+
} else {
575+
return;
569576
}
570-
571-
return;
572577
}
573-
// fprintf(STDOUT, "Appending text %s.", $data);
578+
574579
$node = $this->doc->createTextNode($data);
575580
$this->current->appendChild($node);
576581
}

test/HTML5/Html5Test.php

+46
Original file line numberDiff line numberDiff line change
@@ -492,4 +492,50 @@ public function testAnchorTargetQueryParam()
492492
$res
493493
);
494494
}
495+
496+
/**
497+
* Test for issue #166.
498+
*
499+
* @param $input
500+
* @param $expected
501+
*
502+
* @dataProvider tagOmissionProvider
503+
*/
504+
public function testTagOmission($input, $expected)
505+
{
506+
$doc = $this->html5->loadHTML($input);
507+
508+
$out = $this->html5->saveHTML($doc);
509+
510+
$this->assertRegExp("|" . preg_quote($expected, "|") . "|", $out);
511+
}
512+
513+
/**
514+
* Tag omission test cases.
515+
*
516+
* @return string[][]
517+
*/
518+
public function tagOmissionProvider()
519+
{
520+
return $provider = array(
521+
array(
522+
'<html>Hello, This is a test.<br />Does it work this time?</html>',
523+
'<html><head></head><body>Hello, This is a test.<br>Does it work this time?</body></html>',
524+
),
525+
// test whitespace (\n)
526+
array(
527+
'<!DOCTYPE html>
528+
<html>
529+
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
530+
<body>
531+
<br>
532+
</body>
533+
</html>',
534+
'<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
535+
<body>
536+
<br>
537+
</body>'
538+
),
539+
);
540+
}
495541
}

0 commit comments

Comments
 (0)