Skip to content

Commit 700d3a4

Browse files
author
Christoph Singer
committed
initial commit
0 parents  commit 700d3a4

7 files changed

+541
-0
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.idea
2+
composer.lock
3+
composer.phar
4+
vendor

LICENSE

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright (c) 2015 Christoph Singer, Web-Agentur 72
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy
4+
of this software and associated documentation files (the "Software"), to deal
5+
in the Software without restriction, including without limitation the rights
6+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
copies of the Software, and to permit persons to whom the Software is furnished
8+
to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all
11+
copies or substantial portions of the Software.
12+
13+
The software is provided "as is", without warranty of any kind, express or
14+
implied, including but not limited to the warranties of merchantability,
15+
fitness for a particular purpose and noninfringement. In no event shall the
16+
authors or copyright holders be liable for any claim, damages or other
17+
liability, whether in an action of contract, tort or otherwise, arising from,
18+
out of or in connection with the software or the use or other dealings in
19+
the software.

PrettyMin.php

+335
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
<?php
2+
namespace Wa72\HtmlPrettymin;
3+
4+
use JSMin\JSMin;
5+
use Symfony\Component\OptionsResolver\OptionsResolver;
6+
7+
/**
8+
* PrettyMin is an HTML minifier and code formatter that works directly on the DOM tree
9+
*
10+
*/
11+
class PrettyMin
{
    /**
     * The document being processed; assigned by load().
     *
     * @var \DOMDocument
     */
    protected $doc;

    /**
     * Instance-wide options as resolved in the constructor
     * (see configureOptions() for the available keys).
     *
     * @var array
     */
    protected $options;

    /**
     * @param array $options Overrides for the defaults declared in configureOptions()
     */
    public function __construct(array $options = [])
    {
        $resolver = new OptionsResolver();
        $this->configureOptions($resolver);
        $this->options = $resolver->resolve($options);
    }

    /**
     * Declare all supported options together with their default values.
     *
     * Note: the key 'remove_comments_exeptions' (sic) is part of the public
     * option set and therefore keeps its original spelling.
     *
     * @param OptionsResolver $resolver
     */
    public function configureOptions(OptionsResolver $resolver)
    {
        $resolver->setDefaults([
            'minify_js' => true,
            'minify_css' => true,
            'remove_comments' => true,
            'remove_comments_exeptions' => ['/^\[if /'],
            'keep_whitespace_around' => [
                // keep whitespace around inline elements
                'b', 'big', 'i', 'small', 'tt',
                'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
                'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'span', 'sub', 'sup',
                'button', 'input', 'label', 'select', 'textarea'
            ],
            'keep_whitespace_in' => ['script', 'style', 'pre'],
            'remove_empty_attributes' => ['style', 'class'],
            'indent_characters' => "\t"
        ]);
    }

    /**
     * Load an HTML document
     *
     * A \DOMDocument is used as-is (and modified in place), for a \DOMElement
     * its owner document is used, an \SplFileInfo is read from disk and any
     * other value is parsed as a string of HTML.
     *
     * @param \DOMDocument|\DOMElement|\SplFileInfo|string $html
     * @return PrettyMin
     */
    public function load($html)
    {
        if ($html instanceof \DOMDocument) {
            $d = $html;
        } elseif ($html instanceof \DOMElement) {
            $d = $html->ownerDocument;
        } elseif ($html instanceof \SplFileInfo) {
            $d = $this->createEmptyDocument();
            $d->loadHTMLFile($html->getPathname());
        } else {
            $d = $this->createEmptyDocument();
            $d->loadHTML($html);
        }
        $d->formatOutput = false;
        $d->normalizeDocument();
        $this->doc = $d;
        return $this;
    }

    /**
     * Minify the loaded HTML document
     *
     * The given options override the instance options for this call only.
     * 'remove_empty_attributes' merely switches the step on or off here; the
     * list of attribute names is always read from the instance options by
     * removeEmptyAttributes().
     *
     * @param array $options Any of 'minify_js', 'minify_css', 'remove_comments', 'remove_empty_attributes'
     * @return PrettyMin
     */
    public function minify($options = [])
    {
        $resolver = new OptionsResolver();
        $resolver->setDefaults([
            'minify_js' => $this->options['minify_js'],
            'minify_css' => $this->options['minify_css'],
            'remove_comments' => $this->options['remove_comments'],
            'remove_empty_attributes' => $this->options['remove_empty_attributes']
        ]);
        $options = $resolver->resolve($options);

        if ($options['minify_js']) {
            $this->minifyJavascript();
        }
        if ($options['minify_css']) {
            $this->minifyCss();
        }
        if ($options['remove_comments']) {
            $this->removeComments();
        }
        if ($options['remove_empty_attributes']) {
            $this->removeEmptyAttributes();
        }

        // Whitespace is always collapsed, regardless of the options.
        $this->removeWhitespace();

        return $this;
    }

    /**
     * Nicely indent HTML code
     *
     * Existing insignificant whitespace is stripped first so that repeated
     * calls do not pile up indentation.
     *
     * @return PrettyMin
     */
    public function indent()
    {
        $this->removeWhitespace();
        $root = $this->doc->documentElement;
        if ($root !== null) {
            // A document without a root element has nothing to indent.
            $this->indentRecursive($root, 0);
        }
        return $this;
    }

    /**
     * Get the DOMDocument
     *
     * @return \DOMDocument
     */
    public function getDomDocument()
    {
        return $this->doc;
    }

    /**
     * Get the HTML code as string
     *
     * This is a shortcut for calling $this->getDomDocument()->saveHTML()
     *
     * @return string
     */
    public function saveHtml()
    {
        return $this->doc->saveHTML();
    }

    /**
     * Minify the content of every <script> element with JSMin and drop
     * script elements that have neither content nor a src attribute.
     */
    protected function minifyJavascript()
    {
        // getElementsByTagName() returns a live list, so removals are
        // deferred until the iteration is finished.
        $obsolete = [];
        /** @var \DOMElement $element */
        foreach ($this->doc->getElementsByTagName('script') as $element) {
            $code = $element->textContent;
            // Compare against '' explicitly: a script consisting of "0" is not empty.
            if (trim($code) !== '') {
                // Minify before touching the element so that the original
                // content survives if the minifier throws.
                $minified = JSMin::minify($code);
                $element->nodeValue = '';
                $element->appendChild($this->doc->createCDATASection($minified));
                continue;
            }
            $element->nodeValue = '';
            if (!$element->hasAttribute('src')) {
                // script tag has neither content nor a src attribute, remove it completely
                $obsolete[] = $element;
            }
        }
        foreach ($obsolete as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    /**
     * Minify the content of every <style> element with CSSmin and drop
     * style elements that are empty.
     */
    protected function minifyCss()
    {
        // getElementsByTagName() returns a live list: removing an element
        // while iterating would make the iteration skip its successor, so
        // empty elements are collected first and removed afterwards.
        $obsolete = [];
        /** @var \DOMElement $element */
        foreach ($this->doc->getElementsByTagName('style') as $element) {
            $code = $element->nodeValue;
            // Compare against '' explicitly: a stylesheet consisting of "0" is not empty.
            if (trim($code) === '') {
                // Style tag is empty, remove it completely
                $obsolete[] = $element;
                continue;
            }
            // Minify before touching the element so that the original
            // content survives if the minifier throws.
            $min = new \CSSmin();
            $minified = trim($min->run($code));
            $element->nodeValue = '';
            $element->appendChild($this->doc->createCDATASection($minified));
        }
        foreach ($obsolete as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    /**
     * Remove the attributes listed in the 'remove_empty_attributes' option
     * from every element on which their value is empty or whitespace only.
     *
     * The option may be a single attribute name or a list of names; any
     * other value (including a falsy one) disables the step.
     */
    protected function removeEmptyAttributes()
    {
        $attributes = $this->options['remove_empty_attributes'];
        if (!$attributes) {
            return;
        }
        if (is_string($attributes)) {
            $attributes = [$attributes];
        }
        if (!is_array($attributes)) {
            return;
        }

        $xpath = new \DOMXPath($this->doc);
        foreach ($attributes as $attr) {
            // The leading "@attr and" restricts the match to elements that
            // actually carry the attribute. Without it, normalize-space() of
            // a missing attribute is also empty and every element in the
            // document would be selected.
            $query = '//*[@' . $attr . ' and string-length(normalize-space(@' . $attr . ')) = 0]';
            /** @var \DOMElement $el */
            foreach ($xpath->query($query) as $el) {
                $el->removeAttribute($attr);
            }
        }
    }

    /**
     * Remove all comment nodes except those whose text matches one of the
     * exception patterns.
     *
     * @param array|null $exception_patterns PCRE patterns; defaults to the
     *                                       'remove_comments_exeptions' option
     */
    protected function removeComments($exception_patterns = null)
    {
        if ($exception_patterns === null) {
            $exception_patterns = $this->options['remove_comments_exeptions'];
        }
        $xpath = new \DOMXPath($this->doc);
        /** @var \DOMNode $comment */
        foreach ($xpath->query('//comment()') as $comment) {
            $keep = false;
            foreach ($exception_patterns as $pattern) {
                if (preg_match($pattern, $comment->textContent)) {
                    $keep = true;
                    break;
                }
            }
            if (!$keep) {
                $comment->parentNode->removeChild($comment);
            }
        }
    }

    /**
     * Collapse whitespace in text nodes and remove text nodes that end up empty.
     *
     * Text inside 'keep_whitespace_in' elements is left untouched. Leading and
     * trailing whitespace is reduced to a single space when the text sits
     * inside or next to a 'keep_whitespace_around' element and removed
     * otherwise; inner whitespace runs become a single space.
     *
     * originally based on http://stackoverflow.com/a/18260955
     */
    protected function removeWhitespace()
    {
        $keepIn = $this->options['keep_whitespace_in'];
        $keepAround = $this->options['keep_whitespace_around'];

        // Retrieve all text nodes using XPath
        $xpath = new \DOMXPath($this->doc);
        /** @var \DOMNode $node */
        foreach ($xpath->query('//text()') as $node) {
            $parentName = $node->parentNode->nodeName;

            // keep white space inside special elements
            if (in_array($parentName, $keepIn)) {
                continue;
            }

            // Inside an inline element a single space is kept at both ends.
            $default = in_array($parentName, $keepAround) ? ' ' : '';

            // A neighbouring inline element also requires a space on that side.
            $leading = $default;
            if ($node->previousSibling && in_array($node->previousSibling->nodeName, $keepAround)) {
                $leading = ' ';
            }
            $trailing = $default;
            if ($node->nextSibling && in_array($node->nextSibling->nodeName, $keepAround)) {
                $trailing = ' ';
            }

            // 1. "Trim" the text node, then collapse inner whitespace runs.
            $text = preg_replace('/^\s+/', $leading, $node->nodeValue);
            $text = preg_replace('/\s+$/', $trailing, $text);
            $text = preg_replace('/\s+/', ' ', $text);

            // 2. The text node may have become empty; if so, remove it from the DOM.
            if ((string) $text === '') {
                $node->parentNode->removeChild($node);
            } else {
                $node->nodeValue = $text;
            }
        }
    }

    /**
     * Indent HTML code
     *
     * Inserts a newline plus $depth repetitions of the 'indent_characters'
     * option before every node that should start on its own line, and before
     * the closing tag of nodes whose last child was indented.
     *
     * originally based on http://stackoverflow.com/a/18260955
     *
     * @param \DOMNode $currentNode
     * @param int $depth
     * @return bool Whether $currentNode itself was treated as an indented node
     */
    protected function indentRecursive(\DOMNode $currentNode, $depth)
    {
        $indent_characters = $this->options['indent_characters'];

        $indentCurrent = true;
        $indentChildren = true;
        $indentClosingTag = false;
        if ($currentNode->nodeType == XML_TEXT_NODE) {
            $indentCurrent = false;
        }

        if (in_array($currentNode->nodeName, $this->options['keep_whitespace_in'])) {
            // Content of these elements must not be touched; the closing tag
            // only goes on its own line if the content is multi-line.
            $indentCurrent = true;
            $indentChildren = false;
            $indentClosingTag = (strpos($currentNode->nodeValue, "\n") !== false);
        }

        if (in_array($currentNode->nodeName, $this->options['keep_whitespace_around'])) {
            // Inline elements stay in the text flow.
            $indentCurrent = false;
        }

        if ($indentCurrent && $depth > 0) {
            // Indenting a node consists of inserting before it a new text node
            // containing a newline followed by the indentation for its depth.
            $textNode = $currentNode->ownerDocument->createTextNode("\n" . str_repeat($indent_characters, $depth));
            $currentNode->parentNode->insertBefore($textNode, $currentNode);
        }

        if ($indentCurrent && $currentNode->childNodes && $indentChildren) {
            foreach ($currentNode->childNodes as $childNode) {
                // The result of the last child decides about the closing tag.
                $indentClosingTag = $this->indentRecursive($childNode, $depth + 1);
            }
        }

        if ($indentClosingTag) {
            // If children have been indented, then the closing tag
            // of the current node must also be indented. Use the configured
            // indentation characters here as well, so that opening and
            // closing tags line up.
            $closingIndent = "\n" . str_repeat($indent_characters, $depth);
            $last = $currentNode->lastChild;
            if ($last
                && ($last->nodeType == XML_CDATA_SECTION_NODE || $last->nodeType == XML_TEXT_NODE)
                && preg_match('/\n\s?$/', $last->textContent)
            ) {
                // Reuse the trailing newline that is already present.
                $last->nodeValue = preg_replace('/\n\s?$/', $closingIndent, $last->nodeValue);
            } else {
                $currentNode->appendChild($currentNode->ownerDocument->createTextNode($closingIndent));
            }
        }

        return $indentCurrent;
    }

    /**
     * Create an empty DOMDocument configured for parsing HTML input.
     *
     * @return \DOMDocument
     */
    private function createEmptyDocument()
    {
        $d = new \DOMDocument();
        $d->preserveWhiteSpace = false;
        $d->validateOnParse = true;
        return $d;
    }
}

README.md

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
HTML Pretty-Min
2+
===============
3+
4+
HTML Pretty-Min is a PHP library for minifying and prettyprinting (indenting) HTML documents
5+
that works directly on the DOM tree of an HTML document.
6+
7+
Currently it has the following features:
8+
9+
- **Prettyprint**:
10+
- Indent Block-level elements, do not indent inline elements
11+
12+
- **Minify**:
13+
- Remove whitespace and newlines
14+
- Compress embedded Javascript using [mrclay/jsmin-php](https://packagist.org/packages/mrclay/jsmin-php)
15+
- Compress embedded CSS using [tubalmartin/cssmin](https://packagist.org/packages/tubalmartin/cssmin)
16+
- Remove some attributes when their value is empty (by default "style" and "class" attributes)
17+
18+
Installation
19+
------------
20+
21+
`composer require wasinger/html-pretty-min`
22+
23+
Usage
24+
-----
25+
26+
```php
27+
<?php
28+
use Wa72\HtmlPrettymin\PrettyMin;
29+
30+
$pm = new PrettyMin();
31+
32+
$output = $pm
33+
->load($html) // $html may be a \DOMDocument, a string containing HTML code,
34+
// or an \SplFileInfo pointing to an HTML document
35+
->minify()
36+
->saveHtml();
37+
```

0 commit comments

Comments
 (0)