Guest User

PHP Emogrifier 2.0.0

a guest
Jun 11th, 2019
401
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 59.21 KB | None | 0 0
  1. <?php
  2. /**
  3.  * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
  4.  *
  5.  * For more information, please see the README.md file.
  6.  *
  7.  * @version 2.0.0
  8.  *
  9.  * @author Cameron Brooks
  10.  * @author Jaime Prado
  11.  * @author Oliver Klee <github@oliverklee.de>
  12.  * @author Roman Ožana <ozana@omdesign.cz>
  13.  * @author Sander Kruger <s.kruger@invessel.com>
  14.  * @author Zoli Szabó <zoli.szabo+github@gmail.com>
  15.  */
  16. class Emogrifier
  17. {
  18.     /**
  19.      * @var int
  20.      */
  21.     const CACHE_KEY_CSS = 0;
  22.  
  23.     /**
  24.      * @var int
  25.      */
  26.     const CACHE_KEY_SELECTOR = 1;
  27.  
  28.     /**
  29.      * @var int
  30.      */
  31.     const CACHE_KEY_XPATH = 2;
  32.  
  33.     /**
  34.      * @var int
  35.      */
  36.     const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 3;
  37.  
  38.     /**
  39.      * @var int
  40.      */
  41.     const CACHE_KEY_COMBINED_STYLES = 4;
  42.  
  43.     /**
  44.      * for calculating nth-of-type and nth-child selectors
  45.      *
  46.      * @var int
  47.      */
  48.     const INDEX = 0;
  49.  
  50.     /**
  51.      * for calculating nth-of-type and nth-child selectors
  52.      *
  53.      * @var int
  54.      */
  55.     const MULTIPLIER = 1;
  56.  
  57.     /**
  58.      * @var string
  59.      */
  60.     const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';
  61.  
  62.     /**
  63.      * @var string
  64.      */
  65.     const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';
  66.  
  67.     /**
  68.      * @var string
  69.      */
  70.     const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
  71.  
  72.     /**
  73.      * @var string
  74.      */
  75.     const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
  76.  
  77.     /**
  78.      * @var string
  79.      */
  80.     private $html = '';
  81.  
  82.     /**
  83.      * @var string
  84.      */
  85.     private $css = '';
  86.  
  87.     /**
  88.      * @var bool[]
  89.      */
  90.     private $excludedSelectors = [];
  91.  
  92.     /**
  93.      * @var string[]
  94.      */
  95.     private $unprocessableHtmlTags = ['wbr'];
  96.  
  97.     /**
  98.      * @var bool[]
  99.      */
  100.     private $allowedMediaTypes = ['all' => true, 'screen' => true, 'print' => true];
  101.  
  102.     /**
  103.      * @var mixed[]
  104.      */
  105.     private $caches = [
  106.         self::CACHE_KEY_CSS => [],
  107.         self::CACHE_KEY_SELECTOR => [],
  108.         self::CACHE_KEY_XPATH => [],
  109.         self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
  110.         self::CACHE_KEY_COMBINED_STYLES => [],
  111.     ];
  112.  
  113.     /**
  114.      * the visited nodes with the XPath paths as array keys
  115.      *
  116.      * @var \DOMElement[]
  117.      */
  118.     private $visitedNodes = [];
  119.  
  120.     /**
  121.      * the styles to apply to the nodes with the XPath paths as array keys for the outer array
  122.      * and the attribute names/values as key/value pairs for the inner array
  123.      *
  124.      * @var string[][]
  125.      */
  126.     private $styleAttributesForNodes = [];
  127.  
  128.     /**
  129.      * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved.
  130.      * If set to false, the value of the style attributes will be discarded.
  131.      *
  132.      * @var bool
  133.      */
  134.     private $isInlineStyleAttributesParsingEnabled = true;
  135.  
  136.     /**
  137.      * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
  138.      *
  139.      * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML
  140.      * via inline styles.
  141.      *
  142.      * If set to false, the <style> blocks will be left as they are in the HTML.
  143.      *
  144.      * @var bool
  145.      */
  146.     private $isStyleBlocksParsingEnabled = true;
  147.  
  148.     /**
  149.      * Determines whether elements with the `display: none` property are removed from the DOM.
  150.      * Note: despite the property's name, `true` means that such invisible nodes get removed.
  151.      *
  152.      * @var bool
  153.      */
  154.     private $shouldKeepInvisibleNodes = true;
  155.  
  156.     /**
  157.      * @var string[]
  158.      */
  159.     private $xPathRules = [
  160.         // attribute presence
  161.         '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/' => '*[@\\1]',
  162.         // type and attribute exact value
  163.         '/(\\w)\\[(\\w+)\\=[\'"]?([\\w\\s]+)[\'"]?\\]/' => '\\1[@\\2="\\3"]',
  164.         // type and attribute value with ~ (one word within a whitespace-separated list of words)
  165.         '/([\\w\\*]+)\\[(\\w+)[\\s]*\\~\\=[\\s]*[\'"]?([\\w\\-_\\/]+)[\'"]?\\]/'
  166.         => '\\1[contains(concat(" ", @\\2, " "), concat(" ", "\\3", " "))]',
  167.         // type and attribute value with | (either exact value match or prefix followed by a hyphen)
  168.         '/([\\w\\*]+)\\[(\\w+)[\\s]*\\|\\=[\\s]*[\'"]?([\\w\\-_\\s\\/]+)[\'"]?\\]/'
  169.         => '\\1[@\\2="\\3" or starts-with(@\\2, concat("\\3", "-"))]',
  170.         // type and attribute value with ^ (prefix match)
  171.         '/([\\w\\*]+)\\[(\\w+)[\\s]*\\^\\=[\\s]*[\'"]?([\\w\\-_\\/]+)[\'"]?\\]/' => '\\1[starts-with(@\\2, "\\3")]',
  172.         // type and attribute value with * (substring match)
  173.         '/([\\w\\*]+)\\[(\\w+)[\\s]*\\*\\=[\\s]*[\'"]?([\\w\\-_\\s\\/:;]+)[\'"]?\\]/' => '\\1[contains(@\\2, "\\3")]',
  174.         // adjacent sibling
  175.         '/\\s+\\+\\s+/' => '/following-sibling::*[1]/self::',
  176.         // child
  177.         '/\\s*>\\s*/' => '/',
  178.         // descendant
  179.         '/\\s+(?=.*[^\\]]{1}$)/' => '//',
  180.         // type and :first-child
  181.         '/([^\\/]+):first-child/i' => '*[1]/self::\\1',
  182.         // type and :last-child
  183.         '/([^\\/]+):last-child/i' => '*[last()]/self::\\1',
  184.  
  185.         // The following matcher will break things if it is placed before the adjacent matcher.
  186.         // So one of the matchers matches either too much or not enough.
  187.         // type and attribute value with $ (suffix match)
  188.         '/([\\w\\*]+)\\[(\\w+)[\\s]*\\$\\=[\\s]*[\'"]?([\\w\\-_\\s\\/]+)[\'"]?\\]/'
  189.         => '\\1[substring(@\\2, string-length(@\\2) - string-length("\\3") + 1) = "\\3"]',
  190.     ];
  191.  
  192.     /**
  193.      * Determines whether CSS styles that have an equivalent HTML attribute
  194.      * should be mapped and attached to those elements.
  195.      *
  196.      * @var bool
  197.      */
  198.     private $shouldMapCssToHtml = false;
  199.  
  200.     /**
  201.      * This multi-level array contains simple mappings of CSS properties to
  202.      * HTML attributes. If a mapping only applies to certain HTML nodes or
  203.      * only for certain values, the mapping is an object with a whitelist
  204.      * of nodes and values.
  205.      *
  206.      * @var mixed[][]
  207.      */
  208.     private $cssToHtmlMap = [
  209.         'background-color' => [
  210.             'attribute' => 'bgcolor',
  211.         ],
  212.         'text-align' => [
  213.             'attribute' => 'align',
  214.             'nodes' => ['p', 'div', 'td'],
  215.             'values' => ['left', 'right', 'center', 'justify'],
  216.         ],
  217.         'float' => [
  218.             'attribute' => 'align',
  219.             'nodes' => ['table', 'img'],
  220.             'values' => ['left', 'right'],
  221.         ],
  222.         'border-spacing' => [
  223.             'attribute' => 'cellspacing',
  224.             'nodes' => ['table'],
  225.         ],
  226.     ];
  227.  
  228.     /**
  229.      * If true, Emogrifier will throw exceptions when it encounters an error instead of silently ignoring it.
  230.      *
  231.      * @var bool
  232.      */
  233.     private $debug = false;
  234.  
  /**
   * Creates an Emogrifier instance for the given HTML and CSS.
   *
   * Both arguments are optional; they can also be provided later via setHtml() and setCss().
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   * @param string $css the CSS to merge, must be UTF-8-encoded
   */
  public function __construct($html = '', $css = '')
  {
      // Route both values through the setters so that they stay the single place assigning them.
      $this->setHtml($html);
      $this->setCss($css);
  }
  246.  
  /**
   * Releases the per-run node bookkeeping when the instance is destroyed.
   */
  public function __destruct()
  {
      // Drops the visited nodes and the style attributes collected for them.
      $this->purgeVisitedNodes();
  }
  254.  
  /**
   * Stores the HTML that the next emogrification run will work on.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   *
   * @return void
   */
  public function setHtml($html)
  {
      // The value is stored as-is; no validation or normalization happens here.
      $this->html = $html;
  }
  266.  
  /**
   * Stores the CSS that the next emogrification run will merge into the HTML.
   *
   * @param string $css the CSS to merge, must be UTF-8-encoded
   *
   * @return void
   */
  public function setCss($css)
  {
      // The value is stored as-is; no validation or normalization happens here.
      $this->css = $css;
  }
  278.  
  /**
   * Inlines $this->css into $this->html and returns the serialized result.
   *
   * The complete processed document is returned (as produced by \DOMDocument::saveHTML()
   * without a node argument).
   *
   * @return string the HTML with the CSS applied inline
   *
   * @throws \BadMethodCallException if no HTML has been set
   */
  public function emogrify()
  {
      $processedDocument = $this->createAndProcessXmlDocument();

      return $processedDocument->saveHTML();
  }
  293.  
  /**
   * Applies $this->css to $this->html and returns only the HTML content
   * within the <body> tag.
   *
   * This method places the CSS inline.
   *
   * The enclosing body tags are stripped from the result, including an opening tag that
   * carries attributes (e.g. <body class="mail">).
   *
   * @return string the inner HTML of the body element
   *
   * @throws \BadMethodCallException if no HTML has been set
   */
  public function emogrifyBodyContent()
  {
      $xmlDocument = $this->createAndProcessXmlDocument();
      $bodyNodeHtml = $xmlDocument->saveHTML($this->getBodyElement($xmlDocument));

      // A literal replacement of "<body>" misses an opening tag with attributes, so match
      // "<body>", "<body ...>" and "</body>" with a pattern instead.
      $bodyContent = preg_replace('%</?body(?:\\s[^>]*)?>%', '', $bodyNodeHtml);
      if ($bodyContent === null) {
          // preg_replace() signals a PCRE error with null; fall back to the literal stripping.
          $bodyContent = str_replace(['<body>', '</body>'], '', $bodyNodeHtml);
      }

      return $bodyContent;
  }
  311.  
  /**
   * Creates an XML document from $this->html and emogrifies it.
   *
   * @return \DOMDocument the processed document
   *
   * @throws \BadMethodCallException if $this->html is the empty string
   */
  private function createAndProcessXmlDocument()
  {
      $hasHtml = $this->html !== '';
      if (!$hasHtml) {
          throw new \BadMethodCallException('Please set some HTML first.', 1390393096);
      }

      // Build the DOM, make sure there is a body element to work with, then inline the CSS.
      $document = $this->createRawXmlDocument();
      $this->ensureExistenceOfBodyElement($document);
      $this->process($document);

      return $document;
  }
  331.  
  /**
   * Applies $this->css to $xmlDocument.
   *
   * This method places the CSS inline.
   *
   * The steps are: normalize the existing style attributes, collect the CSS (the configured CSS plus,
   * if enabled, the CSS from the document's <style> blocks), apply each parsed rule to the matching
   * nodes, and then run the optional post-processing steps.
   *
   * While this method runs, a custom handler for E_WARNING is installed; it is always restored
   * before the method returns or an exception leaves it.
   *
   * @param \DOMDocument $xmlDocument
   *
   * @return void
   *
   * @throws \InvalidArgumentException in debug mode, if a CSS selector cannot be queried
   */
  protected function process(\DOMDocument $xmlDocument)
  {
      $xPath = new \DOMXPath($xmlDocument);
      $this->clearAllCaches();
      $this->purgeVisitedNodes();
      set_error_handler([$this, 'handleXpathQueryWarnings'], E_WARNING);

      // The "finally" guarantees that the global error handler is restored even when an exception
      // (e.g. the debug-mode rethrow below) leaves this method early.
      try {
          $this->normalizeStyleAttributesOfAllNodes($xPath);

          // grab any existing style blocks from the html and append them to the existing CSS
          // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
          $allCss = $this->css;
          if ($this->isStyleBlocksParsingEnabled) {
              $allCss .= $this->getCssFromAllStyleNodes($xPath);
          }

          $cssParts = $this->splitCssAndMediaQuery($allCss);
          $excludedNodes = $this->getNodesToExclude($xPath);
          $cssRules = $this->parseCssRules($cssParts['css']);
          foreach ($cssRules as $cssRule) {
              // Warnings raised by the XPath query go to the error handler installed above. A resulting
              // \InvalidArgumentException is only passed on in debug mode; otherwise the rule is skipped.
              try {
                  $nodesMatchingCssSelectors = $xPath->query($this->translateCssToXpath($cssRule['selector']));
              } catch (\InvalidArgumentException $e) {
                  if ($this->debug) {
                      throw $e;
                  }
                  continue;
              }

              /** @var \DOMElement $node */
              foreach ($nodesMatchingCssSelectors as $node) {
                  if (in_array($node, $excludedNodes, true)) {
                      continue;
                  }
                  // if it has a style attribute, get it, process it, and append (overwrite) new stuff
                  if ($node->hasAttribute('style')) {
                      // break it up into an associative array
                      $oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
                  } else {
                      $oldStyleDeclarations = [];
                  }
                  $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']);
                  $node->setAttribute(
                      'style',
                      $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
                  );
              }
          }

          if ($this->isInlineStyleAttributesParsingEnabled) {
              $this->fillStyleAttributesWithMergedStyles();
          }

          if ($this->shouldMapCssToHtml) {
              $this->mapAllInlineStylesToHtmlAttributes($xPath);
          }

          // Note: despite the flag's name, a value of true triggers the removal of invisible nodes.
          if ($this->shouldKeepInvisibleNodes) {
              $this->removeInvisibleNodes($xPath);
          }

          $this->removeImportantAnnotationFromAllInlineStyles($xPath);

          $this->copyCssWithMediaToStyleNode($xmlDocument, $xPath, $cssParts['media']);
      } finally {
          restore_error_handler();
      }
  }
  415.  
  /**
   * Maps the inline styles of every node that has a style attribute to HTML attributes.
   *
   * For each such node, the style attribute is parsed into declarations, which are then handed
   * to mapCssToHtmlAttributes() together with the node.
   *
   * @param \DOMXPath $xPath
   *
   * @return void
   */
  private function mapAllInlineStylesToHtmlAttributes(\DOMXPath $xPath)
  {
      $styledNodes = $this->getAllNodesWithStyleAttribute($xPath);

      /** @var \DOMElement $styledNode */
      foreach ($styledNodes as $styledNode) {
          $styleAttribute = $styledNode->getAttribute('style');
          $declarations = $this->parseCssDeclarationsBlock($styleAttribute);
          $this->mapCssToHtmlAttributes($declarations, $styledNode);
      }
  }
  432.  
  /**
   * Removes the "!important" annotations from the inline styles of all nodes that have a
   * style attribute, rearranging declarations where needed.
   *
   * The work for a single node is done by removeImportantAnnotationFromNodeInlineStyle().
   *
   * @param \DOMXPath $xPath
   *
   * @return void
   */
  private function removeImportantAnnotationFromAllInlineStyles(\DOMXPath $xPath)
  {
      $styledNodes = $this->getAllNodesWithStyleAttribute($xPath);

      /** @var \DOMElement $styledNode */
      foreach ($styledNodes as $styledNode) {
          $this->removeImportantAnnotationFromNodeInlineStyle($styledNode);
      }
  }
  447.  
  /**
   * Removes the "!important" annotations out of the inline style declarations,
   * rearranging declarations where needed.
   *
   * Rearranging is needed when !important shorthand properties are followed by some of their
   * not !important expanded-version properties.
   * For example "font: 12px serif !important; font-size: 13px;" must be reordered
   * to "font-size: 13px; font: 12px serif;" in order to remain correct.
   *
   * The annotation is stripped case-insensitively, as CSS keywords are not case-sensitive.
   *
   * @param \DOMElement $node the node whose style attribute gets rewritten
   *
   * @return void
   */
  private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node)
  {
      $inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
      $regularStyleDeclarations = [];
      $importantStyleDeclarations = [];
      foreach ($inlineStyleDeclarations as $property => $value) {
          if ($this->attributeValueIsImportant($value)) {
              // str_ireplace() so that variants such as "!IMPORTANT" do not survive in the output.
              $importantStyleDeclarations[$property] = trim(str_ireplace('!important', '', $value));
          } else {
              $regularStyleDeclarations[$property] = $value;
          }
      }

      // The formerly important declarations go last so that they keep winning over the regular ones.
      $inlineStyleDeclarationsInNewOrder = array_merge(
          $regularStyleDeclarations,
          $importantStyleDeclarations
      );
      $node->setAttribute(
          'style',
          $this->generateStyleStringFromSingleDeclarationsArray($inlineStyleDeclarationsInNewOrder)
      );
  }
  481.  
  /**
   * Finds all elements in the document that carry a style attribute (even an empty one).
   *
   * @param \DOMXPath $xPath
   *
   * @return \DOMNodeList the matching elements
   */
  private function getAllNodesWithStyleAttribute(\DOMXPath $xPath)
  {
      $nodesWithStyleAttribute = $xPath->query('//*[@style]');

      return $nodesWithStyleAttribute;
  }
  493.  
  /**
   * Applies $styles to $node.
   *
   * This method maps CSS styles to HTML attributes and adds those to the
   * node. A "!important" annotation is removed from each value first (case-insensitively,
   * as CSS keywords are not case-sensitive), so that it cannot end up in an attribute value
   * or prevent a value from matching the mapping table.
   *
   * @param string[] $styles the CSS declarations to map, with property names as keys
   * @param \DOMElement $node node to apply styles to
   *
   * @return void
   */
  private function mapCssToHtmlAttributes(array $styles, \DOMElement $node)
  {
      foreach ($styles as $property => $value) {
          // Strip !important indicator
          $value = trim(str_ireplace('!important', '', $value));
          $this->mapCssToHtmlAttribute($property, $value, $node);
      }
  }
  513.  
  /**
   * Tries to express a single CSS declaration as an HTML attribute on $node.
   *
   * The simple mapping table is consulted first; the special-case mapping is only tried
   * if the table did not produce an attribute.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMElement $node node to apply styles to
   *
   * @return void
   */
  private function mapCssToHtmlAttribute($property, $value, \DOMElement $node)
  {
      $wasMappedViaTable = $this->mapSimpleCssProperty($property, $value, $node);
      if ($wasMappedViaTable) {
          return;
      }

      $this->mapComplexCssProperty($property, $value, $node);
  }
  531.  
  /**
   * Looks up the CSS property in the mapping table and maps it if it matches the conditions.
   *
   * A table entry applies if the node name is in the entry's "nodes" list (when present) and
   * the value is in the entry's "values" list (when present). If it applies, the entry's
   * "attribute" is set on the node with the unchanged value.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMElement $node node to apply styles to
   *
   * @return bool true if the property can be mapped using the simple mapping table
   */
  private function mapSimpleCssProperty($property, $value, \DOMElement $node)
  {
      if (!isset($this->cssToHtmlMap[$property])) {
          return false;
      }
      $tableEntry = $this->cssToHtmlMap[$property];

      // Entry is restricted to certain elements, and this is not one of them.
      if (isset($tableEntry['nodes']) && !in_array($node->nodeName, $tableEntry['nodes'], true)) {
          return false;
      }
      // Entry is restricted to certain values, and this is not one of them.
      if (isset($tableEntry['values']) && !in_array($value, $tableEntry['values'], true)) {
          return false;
      }

      $node->setAttribute($tableEntry['attribute'], $value);

      return true;
  }
  558.  
  /**
   * Maps CSS properties that need special transformation to an HTML attribute.
   *
   * Handled properties:
   * - "background": the first space-separated token becomes "bgcolor", unless it starts with a
   *   digit or with "url" (or the value is empty)
   * - "width"/"height": whole-number values in px or % are set as the attribute of the same name,
   *   without the "px" unit
   * - "margin" (table and img only): left and right margins of "auto" become align="center"
   * - "border" (table and img only): "none" or "0" becomes border="0"
   *
   * All other properties are ignored.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMElement $node node to apply styles to
   *
   * @return void
   */
  private function mapComplexCssProperty($property, $value, \DOMElement $node)
  {
      $isTableOrImage = in_array($node->nodeName, ['table', 'img'], true);

      switch ($property) {
          case 'background':
              // Parse out the color, if any
              $styles = explode(' ', $value);
              $first = $styles[0];
              // An empty value has no first character to inspect and must not result in bgcolor="".
              if ($first !== '' && !is_numeric($first[0]) && strpos($first, 'url') !== 0) {
                  // This is not a position or image, assume it's a color
                  $node->setAttribute('bgcolor', $first);
              }
              break;
          case 'width':
              // intentional fall-through
          case 'height':
              // Only parse values in px and %, but not values like "auto".
              if (preg_match('/^\d+(px|%)$/', $value)) {
                  // Remove 'px'. This regex only conserves numbers and %
                  $number = preg_replace('/[^0-9.%]/', '', $value);
                  $node->setAttribute($property, $number);
              }
              break;
          case 'margin':
              if ($isTableOrImage) {
                  $margins = $this->parseCssShorthandValue($value);
                  if ($margins['left'] === 'auto' && $margins['right'] === 'auto') {
                      $node->setAttribute('align', 'center');
                  }
              }
              break;
          case 'border':
              if ($isTableOrImage && ($value === 'none' || $value === '0')) {
                  $node->setAttribute('border', '0');
              }
              break;
          default:
              // no special mapping for this property
              break;
      }
  }
  613.  
  /**
   * Expands a CSS box shorthand value (1 to 4 whitespace-separated sizes) into its four sides.
   *
   * Missing sides are filled in this way: "right" falls back to "top", "bottom" falls back to "top",
   * and "left" falls back to "right". For example, "0 auto" results in
   * top: 0, right: auto, bottom: 0, left: auto.
   *
   * @param string $value a CSS shorthand value with 1, 2, 3 or 4 sizes, e.g., "0 auto"
   *
   * @return string[] the values for the sides, using the keys "top", "right", "bottom" and "left"
   */
  private function parseCssShorthandValue($value)
  {
      $sizes = preg_split('/\\s+/', $value);
      $numberOfSizes = count($sizes);

      $top = $sizes[0];
      $right = $numberOfSizes > 1 ? $sizes[1] : $top;
      $bottom = $numberOfSizes > 2 ? $sizes[2] : $top;
      $left = $numberOfSizes > 3 ? $sizes[3] : $right;

      return [
          'top' => $top,
          'right' => $right,
          'bottom' => $bottom,
          'left' => $left,
      ];
  }
  636.  
  /**
   * Extracts and parses the individual rules from a CSS string.
   *
   * Rule blocks with empty declarations are dropped, comma-separated selectors are split into
   * separate rules, and selectors with pseudo-elements or unsupported pseudo-classes are skipped.
   * The rules are sorted using sortBySelectorPrecedence(), and the result is cached per CSS string.
   *
   * @param string $css a string of raw CSS code
   *
   * @return string[][] an array of string sub-arrays with the keys
   *         "selector" (a single CSS selector, e.g., "*" or "h1"),
   *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
   *         e.g., "color: red; height: 4px;"),
   *         and "line" (the index of the rule block within the CSS, i.e., its order of appearance)
   */
  private function parseCssRules($css)
  {
      $cacheKey = md5($css);
      if (isset($this->caches[self::CACHE_KEY_CSS][$cacheKey])) {
          return $this->caches[self::CACHE_KEY_CSS][$cacheKey];
      }

      // process the CSS file for selectors and definitions
      preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mi', $css, $ruleBlocks, PREG_SET_ORDER);

      $parsedRules = [];
      /** @var string[][] $ruleBlocks */
      /** @var string[] $ruleBlock */
      foreach ($ruleBlocks as $position => $ruleBlock) {
          $declarationsBlock = trim($ruleBlock[2]);
          if ($declarationsBlock === '') {
              continue;
          }

          foreach (explode(',', $ruleBlock[1]) as $singleSelector) {
              // don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
              // only allow structural pseudo-classes
              if (strpos($singleSelector, '::') !== false) {
                  continue;
              }
              $hasAnyPseudoClass = preg_match('/:[a-zA-Z]/', $singleSelector) === 1;
              $hasSupportedPseudoClass = preg_match(
                  '/:(\\S+\\-(child|type\\()|not\\([[:ascii:]]*\\))/i',
                  $singleSelector
              ) === 1;
              if ($hasAnyPseudoClass && !$hasSupportedPseudoClass) {
                  continue;
              }

              $parsedRules[] = [
                  'selector' => trim($singleSelector),
                  'declarationsBlock' => $declarationsBlock,
                  // keep track of where it appears in the file, since order is important
                  'line' => $position,
              ];
          }
      }

      usort($parsedRules, [$this, 'sortBySelectorPrecedence']);
      $this->caches[self::CACHE_KEY_CSS][$cacheKey] = $parsedRules;

      return $parsedRules;
  }
  694.  
  /**
   * Switches off the parsing of inline styles (the "style" attributes in the HTML).
   *
   * @return void
   */
  public function disableInlineStyleAttributesParsing()
  {
      // With this flag off, process() skips fillStyleAttributesWithMergedStyles().
      $this->isInlineStyleAttributesParsingEnabled = false;
  }
  704.  
  /**
   * Switches off the parsing of the <style> blocks in the HTML.
   *
   * @return void
   */
  public function disableStyleBlocksParsing()
  {
      // With this flag off, process() does not add the CSS from the <style> nodes to the CSS to apply.
      $this->isStyleBlocksParsingEnabled = false;
  }
  714.  
  /**
   * Switches off the removal of elements with `display: none` properties.
   *
   * @return void
   */
  public function disableInvisibleNodeRemoval()
  {
      // Despite the property's name, process() only removes invisible nodes while it is true,
      // so setting it to false is what disables the removal.
      $this->shouldKeepInvisibleNodes = false;
  }
  724.  
  /**
   * Switches on the attachment/override of HTML attributes for which a
   * corresponding CSS property has been set.
   *
   * @return void
   */
  public function enableCssToHtmlMapping()
  {
      // With this flag on, process() calls mapAllInlineStylesToHtmlAttributes().
      $this->shouldMapCssToHtml = true;
  }
  735.  
  /**
   * Empties every cache of this instance.
   *
   * @return void
   */
  private function clearAllCaches()
  {
      $allCacheKeys = [
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      ];
      foreach ($allCacheKeys as $cacheKey) {
          $this->clearCache($cacheKey);
      }
  }
  749.  
  /**
   * Empties a single cache.
   *
   * @param int $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH,
   *                 CACHE_KEY_CSS_DECLARATIONS_BLOCK or CACHE_KEY_COMBINED_STYLES
   *
   * @return void
   *
   * @throws \InvalidArgumentException if $key is not one of the cache key constants
   */
  private function clearCache($key)
  {
      $knownCacheKeys = [
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      ];
      // The comparison is strict, i.e., a numeric string like "0" is rejected as well.
      $isKnownKey = in_array($key, $knownCacheKeys, true);
      if (!$isKnownKey) {
          throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
      }

      $this->caches[$key] = [];
  }
  775.  
  /**
   * Forgets the visited nodes as well as the style attributes collected for them.
   *
   * @return void
   */
  private function purgeVisitedNodes()
  {
      // The two arrays share the same keys, so they are always reset together.
      $this->styleAttributesForNodes = [];
      $this->visitedNodes = [];
  }
  786.  
  /**
   * Adds a tag name to the list of unprocessable HTML tags, i.e., marks the tag for removal.
   *
   * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
   * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
   *
   * Note: The tags will not be removed if they have any content.
   *
   * The name is appended without a check for duplicates.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function addUnprocessableHtmlTag($tagName)
  {
      $this->unprocessableHtmlTags[] = $tagName;
  }
  803.  
  /**
   * Takes a tag name off the list of unprocessable HTML tags again.
   *
   * Only the first occurrence of the name is dropped; unknown names are ignored.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function removeUnprocessableHtmlTag($tagName)
  {
      $position = array_search($tagName, $this->unprocessableHtmlTags, true);
      if ($position === false) {
          return;
      }

      unset($this->unprocessableHtmlTags[$position]);
  }
  818.  
  819.     /**
  820.      * Marks a media query type to keep.
  821.      *
  822.      * @param string $mediaName the media type name, e.g., "braille"
  823.      *
  824.      * @return void
  825.      */
  826.     public function addAllowedMediaType($mediaName)
  827.     {
  828.         $this->allowedMediaTypes[$mediaName] = true;
  829.     }
  830.  
  831.     /**
  832.      * Drops a media query type from the allowed list.
  833.      *
  834.      * @param string $mediaName the tag name, e.g., "braille"
  835.      *
  836.      * @return void
  837.      */
  838.     public function removeAllowedMediaType($mediaName)
  839.     {
  840.         if (isset($this->allowedMediaTypes[$mediaName])) {
  841.             unset($this->allowedMediaTypes[$mediaName]);
  842.         }
  843.     }
  844.  
  845.     /**
  846.      * Adds a selector to exclude nodes from emogrification.
  847.      *
  848.      * Any nodes that match the selector will not have their style altered.
  849.      *
  850.      * @param string $selector the selector to exclude, e.g., ".editor"
  851.      *
  852.      * @return void
  853.      */
  854.     public function addExcludedSelector($selector)
  855.     {
  856.         $this->excludedSelectors[$selector] = true;
  857.     }
  858.  
  859.     /**
  860.      * No longer excludes the nodes matching this selector from emogrification.
  861.      *
  862.      * @param string $selector the selector to no longer exclude, e.g., ".editor"
  863.      *
  864.      * @return void
  865.      */
  866.     public function removeExcludedSelector($selector)
  867.     {
  868.         if (isset($this->excludedSelectors[$selector])) {
  869.             unset($this->excludedSelectors[$selector]);
  870.         }
  871.     }
  872.  
  873.     /**
  874.      * This removes styles from your email that contain display:none.
  875.      * We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only
  876.      * supports XPath 1.0, lower-case() isn't available to us. We've thus far only set attributes to lowercase,
  877.      * not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE")
  878.      * to lowercase.
  879.      *
  880.      * @param \DOMXPath $xPath
  881.      *
  882.      * @return void
  883.      */
  884.     private function removeInvisibleNodes(\DOMXPath $xPath)
  885.     {
  886.         $nodesWithStyleDisplayNone = $xPath->query(
  887.             '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]'
  888.         );
  889.         if ($nodesWithStyleDisplayNone->length === 0) {
  890.             return;
  891.         }
  892.  
  893.         // The checks on parentNode and is_callable below ensure that if we've deleted the parent node,
  894.         // we don't try to call removeChild on a nonexistent child node
  895.         /** @var \DOMNode $node */
  896.         foreach ($nodesWithStyleDisplayNone as $node) {
  897.             if ($node->parentNode && is_callable([$node->parentNode, 'removeChild'])) {
  898.                 $node->parentNode->removeChild($node);
  899.             }
  900.         }
  901.     }
  902.  
  903.     /**
  904.      * Parses the document and normalizes all existing CSS attributes.
  905.      * This changes 'DISPLAY: none' to 'display: none'.
  906.      * We wouldn't have to do this if DOMXPath supported XPath 2.0.
  907.      * Also stores a reference of nodes with existing inline styles so we don't overwrite them.
  908.      *
  909.      * @param \DOMXPath $xPath
  910.      *
  911.      * @return void
  912.      */
  913.     private function normalizeStyleAttributesOfAllNodes(\DOMXPath $xPath)
  914.     {
  915.         /** @var \DOMElement $node */
  916.         foreach ($this->getAllNodesWithStyleAttribute($xPath) as $node) {
  917.             if ($this->isInlineStyleAttributesParsingEnabled) {
  918.                 $this->normalizeStyleAttributes($node);
  919.             }
  920.             // Remove style attribute in every case, so we can add them back (if inline style attributes
  921.             // parsing is enabled) to the end of the style list, thus keeping the right priority of CSS rules;
  922.             // else original inline style rules may remain at the beginning of the final inline style definition
  923.             // of a node, which may give not the desired results
  924.             $node->removeAttribute('style');
  925.         }
  926.     }
  927.  
  928.     /**
  929.      * Normalizes the value of the "style" attribute and saves it.
  930.      *
  931.      * @param \DOMElement $node
  932.      *
  933.      * @return void
  934.      */
  935.     private function normalizeStyleAttributes(\DOMElement $node)
  936.     {
  937.         $normalizedOriginalStyle = preg_replace_callback(
  938.             '/[A-z\\-]+(?=\\:)/S',
  939.             function (array $m) {
  940.                 return strtolower($m[0]);
  941.             },
  942.             $node->getAttribute('style')
  943.         );
  944.  
  945.         // in order to not overwrite existing style attributes in the HTML, we
  946.         // have to save the original HTML styles
  947.         $nodePath = $node->getNodePath();
  948.         if (!isset($this->styleAttributesForNodes[$nodePath])) {
  949.             $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
  950.             $this->visitedNodes[$nodePath] = $node;
  951.         }
  952.  
  953.         $node->setAttribute('style', $normalizedOriginalStyle);
  954.     }
  955.  
  956.     /**
  957.      * Merges styles from styles attributes and style nodes and applies them to the attribute nodes
  958.      *
  959.      * @return void
  960.      */
  961.     private function fillStyleAttributesWithMergedStyles()
  962.     {
  963.         foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
  964.             $node = $this->visitedNodes[$nodePath];
  965.             $currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
  966.             $node->setAttribute(
  967.                 'style',
  968.                 $this->generateStyleStringFromDeclarationsArrays(
  969.                     $currentStyleAttributes,
  970.                     $styleAttributesForNode
  971.                 )
  972.             );
  973.         }
  974.     }
  975.  
  976.     /**
  977.      * This method merges old or existing name/value array with new name/value array
  978.      * and then generates a string of the combined style suitable for placing inline.
  979.      * This becomes the single point for CSS string generation allowing for consistent
  980.      * CSS output no matter where the CSS originally came from.
  981.      *
  982.      * @param string[] $oldStyles
  983.      * @param string[] $newStyles
  984.      *
  985.      * @return string
  986.      */
  987.     private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
  988.     {
  989.         $combinedStyles = array_merge($oldStyles, $newStyles);
  990.         $cacheKey = serialize($combinedStyles);
  991.         if (isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
  992.             return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
  993.         }
  994.  
  995.         foreach ($oldStyles as $attributeName => $attributeValue) {
  996.             if (!isset($newStyles[$attributeName])) {
  997.                 continue;
  998.             }
  999.  
  1000.             $newAttributeValue = $newStyles[$attributeName];
  1001.             if ($this->attributeValueIsImportant($attributeValue)
  1002.                 && !$this->attributeValueIsImportant($newAttributeValue)
  1003.             ) {
  1004.                 $combinedStyles[$attributeName] = $attributeValue;
  1005.             }
  1006.         }
  1007.  
  1008.         $style = '';
  1009.         foreach ($combinedStyles as $attributeName => $attributeValue) {
  1010.             $style .= strtolower(trim($attributeName)) . ': ' . trim($attributeValue) . '; ';
  1011.         }
  1012.         $trimmedStyle = rtrim($style);
  1013.  
  1014.         $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = $trimmedStyle;
  1015.  
  1016.         return $trimmedStyle;
  1017.     }
  1018.  
  1019.     /**
  1020.      * Generates a CSS style string suitable to be used inline from the $styleDeclarations property => value array.
  1021.      *
  1022.      * @param string[] $styleDeclarations
  1023.      *
  1024.      * @return string
  1025.      */
  1026.     private function generateStyleStringFromSingleDeclarationsArray(array $styleDeclarations)
  1027.     {
  1028.         return $this->generateStyleStringFromDeclarationsArrays([], $styleDeclarations);
  1029.     }
  1030.  
  1031.     /**
  1032.      * Checks whether $attributeValue is marked as !important.
  1033.      *
  1034.      * @param string $attributeValue
  1035.      *
  1036.      * @return bool
  1037.      */
  1038.     private function attributeValueIsImportant($attributeValue)
  1039.     {
  1040.         return strtolower(substr(trim($attributeValue), -10)) === '!important';
  1041.     }
  1042.  
  1043.     /**
  1044.      * Applies $css to $xmlDocument, limited to the media queries that actually apply to the document.
  1045.      *
  1046.      * @param \DOMDocument $xmlDocument the document to match against
  1047.      * @param \DOMXPath $xPath
  1048.      * @param string $css a string of CSS
  1049.      *
  1050.      * @return void
  1051.      */
  1052.     private function copyCssWithMediaToStyleNode(\DOMDocument $xmlDocument, \DOMXPath $xPath, $css)
  1053.     {
  1054.         if ($css === '') {
  1055.             return;
  1056.         }
  1057.  
  1058.         $mediaQueriesRelevantForDocument = [];
  1059.  
  1060.         foreach ($this->extractMediaQueriesFromCss($css) as $mediaQuery) {
  1061.             foreach ($this->parseCssRules($mediaQuery['css']) as $selector) {
  1062.                 if ($this->existsMatchForCssSelector($xPath, $selector['selector'])) {
  1063.                     $mediaQueriesRelevantForDocument[] = $mediaQuery['query'];
  1064.                     break;
  1065.                 }
  1066.             }
  1067.         }
  1068.  
  1069.         $this->addStyleElementToDocument($xmlDocument, implode($mediaQueriesRelevantForDocument));
  1070.     }
  1071.  
  1072.     /**
  1073.      * Extracts the media queries from $css while skipping empty media queries.
  1074.      *
  1075.      * @param string $css
  1076.      *
  1077.      * @return string[][] numeric array with string sub-arrays with the keys "css" and "query"
  1078.      */
  1079.     private function extractMediaQueriesFromCss($css)
  1080.     {
  1081.         preg_match_all('/@media\\b[^{]*({((?:[^{}]+|(?1))*)})/', $css, $rawMediaQueries, PREG_SET_ORDER);
  1082.         $parsedQueries = [];
  1083.  
  1084.         /** @var string[][] $rawMediaQueries */
  1085.         foreach ($rawMediaQueries as $mediaQuery) {
  1086.             if ($mediaQuery[2] !== '') {
  1087.                 $parsedQueries[] = [
  1088.                     'css' => $mediaQuery[2],
  1089.                     'query' => $mediaQuery[0],
  1090.                 ];
  1091.             }
  1092.         }
  1093.  
  1094.         return $parsedQueries;
  1095.     }
  1096.  
  1097.     /**
  1098.      * Checks whether there is at least one matching element for $cssSelector.
  1099.      * When not in debug mode, it returns true also for invalid selectors (because they may be valid,
  1100.      * just not implemented/recognized yet by Emogrifier).
  1101.      *
  1102.      * @param \DOMXPath $xPath
  1103.      * @param string $cssSelector
  1104.      *
  1105.      * @return bool
  1106.      *
  1107.      * @throws \InvalidArgumentException
  1108.      */
  1109.     private function existsMatchForCssSelector(\DOMXPath $xPath, $cssSelector)
  1110.     {
  1111.         try {
  1112.             $nodesMatchingSelector = $xPath->query($this->translateCssToXpath($cssSelector));
  1113.         } catch (\InvalidArgumentException $e) {
  1114.             if ($this->debug) {
  1115.                 throw $e;
  1116.             }
  1117.             return true;
  1118.         }
  1119.  
  1120.         return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0;
  1121.     }
  1122.  
  1123.     /**
  1124.      * Returns CSS content.
  1125.      *
  1126.      * @param \DOMXPath $xPath
  1127.      *
  1128.      * @return string
  1129.      */
  1130.     private function getCssFromAllStyleNodes(\DOMXPath $xPath)
  1131.     {
  1132.         $styleNodes = $xPath->query('//style');
  1133.  
  1134.         if ($styleNodes === false) {
  1135.             return '';
  1136.         }
  1137.  
  1138.         $css = '';
  1139.         /** @var \DOMNode $styleNode */
  1140.         foreach ($styleNodes as $styleNode) {
  1141.             $css .= "\n\n" . $styleNode->nodeValue;
  1142.             $styleNode->parentNode->removeChild($styleNode);
  1143.         }
  1144.  
  1145.         return $css;
  1146.     }
  1147.  
  1148.     /**
  1149.      * Adds a style element with $css to $document.
  1150.      *
  1151.      * This method is protected to allow overriding.
  1152.      *
  1153.      * @see https://github.com/jjriv/emogrifier/issues/103
  1154.      *
  1155.      * @param \DOMDocument $document
  1156.      * @param string $css
  1157.      *
  1158.      * @return void
  1159.      */
  1160.     protected function addStyleElementToDocument(\DOMDocument $document, $css)
  1161.     {
  1162.         $styleElement = $document->createElement('style', $css);
  1163.         $styleAttribute = $document->createAttribute('type');
  1164.         $styleAttribute->value = 'text/css';
  1165.         $styleElement->appendChild($styleAttribute);
  1166.  
  1167.         $bodyElement = $this->getBodyElement($document);
  1168.         $bodyElement->appendChild($styleElement);
  1169.     }
  1170.  
  1171.     /**
  1172.      * Checks that $document has a BODY element and adds it if it is missing.
  1173.      *
  1174.      * @param \DOMDocument $document
  1175.      */
  1176.     private function ensureExistenceOfBodyElement(\DOMDocument $document)
  1177.     {
  1178.         if ($document->getElementsByTagName('body')->item(0) !== null) {
  1179.             return;
  1180.         }
  1181.  
  1182.         $htmlElement = $document->getElementsByTagName('html')->item(0);
  1183.  
  1184.         $htmlElement->appendChild($document->createElement('body'));
  1185.     }
  1186.  
  1187.     /**
  1188.      * Returns the BODY element.
  1189.      *
  1190.      * This method assumes that there always is a BODY element.
  1191.      *
  1192.      * @param \DOMDocument $document
  1193.      *
  1194.      * @return \DOMElement
  1195.      *
  1196.      * @throws \BadMethodCallException
  1197.      */
  1198.     private function getBodyElement(\DOMDocument $document)
  1199.     {
  1200.         $bodyElement = $document->getElementsByTagName('body')->item(0);
  1201.         if ($bodyElement === null) {
  1202.             throw new \BadMethodCallException(
  1203.                 'getBodyElement method may only be called after ensureExistenceOfBodyElement has been called.',
  1204.                 1508173775427
  1205.             );
  1206.         }
  1207.  
  1208.         return $bodyElement;
  1209.     }
  1210.  
  1211.     /**
  1212.      * Splits input CSS code to an array where:
  1213.      *
  1214.      * - key "css" will be contains clean CSS code
  1215.      * - key "media" will be contains all valuable media queries
  1216.      *
  1217.      * Example:
  1218.      *
  1219.      * The CSS code
  1220.      *
  1221.      *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
  1222.      *
  1223.      * will be parsed into the following array:
  1224.      *
  1225.      *   "css" => "h1 { color:red; }"
  1226.      *   "media" => "@media { h1 {}}"
  1227.      *
  1228.      * @param string $css
  1229.      *
  1230.      * @return string[]
  1231.      */
  1232.     private function splitCssAndMediaQuery($css)
  1233.     {
  1234.         $cssWithoutComments = preg_replace('/\\/\\*.*\\*\\//sU', '', $css);
  1235.  
  1236.         $mediaTypesExpression = '';
  1237.         if (!empty($this->allowedMediaTypes)) {
  1238.             $mediaTypesExpression = '|' . implode('|', array_keys($this->allowedMediaTypes));
  1239.         }
  1240.  
  1241.         $media = '';
  1242.         $cssForAllowedMediaTypes = preg_replace_callback(
  1243.             '#@media\\s+(?:only\\s)?(?:[\\s{\\(]\\s*' . $mediaTypesExpression . ')\\s*[^{]*+{.*}\\s*}\\s*#misU',
  1244.             function ($matches) use (&$media) {
  1245.                 $media .= $matches[0];
  1246.             },
  1247.             $cssWithoutComments
  1248.         );
  1249.  
  1250.         // filter the CSS
  1251.         $search = [
  1252.             'import directives' => '/^\\s*@import\\s[^;]+;/misU',
  1253.             'remaining media enclosures' => '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
  1254.         ];
  1255.  
  1256.         $cleanedCss = preg_replace($search, '', $cssForAllowedMediaTypes);
  1257.  
  1258.         return ['css' => $cleanedCss, 'media' => $media];
  1259.     }
  1260.  
  1261.     /**
  1262.      * Creates a DOMDocument instance with the current HTML.
  1263.      *
  1264.      * @return \DOMDocument
  1265.      */
  1266.     private function createRawXmlDocument()
  1267.     {
  1268.         $xmlDocument = new \DOMDocument;
  1269.         $xmlDocument->encoding = 'UTF-8';
  1270.         $xmlDocument->strictErrorChecking = false;
  1271.         $xmlDocument->formatOutput = true;
  1272.         $libXmlState = libxml_use_internal_errors(true);
  1273.         $xmlDocument->loadHTML($this->getUnifiedHtml());
  1274.         libxml_clear_errors();
  1275.         libxml_use_internal_errors($libXmlState);
  1276.         $xmlDocument->normalizeDocument();
  1277.  
  1278.         return $xmlDocument;
  1279.     }
  1280.  
  1281.     /**
  1282.      * Returns the HTML with the unprocessable HTML tags removed and
  1283.      * with added document type and Content-Type meta tag if needed.
  1284.      *
  1285.      * @return string the unified HTML
  1286.      *
  1287.      * @throws \BadMethodCallException
  1288.      */
  1289.     private function getUnifiedHtml()
  1290.     {
  1291.         $htmlWithoutUnprocessableTags = $this->removeUnprocessableTags($this->html);
  1292.         $htmlWithDocumentType = $this->ensureDocumentType($htmlWithoutUnprocessableTags);
  1293.  
  1294.         return $this->addContentTypeMetaTag($htmlWithDocumentType);
  1295.     }
  1296.  
  1297.     /**
  1298.      * Removes the unprocessable tags from $html (if this feature is enabled).
  1299.      *
  1300.      * @param string $html
  1301.      *
  1302.      * @return string the reworked HTML with the unprocessable tags removed
  1303.      */
  1304.     private function removeUnprocessableTags($html)
  1305.     {
  1306.         if (empty($this->unprocessableHtmlTags)) {
  1307.             return $html;
  1308.         }
  1309.  
  1310.         $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags);
  1311.  
  1312.         return preg_replace(
  1313.             '/<\\/?(' . $unprocessableHtmlTags . ')[^>]*>/i',
  1314.             '',
  1315.             $html
  1316.         );
  1317.     }
  1318.  
  1319.     /**
  1320.      * Makes sure that the passed HTML has a document type.
  1321.      *
  1322.      * @param string $html
  1323.      *
  1324.      * @return string HTML with document type
  1325.      */
  1326.     private function ensureDocumentType($html)
  1327.     {
  1328.         $hasDocumentType = stripos($html, '<!DOCTYPE') !== false;
  1329.         if ($hasDocumentType) {
  1330.             return $html;
  1331.         }
  1332.  
  1333.         return self::DEFAULT_DOCUMENT_TYPE . $html;
  1334.     }
  1335.  
  1336.     /**
  1337.      * Adds a Content-Type meta tag for the charset.
  1338.      *
  1339.      * @param string $html
  1340.      *
  1341.      * @return string the HTML with the meta tag added
  1342.      */
  1343.     private function addContentTypeMetaTag($html)
  1344.     {
  1345.         $hasContentTypeMetaTag = stripos($html, 'Content-Type') !== false;
  1346.         if ($hasContentTypeMetaTag) {
  1347.             return $html;
  1348.         }
  1349.  
  1350.         // We are trying to insert the meta tag to the right spot in the DOM.
  1351.         // If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
  1352.         $hasHeadTag = stripos($html, '<head') !== false;
  1353.         $hasHtmlTag = stripos($html, '<html') !== false;
  1354.  
  1355.         if ($hasHeadTag) {
  1356.             $reworkedHtml = preg_replace('/<head(.*?)>/i', '<head$1>' . self::CONTENT_TYPE_META_TAG, $html);
  1357.         } elseif ($hasHtmlTag) {
  1358.             $reworkedHtml = preg_replace(
  1359.                 '/<html(.*?)>/i',
  1360.                 '<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
  1361.                 $html
  1362.             );
  1363.         } else {
  1364.             $reworkedHtml = self::CONTENT_TYPE_META_TAG . $html;
  1365.         }
  1366.  
  1367.         return $reworkedHtml;
  1368.     }
  1369.  
  1370.     /**
  1371.      * @param string[] $a
  1372.      * @param string[] $b
  1373.      *
  1374.      * @return int
  1375.      */
  1376.     private function sortBySelectorPrecedence(array $a, array $b)
  1377.     {
  1378.         $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
  1379.         $precedenceB = $this->getCssSelectorPrecedence($b['selector']);
  1380.  
  1381.         // We want these sorted in ascending order so selectors with lesser precedence get processed first and
  1382.         // selectors with greater precedence get sorted last.
  1383.         $precedenceForEquals = ($a['line'] < $b['line'] ? -1 : 1);
  1384.         $precedenceForNotEquals = ($precedenceA < $precedenceB ? -1 : 1);
  1385.         return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals;
  1386.     }
  1387.  
  1388.     /**
  1389.      * @param string $selector
  1390.      *
  1391.      * @return int
  1392.      */
  1393.     private function getCssSelectorPrecedence($selector)
  1394.     {
  1395.         $selectorKey = md5($selector);
  1396.         if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
  1397.             $precedence = 0;
  1398.             $value = 100;
  1399.             // ids: worth 100, classes: worth 10, elements: worth 1
  1400.             $search = ['\\#', '\\.', ''];
  1401.  
  1402.             foreach ($search as $s) {
  1403.                 if (trim($selector) === '') {
  1404.                     break;
  1405.                 }
  1406.                 $number = 0;
  1407.                 $selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number);
  1408.                 $precedence += ($value * $number);
  1409.                 $value /= 10;
  1410.             }
  1411.             $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
  1412.         }
  1413.  
  1414.         return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
  1415.     }
  1416.  
  1417.     /**
  1418.      * Maps a CSS selector to an XPath query string.
  1419.      *
  1420.      * @see http://plasmasturm.org/log/444/
  1421.      *
  1422.      * @param string $cssSelector a CSS selector
  1423.      *
  1424.      * @return string the corresponding XPath selector
  1425.      */
  1426.     private function translateCssToXpath($cssSelector)
  1427.     {
  1428.         $paddedSelector = ' ' . $cssSelector . ' ';
  1429.         $lowercasePaddedSelector = preg_replace_callback(
  1430.             '/\\s+\\w+\\s+/',
  1431.             function (array $matches) {
  1432.                 return strtolower($matches[0]);
  1433.             },
  1434.             $paddedSelector
  1435.         );
  1436.         $trimmedLowercaseSelector = trim($lowercasePaddedSelector);
  1437.         $xPathKey = md5($trimmedLowercaseSelector);
  1438.         if (isset($this->caches[self::CACHE_KEY_XPATH][$xPathKey])) {
  1439.             return $this->caches[self::CACHE_KEY_SELECTOR][$xPathKey];
  1440.         }
  1441.  
  1442.         $hasNotSelector = (bool)preg_match(
  1443.             '/^([^:]+):not\\(\\s*([[:ascii:]]+)\\s*\\)$/',
  1444.             $trimmedLowercaseSelector,
  1445.             $matches
  1446.         );
  1447.         if (!$hasNotSelector) {
  1448.             $xPath = '//' . $this->translateCssToXpathPass($trimmedLowercaseSelector);
  1449.         } else {
  1450.             /** @var string[] $matches */
  1451.             $partBeforeNot = $matches[1];
  1452.             $notContents = $matches[2];
  1453.             $xPath = '//' . $this->translateCssToXpathPass($partBeforeNot) .
  1454.                 '[not(' . $this->translateCssToXpathPassInline($notContents) . ')]';
  1455.         }
  1456.         $this->caches[self::CACHE_KEY_SELECTOR][$xPathKey] = $xPath;
  1457.  
  1458.         return $this->caches[self::CACHE_KEY_SELECTOR][$xPathKey];
  1459.     }
  1460.  
  1461.     /**
  1462.      * Flexibly translates the CSS selector $trimmedLowercaseSelector to an xPath selector.
  1463.      *
  1464.      * @param string $trimmedLowercaseSelector
  1465.      *
  1466.      * @return string
  1467.      */
  1468.     private function translateCssToXpathPass($trimmedLowercaseSelector)
  1469.     {
  1470.         return $this->translateCssToXpathPassWithMatchClassAttributesCallback(
  1471.             $trimmedLowercaseSelector,
  1472.             [$this, 'matchClassAttributes']
  1473.         );
  1474.     }
  1475.  
  1476.     /**
  1477.      * Flexibly translates the CSS selector $trimmedLowercaseSelector to an xPath selector for inline usage.
  1478.      *
  1479.      * @param string $trimmedLowercaseSelector
  1480.      *
  1481.      * @return string
  1482.      */
  1483.     private function translateCssToXpathPassInline($trimmedLowercaseSelector)
  1484.     {
  1485.         return $this->translateCssToXpathPassWithMatchClassAttributesCallback(
  1486.             $trimmedLowercaseSelector,
  1487.             [$this, 'matchClassAttributesInline']
  1488.         );
  1489.     }
  1490.  
  1491.     /**
  1492.      * Flexibly translates the CSS selector $trimmedLowercaseSelector to an xPath selector while using
  1493.      * $matchClassAttributesCallback as to match the class attributes.
  1494.      *
  1495.      * @param string $trimmedLowercaseSelector
  1496.      * @param callable $matchClassAttributesCallback
  1497.      *
  1498.      * @return string
  1499.      */
  1500.     private function translateCssToXpathPassWithMatchClassAttributesCallback(
  1501.         $trimmedLowercaseSelector,
  1502.         callable $matchClassAttributesCallback
  1503.     ) {
  1504.         $roughXpath = preg_replace(array_keys($this->xPathRules), $this->xPathRules, $trimmedLowercaseSelector);
  1505.         $xPathWithIdAttributeMatchers = preg_replace_callback(
  1506.             self::ID_ATTRIBUTE_MATCHER,
  1507.             [$this, 'matchIdAttributes'],
  1508.             $roughXpath
  1509.         );
  1510.         $xPathWithIdAttributeAndClassMatchers = preg_replace_callback(
  1511.             self::CLASS_ATTRIBUTE_MATCHER,
  1512.             $matchClassAttributesCallback,
  1513.             $xPathWithIdAttributeMatchers
  1514.         );
  1515.  
  1516.         // Advanced selectors are going to require a bit more advanced emogrification.
  1517.         $xPathWithIdAttributeAndClassMatchers = preg_replace_callback(
  1518.             '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
  1519.             [$this, 'translateNthChild'],
  1520.             $xPathWithIdAttributeAndClassMatchers
  1521.         );
  1522.         $finalXpath = preg_replace_callback(
  1523.             '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
  1524.             [$this, 'translateNthOfType'],
  1525.             $xPathWithIdAttributeAndClassMatchers
  1526.         );
  1527.  
  1528.         return $finalXpath;
  1529.     }
  1530.  
  1531.     /**
  1532.      * @param string[] $match
  1533.      *
  1534.      * @return string
  1535.      */
  1536.     private function matchIdAttributes(array $match)
  1537.     {
  1538.         return ($match[1] !== '' ? $match[1] : '*') . '[@id="' . $match[2] . '"]';
  1539.     }
  1540.  
  1541.     /**
  1542.      * @param string[] $match
  1543.      *
  1544.      * @return string xPath class attribute query wrapped in element selector
  1545.      */
  1546.     private function matchClassAttributes(array $match)
  1547.     {
  1548.         return ($match[1] !== '' ? $match[1] : '*') . '[' . $this->matchClassAttributesInline($match) . ']';
  1549.     }
  1550.  
  1551.     /**
  1552.      * @param string[] $match
  1553.      *
  1554.      * @return string xPath class attribute query
  1555.      */
  1556.     private function matchClassAttributesInline(array $match)
  1557.     {
  1558.         return 'contains(concat(" ",@class," "),concat(" ","' .
  1559.             implode(
  1560.                 '"," "))][contains(concat(" ",@class," "),concat(" ","',
  1561.                 explode('.', substr($match[2], 1))
  1562.             ) . '"," "))';
  1563.     }
  1564.  
  1565.     /**
  1566.      * @param string[] $match
  1567.      *
  1568.      * @return string
  1569.      */
  1570.     private function translateNthChild(array $match)
  1571.     {
  1572.         $parseResult = $this->parseNth($match);
  1573.  
  1574.         if (isset($parseResult[self::MULTIPLIER])) {
  1575.             if ($parseResult[self::MULTIPLIER] < 0) {
  1576.                 $parseResult[self::MULTIPLIER] = abs($parseResult[self::MULTIPLIER]);
  1577.                 $xPathExpression = sprintf(
  1578.                     '*[(last() - position()) mod %1%u = %2$u]/self::%3$s',
  1579.                     $parseResult[self::MULTIPLIER],
  1580.                     $parseResult[self::INDEX],
  1581.                     $match[1]
  1582.                 );
  1583.             } else {
  1584.                 $xPathExpression = sprintf(
  1585.                     '*[position() mod %1$u = %2$u]/self::%3$s',
  1586.                     $parseResult[self::MULTIPLIER],
  1587.                     $parseResult[self::INDEX],
  1588.                     $match[1]
  1589.                 );
  1590.             }
  1591.         } else {
  1592.             $xPathExpression = sprintf('*[%1$u]/self::%2$s', $parseResult[self::INDEX], $match[1]);
  1593.         }
  1594.  
  1595.         return $xPathExpression;
  1596.     }
  1597.  
  1598.     /**
  1599.      * @param string[] $match
  1600.      *
  1601.      * @return string
  1602.      */
  1603.     private function translateNthOfType(array $match)
  1604.     {
  1605.         $parseResult = $this->parseNth($match);
  1606.  
  1607.         if (isset($parseResult[self::MULTIPLIER])) {
  1608.             if ($parseResult[self::MULTIPLIER] < 0) {
  1609.                 $parseResult[self::MULTIPLIER] = abs($parseResult[self::MULTIPLIER]);
  1610.                 $xPathExpression = sprintf(
  1611.                     '%1$s[(last() - position()) mod %2$u = %3$u]',
  1612.                     $match[1],
  1613.                     $parseResult[self::MULTIPLIER],
  1614.                     $parseResult[self::INDEX]
  1615.                 );
  1616.             } else {
  1617.                 $xPathExpression = sprintf(
  1618.                     '%1$s[position() mod %2$u = %3$u]',
  1619.                     $match[1],
  1620.                     $parseResult[self::MULTIPLIER],
  1621.                     $parseResult[self::INDEX]
  1622.                 );
  1623.             }
  1624.         } else {
  1625.             $xPathExpression = sprintf('%1$s[%2$u]', $match[1], $parseResult[self::INDEX]);
  1626.         }
  1627.  
  1628.         return $xPathExpression;
  1629.     }
  1630.  
  1631.     /**
  1632.      * @param string[] $match
  1633.      *
  1634.      * @return int[]
  1635.      */
  1636.     private function parseNth(array $match)
  1637.     {
  1638.         if (in_array(strtolower($match[2]), ['even', 'odd'], true)) {
  1639.             // we have "even" or "odd"
  1640.             $index = strtolower($match[2]) === 'even' ? 0 : 1;
  1641.             return [self::MULTIPLIER => 2, self::INDEX => $index];
  1642.         }
  1643.         if (stripos($match[2], 'n') === false) {
  1644.             // if there is a multiplier
  1645.             $index = (int)str_replace(' ', '', $match[2]);
  1646.             return [self::INDEX => $index];
  1647.         }
  1648.  
  1649.         if (isset($match[3])) {
  1650.             $multipleTerm = str_replace($match[3], '', $match[2]);
  1651.             $index = (int)str_replace(' ', '', $match[3]);
  1652.         } else {
  1653.             $multipleTerm = $match[2];
  1654.             $index = 0;
  1655.         }
  1656.  
  1657.         $multiplier = str_ireplace('n', '', $multipleTerm);
  1658.  
  1659.         if ($multiplier === '') {
  1660.             $multiplier = 1;
  1661.         } elseif ($multiplier === '0') {
  1662.             return [self::INDEX => $index];
  1663.         } else {
  1664.             $multiplier = (int)$multiplier;
  1665.         }
  1666.  
  1667.         while ($index < 0) {
  1668.             $index += abs($multiplier);
  1669.         }
  1670.  
  1671.         return [self::MULTIPLIER => $multiplier, self::INDEX => $index];
  1672.     }
  1673.  
  1674.     /**
  1675.      * Parses a CSS declaration block into property name/value pairs.
  1676.      *
  1677.      * Example:
  1678.      *
  1679.      * The declaration block
  1680.      *
  1681.      *   "color: #000; font-weight: bold;"
  1682.      *
  1683.      * will be parsed into the following array:
  1684.      *
  1685.      *   "color" => "#000"
  1686.      *   "font-weight" => "bold"
  1687.      *
  1688.      * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
  1689.      *
  1690.      * @return string[]
  1691.      *         the CSS declarations with the property names as array keys and the property values as array values
  1692.      */
  1693.     private function parseCssDeclarationsBlock($cssDeclarationsBlock)
  1694.     {
  1695.         if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
  1696.             return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
  1697.         }
  1698.  
  1699.         $properties = [];
  1700.         $declarations = preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock);
  1701.  
  1702.         foreach ($declarations as $declaration) {
  1703.             $matches = [];
  1704.             if (!preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/', trim($declaration), $matches)) {
  1705.                 continue;
  1706.             }
  1707.  
  1708.             $propertyName = strtolower($matches[1]);
  1709.             $propertyValue = $matches[2];
  1710.             $properties[$propertyName] = $propertyValue;
  1711.         }
  1712.         $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;
  1713.  
  1714.         return $properties;
  1715.     }
  1716.  
  1717.     /**
  1718.      * Find the nodes that are not to be emogrified.
  1719.      *
  1720.      * @param \DOMXPath $xPath
  1721.      *
  1722.      * @return \DOMElement[]
  1723.      *
  1724.      * @throws \InvalidArgumentException
  1725.      */
  1726.     private function getNodesToExclude(\DOMXPath $xPath)
  1727.     {
  1728.         $excludedNodes = [];
  1729.         foreach (array_keys($this->excludedSelectors) as $selectorToExclude) {
  1730.             try {
  1731.                 $matchingNodes = $xPath->query($this->translateCssToXpath($selectorToExclude));
  1732.             } catch (\InvalidArgumentException $e) {
  1733.                 if ($this->debug) {
  1734.                     throw $e;
  1735.                 }
  1736.                 continue;
  1737.             }
  1738.             foreach ($matchingNodes as $node) {
  1739.                 $excludedNodes[] = $node;
  1740.             }
  1741.         }
  1742.  
  1743.         return $excludedNodes;
  1744.     }
  1745.  
  1746.     /**
  1747.      * Handles invalid xPath expression warnings, generated during the process() method,
  1748.      * during querying \DOMDocument and trigger \InvalidArgumentException with invalid selector
  1749.      * or \RuntimeException, depending on the source of the warning.
  1750.      *
  1751.      * @param int $type
  1752.      * @param string $message
  1753.      * @param string $file
  1754.      * @param int $line
  1755.      * @param array $context
  1756.      *
  1757.      * @return bool always false
  1758.      *
  1759.      * @throws \InvalidArgumentException
  1760.      * @throws \RuntimeException
  1761.      */
  1762.     public function handleXpathQueryWarnings( // @codingStandardsIgnoreLine
  1763.         $type,
  1764.         $message,
  1765.         $file,
  1766.         $line,
  1767.         array $context
  1768.     ) {
  1769.         $selector = '';
  1770.         if (isset($context['cssRule']['selector'])) {
  1771.             // warnings generated by invalid/unrecognized selectors in method process()
  1772.             $selector = $context['cssRule']['selector'];
  1773.         } elseif (isset($context['selectorToExclude'])) {
  1774.             // warnings generated by invalid/unrecognized selectors in method getNodesToExclude()
  1775.             $selector = $context['selectorToExclude'];
  1776.         } elseif (isset($context['cssSelector'])) {
  1777.             // warnings generated by invalid/unrecognized selectors in method existsMatchForCssSelector()
  1778.             $selector = $context['cssSelector'];
  1779.         }
  1780.  
  1781.         if ($selector !== '') {
  1782.             throw new \InvalidArgumentException(
  1783.                 sprintf('%1$s in selector >> %2$s << in %3$s on line %4$u', $message, $selector, $file, $line),
  1784.                 1509279985
  1785.             );
  1786.         }
  1787.  
  1788.         // Catches eventual warnings generated by method getAllNodesWithStyleAttribute()
  1789.         if (isset($context['xPath'])) {
  1790.             throw new \RuntimeException(
  1791.                 sprintf('%1$s in %2$s on line %3$u', $message, $file, $line),
  1792.                 1509280067
  1793.             );
  1794.         }
  1795.  
  1796.         // the normal error handling continues when handler return false
  1797.         return false;
  1798.     }
  1799.  
  1800.     /**
  1801.      * Sets the debug mode.
  1802.      *
  1803.      * @param bool $debug set to true to enable debug mode
  1804.      *
  1805.      * @return void
  1806.      */
  1807.     public function setDebug($debug)
  1808.     {
  1809.         $this->debug = $debug;
  1810.     }
  1811. }
Add Comment
Please, Sign In to add comment