Loop through all elements of the body tag using the DOM
$html = file_get_contents("test.html"); $doc = new DOMDocument(); $doc->loadHTML($html); $xpath = new DOMXPath($doc); $body = $xpath->query('//body'); I want to loop through all the elements inside the body tag of an HTML file and print the "style" attribute of each of these elements. How can I do this?
You can use a RecursiveDOMIterator for this:
Code (with seal)
/**
 * Recursive iterator over the child nodes of a DOM node.
 *
 * The node handed to the constructor is not yielded itself; iteration covers
 * its child nodes, and getChildren() descends one level further. Wrap an
 * instance in a RecursiveIteratorIterator to walk a complete subtree.
 *
 * The methods required by Iterator/RecursiveIterator carry the
 * #[\ReturnTypeWillChange] attribute: PHP 8.1+ otherwise reports a deprecation
 * notice for each of them because they declare no return type, while older
 * PHP versions parse the attribute line as an ordinary "#" comment.
 */
class RecursiveDOMIterator implements RecursiveIterator
{
    /** @var int Zero-based index of the current node within $_nodeList. */
    protected $_position;

    /** @var DOMNodeList Child nodes of the node given to the constructor. */
    protected $_nodeList;

    /**
     * @param DOMNode $domNode Node whose child nodes are iterated.
     */
    public function __construct(DOMNode $domNode)
    {
        $this->_position = 0;
        $this->_nodeList = $domNode->childNodes;
    }

    /**
     * Build an iterator over the children of the current node.
     *
     * @return RecursiveDOMIterator
     */
    #[\ReturnTypeWillChange]
    public function getChildren()
    {
        return new self($this->current());
    }

    /**
     * @return int Index of the current node in the node list.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->_position;
    }

    /**
     * Advance to the next sibling node.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->_position++;
    }

    /**
     * Return to the first child node.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->_position = 0;
    }

    /**
     * @return bool True while the position still points inside the node list.
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->_position < $this->_nodeList->length;
    }

    /**
     * @return bool True when the current node has child nodes of its own.
     */
    #[\ReturnTypeWillChange]
    public function hasChildren()
    {
        return $this->current()->hasChildNodes();
    }

    /**
     * @return DOMNode|null Node at the current position.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->_nodeList->item($this->_position);
    }
}
Application:
// Real-world pages are rarely valid markup. Have libxml collect its parse
// complaints internally so they do not surface as PHP warnings mixed into
// the printed result.
$previousLibxmlSetting = libxml_use_internal_errors(true);

$dom = new DOMDocument;
$dom->loadHTMLFile('http://stackoverflow.com/questions/4431142/');

// Drop the collected parse errors and put libxml back into its previous mode.
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Visit every node of the document, each parent before its children.
$dit = new RecursiveIteratorIterator(
    new RecursiveDOMIterator($dom),
    RecursiveIteratorIterator::SELF_FIRST
);

foreach ($dit as $node) {
    // hasAttribute() is only called on element nodes; report those that
    // carry a style attribute.
    if ($node->nodeType === XML_ELEMENT_NODE && $node->hasAttribute('style')) {
        printf(
            'Element %s - Styles: %s%s',
            $node->nodeName,
            $node->getAttribute('style'),
            PHP_EOL
        );
    }
}
Output:
Element div - Styles: margin-top: 8px; height:24px; Element div - Styles: margin-top: 8px; height:24px; display:none; Element a - Styles: font-size: 200%; margin-left: 30px; Element div - Styles: display:none Element div - Styles: display:none Element span - Styles: color:#FE7A15;font-size:140% Element span - Styles: color:#FE7A15;font-size:140% Element span - Styles: color:#FE7A15;font-size:140% Element span - Styles: color:#E8272C;font-size:140% Element span - Styles: color:#00AFEF;font-size:140% Element span - Styles: color:#969696;font-size:140% Element span - Styles: color:#46937D;font-size:140% Element span - Styles: color:#C0D0DC;font-size:140% Element span - Styles: color:#000;font-size:140% Element span - Styles: color:#dd4814;font-size:140% Element span - Styles: color:#9ce4fe;font-size:140% Element span - Styles: color:#cf4d3f;font-size:140% Element span - Styles: color:#f4f28d;font-size:140% Element span - Styles: color:#0f3559;font-size:140% Element span - Styles: color:#f2f2f2;font-size:140% Element span - Styles: color:#037187;font-size:140% Element span - Styles: color:#f1e7cc;font-size:140% Element span - Styles: color:#e1cdae;font-size:140% Element span - Styles: color:#a2d9f6;font-size:140% Another option would be to use XPath only to find elements that come from <body> and have a style attribute, for example:
$dom = new DOMDocument; $dom->loadHTMLFile('https://stackoverflow.com/questions/4431142/'); $xpath = new DOMXPath($dom); $nodes = $xpath->query('/html/body//*[@style]'); foreach($nodes as $node) { printf( 'Element %s - Styles: %s%s', $node->nodeName, $node->getAttribute('style'), PHP_EOL ); } The result is the same as in Gordon's answer, and the only important line is $nodes = …
I did it recursively. I am not sure if this is the most efficient way. I tried the method on this web page and it worked perfectly.
/**
 * Print the tag name and style attribute of an element and of every element
 * nested below it, parents before their children.
 *
 * Each element produces one line: tag name, a tab, the value returned by
 * getAttribute('style'), and a newline. Nodes that are not elements are
 * skipped together with anything below them.
 *
 * @param DOMNode $node Node at which the walk starts.
 */
function recursePrintStyles($node)
{
    if ($node->nodeType === XML_ELEMENT_NODE) {
        echo $node->tagName . "\t" . $node->getAttribute('style') . "\n";

        foreach ($node->childNodes as $descendant) {
            recursePrintStyles($descendant);
        }
    }
}

// Parse the markup held in $html (set outside this snippet) and start the
// walk at the first <body> element found in the document.
$dom = new DOMDocument();
$dom->loadHTML($html);

$xpath = new DOMXPath($dom);
$body = $xpath->query('//body')->item(0);

recursePrintStyles($body);