public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE)
{
$valid = array('id', 'class', 'tag', 'content', 'attributes', 'parent', 'child', 'ancestor', 'descendant', 'children');
$filtered = array();
$options = self::assertValidKeys($options, $valid);
// find the element by id
if ($options['id']) {
$options['attributes']['id'] = $options['id'];
}
if ($options['class']) {
$options['attributes']['class'] = $options['class'];
}
// find the element by a tag type
if ($options['tag']) {
if ($isHtml) {
$elements = self::getElementsByCaseInsensitiveTagName($dom, $options['tag']);
} else {
$elements = $dom->getElementsByTagName($options['tag']);
}
foreach ($elements as $element) {
$nodes[] = $element;
}
if (empty($nodes)) {
return FALSE;
}
} else {
$tags = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo', 'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl', 'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link', 'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select', 'small', 'span', 'strong', 'style', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'ul', 'var');
foreach ($tags as $tag) {
if ($isHtml) {
$elements = self::getElementsByCaseInsensitiveTagName($dom, $tag);
} else {
$elements = $dom->getElementsByTagName($tag);
}
foreach ($elements as $element) {
$nodes[] = $element;
}
}
if (empty($nodes)) {
return FALSE;
}
}
// filter by attributes
if ($options['attributes']) {
foreach ($nodes as $node) {
$invalid = FALSE;
foreach ($options['attributes'] as $name => $value) {
// match by regexp if like "regexp:/foo/i"
if (preg_match('/^regexp\\s*:\\s*(.*)/i', $value, $matches)) {
if (!preg_match($matches[1], $node->getAttribute($name))) {
$invalid = TRUE;
}
} else {
if ($name == 'class') {
// split to individual classes
$findClasses = explode(' ', preg_replace("/\\s+/", " ", $value));
$allClasses = explode(' ', preg_replace("/\\s+/", " ", $node->getAttribute($name)));
// make sure each class given is in the actual node
foreach ($findClasses as $findClass) {
if (!in_array($findClass, $allClasses)) {
$invalid = TRUE;
}
}
} else {
if ($node->getAttribute($name) != $value) {
$invalid = TRUE;
}
}
}
}
// if every attribute given matched
if (!$invalid) {
$filtered[] = $node;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by content
if ($options['content'] !== NULL) {
foreach ($nodes as $node) {
$invalid = FALSE;
// match by regexp if like "regexp:/foo/i"
if (preg_match('/^regexp\\s*:\\s*(.*)/i', $options['content'], $matches)) {
if (!preg_match($matches[1], self::getNodeText($node))) {
$invalid = TRUE;
}
} else {
if (strstr(self::getNodeText($node), $options['content']) === FALSE) {
$invalid = TRUE;
}
}
if (!$invalid) {
$filtered[] = $node;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by parent node
if ($options['parent']) {
$parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
$parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;
foreach ($nodes as $node) {
if ($parentNode !== $node->parentNode) {
break;
}
$filtered[] = $node;
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by child node
if ($options['child']) {
$childNodes = self::findNodes($dom, $options['child'], $isHtml);
$childNodes = !empty($childNodes) ? $childNodes : array();
foreach ($nodes as $node) {
foreach ($node->childNodes as $child) {
foreach ($childNodes as $childNode) {
if ($childNode === $child) {
$filtered[] = $node;
}
}
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by ancestor
if ($options['ancestor']) {
$ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
$ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;
foreach ($nodes as $node) {
$parent = $node->parentNode;
while ($parent->nodeType != XML_HTML_DOCUMENT_NODE) {
if ($parent === $ancestorNode) {
$filtered[] = $node;
}
$parent = $parent->parentNode;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by descendant
if ($options['descendant']) {
$descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
$descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
foreach ($nodes as $node) {
foreach (self::getDescendants($node) as $descendant) {
foreach ($descendantNodes as $descendantNode) {
if ($descendantNode === $descendant) {
$filtered[] = $node;
}
}
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return FALSE;
}
}
// filter by children
if ($options['children']) {
$validChild = array('count', 'greater_than', 'less_than', 'only');
$childOptions = self::assertValidKeys($options['children'], $validChild);
foreach ($nodes as $node) {
$childNodes = $node->childNodes;
foreach ($childNodes as $childNode) {
if ($childNode->nodeType !== XML_CDATA_SECTION_NODE && $childNode->nodeType !== XML_TEXT_NODE) {
$children[] = $childNode;
}
}
// we must have children to pass this filter
if (!empty($children)) {
// exact count of children
if ($childOptions['count'] !== NULL) {
if (count($children) !== $childOptions['count']) {
break;
}
} else {
if ($childOptions['less_than'] !== NULL && $childOptions['greater_than'] !== NULL) {
if (count($children) >= $childOptions['less_than'] || count($children) <= $childOptions['greater_than']) {
break;
}
} else {
if ($childOptions['less_than'] !== NULL) {
if (count($children) >= $childOptions['less_than']) {
break;
}
} else {
if ($childOptions['greater_than'] !== NULL) {
if (count($children) <= $childOptions['greater_than']) {
break;
}
}
}
}
}
// match each child against a specific tag
if ($childOptions['only']) {
$onlyNodes = self::findNodes($dom, $childOptions['only'], $isHtml);
// try to match each child to one of the 'only' nodes
foreach ($children as $child) {
$matched = FALSE;
foreach ($onlyNodes as $onlyNode) {
if ($onlyNode === $child) {
$matched = TRUE;
}
}
if (!$matched) {
break 2;
}
}
}
$filtered[] = $node;
}
}
$nodes = $filtered;
$filtered = array();
if (empty($nodes)) {
return;
}
}
// return the first node that matches all criteria
return !empty($nodes) ? $nodes : array();
}