/**
 * Collect query-string-free image URLs found in a piece of HTML.
 *
 * Each 'src' reported by Jetpack_PostImages::from_html() is normalised by
 * removing its query string and is appended to the supplied list unless an
 * identical entry is already present. When the URL parses into scheme, host
 * and path, it is rebuilt from those parts (plus the port, if one was given),
 * which also drops any fragment.
 *
 * @param mixed $html                     Markup handed to Jetpack_PostImages::from_html().
 * @param array $images_already_extracted URLs collected earlier; they are kept at the head of the result.
 *
 * @return array The supplied list followed by any newly discovered URLs.
 */
public static function get_images_from_html($html, $images_already_extracted)
{
    $image_list = $images_already_extracted;
    $from_html = Jetpack_PostImages::from_html($html);
    if (empty($from_html)) {
        return $image_list;
    }

    foreach (wp_list_pluck($from_html, 'src') as $image_url) {
        // A missing or non-string src cannot be normalised into a URL.
        if (!is_string($image_url) || '' === $image_url) {
            continue;
        }

        $src = parse_url($image_url);
        if ($src && isset($src['scheme'], $src['host'], $src['path'])) {
            // Rebuild the URL without the query string. An explicit port is
            // kept, otherwise the rebuilt URL would point at a different origin.
            $port = isset($src['port']) ? ':' . $src['port'] : '';
            $queryless = $src['scheme'] . '://' . $src['host'] . $port . $src['path'];
        } else {
            // If parse_url() didn't give us enough to work with, strip off the
            // query string the old fashioned way. Compare against false
            // explicitly: a '?' at offset 0 is a valid match, not "not found".
            $length = strpos($image_url, '?');
            $queryless = false === $length ? $image_url : substr($image_url, 0, $length);
        }

        // Nothing left once the query string is removed.
        if ('' === $queryless) {
            continue;
        }

        // Discard URLs that are longer than 4KB, these are likely data URIs or malformed HTML.
        if (4096 < strlen($queryless)) {
            continue;
        }

        // Strict comparison so de-duplication is not affected by loose type juggling.
        if (!in_array($queryless, $image_list, true)) {
            $image_list[] = $queryless;
        }
    }

    return $image_list;
}