/**
 * Builds the pattern cache entries from INI content and yields them grouped by cache subkey.
 *
 * The method does not write anything itself; the consumer of the generator decides what to do
 * with the yielded data. The yielded values are:
 * - a single empty array when no pattern is found in $content,
 * - otherwise one array `[subkey => list of lines]` for every subkey that received patterns,
 *   followed by one array `[subkey => []]` for every remaining key of
 *   SubKey::getAllPatternCacheSubkeys().
 *
 * Every line has the form "<pattern hash>\t<pattern length>\t<pattern>[\t<pattern>...]" and
 * contains at most self::COUNT_PATTERN patterns.
 *
 * @param string $content the INI content the section names (patterns) are read from
 *
 * @return \Generator
 */
public function createPatterns($content)
{
    // get all relevant patterns from the INI file
    // - containing "*" or "?"
    // - not containing "*" or "?", but not having a comment
    preg_match_all('/(?<=\\[)(?:[^\\r\\n]*[?*][^\\r\\n]*)(?=\\])|(?<=\\[)(?:[^\\r\\n*?]+)(?=\\])(?![^\\[]*Comment=)/m', $content, $matches);

    if (empty($matches[0]) || !is_array($matches[0])) {
        yield [];

        return;
    }

    $quoterHelper = new Quoter();
    $patterns     = $matches[0];
    unset($matches);

    usort($patterns, [$this, 'compareBcStrings']);

    // build an array to structure the data. this requires some memory, but we need this step to be able to
    // sort the data in the way we need it (see below).
    $data = [];

    foreach ($patterns as $pattern) {
        // the version section is not a user agent pattern
        if ('GJK_Browscap_Version' === $pattern) {
            continue;
        }

        $pattern     = strtolower($pattern);
        $patternhash = Pattern::getHashForPattern($pattern, false);
        $tmpLength   = Pattern::getPatternLength($pattern);

        // special handling of default entry: a fixed hash made of "z" characters is used for
        // patterns with a length of 0 (presumably so that ksort() below puts them last - this
        // relies on the regular hashes sorting before "z")
        if ($tmpLength === 0) {
            $patternhash = str_repeat('z', 32);
        }

        if (!isset($data[$patternhash])) {
            $data[$patternhash] = [];
        }

        if (!isset($data[$patternhash][$tmpLength])) {
            $data[$patternhash][$tmpLength] = [];
        }

        $pattern = $quoterHelper->pregQuote($pattern);

        // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
        // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
        // This helps to speed up the first (and most expensive) part of the pattern search a lot.
        if (strpbrk($pattern, '0123456789') !== false) {
            $compressedPattern = preg_replace('/\\d/', '[\\d]', $pattern);

            if ($compressedPattern === null) {
                // preg_replace() reports an error by returning null; fall back to the
                // uncompressed pattern instead of storing a non-string value
                $compressedPattern = $pattern;
            }

            // patterns differing only in their digits collapse to the same string, keep it once
            if (!in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                $data[$patternhash][$tmpLength][] = $compressedPattern;
            }
        } else {
            $data[$patternhash][$tmpLength][] = $pattern;
        }
    }

    unset($patterns);

    // sorting of the data is important to check the patterns later in the correct order, because
    // we need to check the most specific (=longest) patterns first, and the least specific
    // (".*" for "Default Browser") last.
    //
    // sort by pattern start to group them
    ksort($data);

    // and then by pattern length (longest first)
    foreach (array_keys($data) as $key) {
        krsort($data[$key]);
    }

    // build the optimized lines (grouped by the subkey derived from the pattern hash) with
    // multiple patterns joined by tabs. this is to speed up loading of the data (small array
    // with pattern strings instead of a large array with single patterns) and also enables
    // us to search for multiple patterns in one preg_match call for a fast first search
    // (3-10 times faster), followed by a detailed search for each single pattern.
    $contents = [];

    foreach ($data as $patternhash => $tmpEntries) {
        if (empty($tmpEntries)) {
            continue;
        }

        $subkey = SubKey::getPatternCacheSubkey($patternhash);

        if (!isset($contents[$subkey])) {
            $contents[$subkey] = [];
        }

        foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
            if (empty($tmpPatterns)) {
                continue;
            }

            // limit the number of patterns per line to self::COUNT_PATTERN
            $chunks = array_chunk($tmpPatterns, self::COUNT_PATTERN);

            foreach ($chunks as $chunk) {
                $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
            }
        }
    }

    unset($data);

    // every known subkey is yielded exactly once: first the ones that received patterns,
    // afterwards the remaining ones with an empty list
    $subkeys = SubKey::getAllPatternCacheSubkeys();

    // the loop variable is deliberately not named $content, to avoid overwriting the parameter
    foreach ($contents as $subkey => $lines) {
        // PHP converts integer-like array keys to int, so restore the string form
        $subkey = (string) $subkey;

        yield [$subkey => $lines];

        unset($subkeys[$subkey]);
    }

    foreach (array_keys($subkeys) as $subkey) {
        $subkey = (string) $subkey;

        yield [$subkey => []];
    }
}