/**
 * Score how well the boundary between two strings falls on a logical edge.
 *
 * Only the last character of $one and the first character of $two are
 * classified; the blank-line test additionally looks at the adjoining ends
 * of the full strings.
 *
 * Scores, from best to worst:
 *   6 - either string is empty (the boundary is an edge of the text)
 *   5 - blank line
 *   4 - line break
 *   3 - end of sentence (punctuation immediately followed by whitespace)
 *   2 - whitespace
 *   1 - non-alphanumeric
 *   0 - none of the above
 *
 * @param string $one First string.
 * @param string $two Second string.
 *
 * @return int Score in the range 0 (worst) to 6 (best).
 */
protected function cleanupSemanticScore($one, $two)
{
    // Cast before the strict comparison so that null/false keep being
    // treated as "empty", as they were with the former loose comparison.
    if ((string) $one === '' || (string) $two === '') {
        // Edges are the best.
        return 6;
    }

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'. Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    // NOTE(review): mb_substr() uses the current mbstring internal encoding,
    // while the "u" patterns below expect UTF-8 - confirm callers use UTF-8.
    $char1 = mb_substr($one, -1, 1);
    $char2 = mb_substr($two, 0, 1);

    $nonAlphaNumeric1 = preg_match('/[^[:alnum:]]/u', $char1) === 1;
    $nonAlphaNumeric2 = preg_match('/[^[:alnum:]]/u', $char2) === 1;

    // The whitespace test must run in UTF-8 mode like the alphanumeric test
    // above. In byte mode a multi-byte space (e.g. U+00A0, U+3000) is never
    // recognised, and under some locales a single byte of a multi-byte
    // character could be misclassified as whitespace.
    $whitespace1 = $nonAlphaNumeric1 && preg_match('/\s/u', $char1) === 1;
    $whitespace2 = $nonAlphaNumeric2 && preg_match('/\s/u', $char2) === 1;

    // CR and LF are single ASCII bytes that never occur inside a UTF-8
    // multi-byte sequence, so the remaining patterns are safe in byte mode
    // and stay usable even if the full strings contain invalid UTF-8.
    $lineBreak1 = $whitespace1 && preg_match('/[\r\n]/', $char1) === 1;
    $lineBreak2 = $whitespace2 && preg_match('/[\r\n]/', $char2) === 1;
    $blankLine1 = $lineBreak1 && preg_match('/\n\r?\n$/', $one) === 1;
    $blankLine2 = $lineBreak2 && preg_match('/^\r?\n\r?\n/', $two) === 1;

    if ($blankLine1 || $blankLine2) {
        // Five points for blank lines.
        return 5;
    }
    if ($lineBreak1 || $lineBreak2) {
        // Four points for line breaks.
        return 4;
    }
    if ($nonAlphaNumeric1 && !$whitespace1 && $whitespace2) {
        // Three points for end of sentences.
        return 3;
    }
    if ($whitespace1 || $whitespace2) {
        // Two points for whitespace.
        return 2;
    }
    if ($nonAlphaNumeric1 || $nonAlphaNumeric2) {
        // One point for non-alphanumeric.
        return 1;
    }

    return 0;
}