/**
 * Reduce the number of edits by removing equalities that are too small to be
 * meaningful and by factoring out overlaps between adjacent delete/insert pairs.
 *
 * Works on the list returned by getChanges() and stores the result back with
 * setChanges(). Each entry is a two-element array: [operation, text], where
 * operation is one of self::EQUAL, self::INSERT or self::DELETE.
 *
 * The method runs in three stages:
 *  1. Any equality that is no longer than the edits on both sides of it is
 *     replaced by a DELETE + INSERT of the same text.
 *  2. cleanupMerge() is called if stage 1 changed anything, then
 *     cleanupSemanticLossless() is always called.
 *  3. For every DELETE immediately followed by an INSERT, a shared
 *     suffix/prefix is extracted as an EQUAL when it covers at least half of
 *     either edit.
 *
 * @return $this
 */
public function cleanupSemantic()
{
    $diffs = $this->getChanges();
    $changes = false;
    // Stack of indices where equalities are found.
    $equalities = array();
    // Text of the equality on top of the stack, or null when there is none to evaluate.
    $lastequality = null;
    // Index of current position.
    $pointer = 0;
    // Number of chars that changed prior to the equality.
    $length_insertions1 = 0;
    $length_deletions1 = 0;
    // Number of chars that changed after the equality.
    $length_insertions2 = 0;
    $length_deletions2 = 0;

    while ($pointer < count($diffs)) {
        if ($diffs[$pointer][0] == self::EQUAL) {
            // Equality found: the edits counted so far now sit "before" it.
            $equalities[] = $pointer;
            $length_insertions1 = $length_insertions2;
            $length_insertions2 = 0;
            $length_deletions1 = $length_deletions2;
            $length_deletions2 = 0;
            $lastequality = $diffs[$pointer][1];
        } else {
            // An insertion or deletion: count it as an edit after the last equality.
            if ($diffs[$pointer][0] == self::INSERT) {
                $length_insertions2 += mb_strlen($diffs[$pointer][1]);
            } else {
                $length_deletions2 += mb_strlen($diffs[$pointer][1]);
            }
            // Eliminate an equality that is smaller or equal to the edits on both sides of it.
            // The emptiness check comes first so mb_strlen() is never called on null.
            if ($lastequality != ''
                && mb_strlen($lastequality) <= max($length_insertions1, $length_deletions1)
                && mb_strlen($lastequality) <= max($length_insertions2, $length_deletions2)
            ) {
                $insertPointer = array_pop($equalities);
                // Duplicate record.
                array_splice($diffs, $insertPointer, 0, array(array(self::DELETE, $lastequality)));
                // Change second copy to insert.
                $diffs[$insertPointer + 1][0] = self::INSERT;
                // Throw away the previous equality (it needs to be reevaluated).
                if (count($equalities)) {
                    array_pop($equalities);
                }
                // Rewind to the last surviving equality; -1 becomes 0 after the increment below.
                $pointer = count($equalities) ? end($equalities) : -1;
                // Reset the counters.
                $length_insertions1 = 0;
                $length_deletions1 = 0;
                $length_insertions2 = 0;
                $length_deletions2 = 0;
                $lastequality = null;
                $changes = true;
            }
        }
        $pointer++;
    }
    $this->setChanges($diffs);

    // Normalize the diff.
    if ($changes) {
        $this->cleanupMerge();
    }
    $this->cleanupSemanticLossless();
    $diffs = $this->getChanges();

    // Find any overlaps between deletions and insertions.
    // e.g: <del>abcxxx</del><ins>xxxdef</ins>
    //   -> <del>abc</del>xxx<ins>def</ins>
    // e.g: <del>xxxabc</del><ins>defxxx</ins>
    //   -> <ins>def</ins>xxx<del>abc</del>
    // Only extract an overlap if it is as big as the edit ahead or behind it.
    $pointer = 1;
    while ($pointer < count($diffs)) {
        if ($diffs[$pointer - 1][0] == self::DELETE && $diffs[$pointer][0] == self::INSERT) {
            $deletion = $diffs[$pointer - 1][1];
            $insertion = $diffs[$pointer][1];
            $deletionLength = mb_strlen($deletion);
            $insertionLength = mb_strlen($insertion);
            // overlap_length1: end of the deletion matching the start of the insertion.
            // overlap_length2: end of the insertion matching the start of the deletion.
            $overlap_length1 = $this->getToolkit()->commontOverlap($deletion, $insertion);
            $overlap_length2 = $this->getToolkit()->commontOverlap($insertion, $deletion);
            if ($overlap_length1 >= $overlap_length2) {
                if ($overlap_length1 >= $deletionLength / 2 || $overlap_length1 >= $insertionLength / 2) {
                    // Overlap found. Insert an equality and trim the surrounding edits.
                    array_splice($diffs, $pointer, 0, array(array(
                        self::EQUAL,
                        mb_substr($insertion, 0, $overlap_length1),
                    )));
                    // Explicit length instead of a negative one: mb_substr($s, 0, -0)
                    // would return '' and drop the whole deletion when the overlap is 0.
                    $diffs[$pointer - 1][1] = mb_substr($deletion, 0, $deletionLength - $overlap_length1);
                    $diffs[$pointer + 1][1] = mb_substr($insertion, $overlap_length1);
                    $pointer++;
                }
            } else {
                if ($overlap_length2 >= $deletionLength / 2 || $overlap_length2 >= $insertionLength / 2) {
                    // Reverse overlap found.
                    // Insert an equality and swap and trim the surrounding edits.
                    array_splice($diffs, $pointer, 0, array(array(
                        self::EQUAL,
                        mb_substr($deletion, 0, $overlap_length2),
                    )));
                    // Keep the insertion WITHOUT its trailing overlap (not its first
                    // $overlap_length2 characters, which is only right by coincidence
                    // when the insertion is exactly twice as long as the overlap).
                    $diffs[$pointer - 1] = array(
                        self::INSERT,
                        mb_substr($insertion, 0, $insertionLength - $overlap_length2),
                    );
                    $diffs[$pointer + 1] = array(
                        self::DELETE,
                        mb_substr($deletion, $overlap_length2),
                    );
                    $pointer++;
                }
            }
            // Skip past the insertion that was just examined.
            $pointer++;
        }
        $pointer++;
    }
    $this->setChanges($diffs);

    return $this;
}