Published on 2 Dec 2010. Tagged with php, algorithmicadvent.
Hint: If no support for varying character lengths is needed (e. g. when not working with UTF-8), the mb_* function calls should be replaced with their non-multibyte pendants. Especially mb_strpos
is very inefficient when used in loops.
/**
* Returns the longest intersection of two strings
*
* Usage example:
*
* $a = 'This is a wonderful test string.';
* $b = 'This is another wonderful string.';
* var_dump(getLongestIntersection($a, $b)); // string(11) " wonderful "
*
* @param string $haystack First string
* @param string $needle Second string
* @return string Longest intersection of both strings
*/
function getLongestIntersection($haystack, $needle)
{
// Length of needle string
$nLen = mb_strlen($needle);
// Starting point (in needle) of longest matching substring
$maxAnchor = 0;
// Length of (so far) longest matching substring
$maxLen = 0;
// Starting point (in needle) of current substring to test
$sAnchor = 0;
// Length of current substring to test
$sLen = 0;
// Do checks until either substring anchor runs out of needle string or
// until there is no room left for a longer match
while ($sAnchor < $nLen
&& $sAnchor + $sLen <= $nLen
) {
// Increase substring length
$sLen++;
if (mb_strpos($haystack,
mb_substr($needle, $sAnchor, $sLen)) !== false
) {
/* Substring found. A new match is always the longest match */
$maxLen = $sLen;
$maxAnchor = $sAnchor;
} else {
/* Substring not found */
// Shift anchor one position to the right
$sAnchor++;
// Don't bother looking for substrings shorter than current best
// match. This makes sure that a new match is always the longest
// match up to this point
$sLen--;
}
}
return mb_substr($needle, $maxAnchor, $maxLen);
}