Marc Ermshaus’ avatar

Marc Ermshaus

Linkblog

Algorithmic Advent: 09 – Run-length encoding

Published on 9 Dec 2010. Tagged with php, algorithmicadvent.

Use this functions to encode data that contains long sequences of the same byte such as barcodes or black-and-white images.

/**
 * Encodes the input using a byte-wise run-length algorithm
 *
 * Example:
 *
 *   $input = str_repeat('A', 1)
 *          . str_repeat('B', 32)
 *          . str_repeat('C', 17)
 *          . str_repeat('D', 517);
 *
 *   echo rle_encode($input);
 *
 * Outputs a string containing the following bytes:
 *
 *   01 41 20 42 11 43 FF 44 FF 44 07 44
 *
 * This translates to:
 *
 *   Amount  Byte
 *   ============
 *      1    "A"
 *     32    "B"
 *     17    "C"
 *    255    "D"
 *    255    "D"
 *      7    "D"
 *
 * @param string $data String to encode
 * @return string Encoded string
 */
function rle_encode($data)
{
    $data = (string) $data;
    $ret  = '';
    $len  = strlen($data);
    $char = '';
    $cnt  = 0;

    for ($i = 0; $i < $len; $i++) {
        $c = substr($data, $i, 1);
        if ($c !== $char || $cnt === 255) {
            if ($char !== '') {
                $ret .= chr($cnt) . $char;
            }
            $char = $c;
            $cnt = 1;
        } else {
            $cnt++;
        }
    }

    if ($char !== '') {
        $ret .= chr($cnt) . $char;
    }

    return $ret;
}

/**
 * Decodes a run-length encoded string
 *
 * See description of rle_encode for details.
 *
 * @param string $data Run-length encoded string
 * @return string Decoded string
 */
function rle_decode($data)
{
    $data = (string) $data;
    $ret  = '';
    $len  = strlen($data);

    if ($len % 2 !== 0) {
        throw new Exception('Input data is malformed');
    }

    for ($i = 0; $i < $len; $i += 2) {
        $cnt = substr($data, $i    , 1);
        $c   = substr($data, $i + 1, 1);

        $ret .= str_repeat($c, ord($cnt));
    }

    return $ret;
}



// Test

$input = str_repeat('A', 1)
       . str_repeat('B', 32)
       . str_repeat('C', 17)
       . str_repeat('D', 517);

$rle = rle_encode($input);

if ($input === rle_decode($rle)) {
    echo 'Success';
}