usr 发表于 2020-11-7 18:26:38

【C】使用霍夫曼树压缩数据

以下介绍使用StoneValley库的霍夫曼树进行数据的压缩和解压缩。
霍夫曼压缩算法位于StoneValley库的/src/svctree.c文件内。
压缩前和压缩后的数据存储在比特流(BITSTREAM)结构体内。
示例使用了treHuffmanEncoding对数据进行压缩,使用了treHuffmanDecoding解压缩数据。
#include <stdio.h>
#include <string.h>
#include "../src/svdef.h"
#include "../src/svstring.h"
#include "../src/svtree.h"

static const unsigned char sdata[] = { /* Prepared and compressed data. */
        0xE4, 0x4E, 0x0F, 0x56, 0x27, 0x71, 0x5A, 0xF7,
        0x1F, 0x2F, 0x59, 0xBF, 0xEE, 0x0F, 0x6A, 0x63,
        0x0F, 0x66, 0xE3, 0x93, 0x17, 0xA5, 0x9E, 0xB5,
        0x92, 0x5A, 0x3B, 0x6C, 0xAC, 0x88, 0x4D, 0x77,
        0x39, 0xAB, 0x09, 0x16, 0x7D, 0x8B, 0x53, 0xB3,
        0x37, 0xC3, 0xF5, 0xA8, 0x4C, 0x56, 0x61, 0xD7,
        0xCA, 0xFE, 0x92, 0x90
};

static const HFM_SYMBOL stabl[] = { /* Prepared symbol table. */
        0040, 0002, 0000, 0145, 0003, 0003, 0164, 0004,
        0017, 0157, 0004, 0013, 0151, 0004, 0012, 0150,
        0004, 0005, 0141, 0004, 0004, 0163, 0005, 0031,
        0154, 0005, 0030, 0156, 0005, 0020, 0143, 0005,
        0021, 0000, 0005, 0022, 0144, 0006, 0047, 0162,
        0006, 0073, 0147, 0006, 0046, 0111, 0006, 0071,
        0155, 0006, 0070, 0123, 0007, 0157, 0077, 0007,
        0164, 0126, 0007, 0155, 0056, 0007, 0156, 0160,
        0007, 0152, 0146, 0007, 0153, 0124, 0007, 0150,
        0047, 0007, 0165, 0166, 0007, 0151, 0171, 0007,
        0154
};

/* Strings. */
static const char * pstr = {"Hello, ", "library."};

/* This's a tricky function. */
int foo(void)
{
        P_ARRAY_Z ptbl;
        BITSTREAM bsin;
        P_BITSTREAM pbsout;
        if (NULL == strInitBitStream(&bsin))
                return __LINE__; /* Can not initialize bit-stream. */
        if (NULL == (ptbl = strCreateArrayZ(sizeof(stabl), sizeof(HFM_SYMBOL))))
                return __LINE__; /* Can not initialize array. */
        /* Set bsin and symbol table up. */
        bsin.bilc = 5; /* This number is recorded after encoding. */
        strResizeArrayZ(&bsin.arrz, sizeof(sdata), sizeof(UCHART));
        memcpy(bsin.arrz.pdata, sdata, sizeof(sdata));
        memcpy(ptbl->pdata, stabl, sizeof(stabl));
        /* Decode and display. */
        pbsout = treHuffmanDecoding(ptbl, &bsin);
        printf("%s%s\n", (char *)pbsout->arrz.pdata, pstr);
        /* Cleanup. */
        strDeleteBitStream(pbsout);
        /* Don't delete bsin here. Free bsin. */
        strFreeBitStream(&bsin);
        strDeleteArrayZ(ptbl);
        return 0;
}

/* This function illustrates how to use Huffman algorithm to compress data. */
int main(void)
{
        P_ARRAY_Z ptbl = NULL; /* Used to store a symbol table. */
        P_BITSTREAM pbsout, pbsin; /* Bit-streams that used to contain data. */
        /* Encode string 'hello'. Caution that table is important for decoding. */
        if (NULL == (pbsin = treHuffmanEncoding(&ptbl, (const PUCHAR)pstr, 8)))
        {
                fprintf(stderr, "Error! Can not Encode.\n");
                return __LINE__;
        }
        if (NULL == (pbsout = treHuffmanDecoding(ptbl, pbsin)))
        {
                fprintf(stderr, "Error! Can not Decode.\n");
                return __LINE__;
        }
        /* Print hello. */
        printf("%s", (char *)pbsout->arrz.pdata);
        /* Don't forget to free bit-streams and symbol table after use. */
        strDeleteBitStream(pbsout);
        strDeleteBitStream(pbsin);
        strDeleteArrayZ(ptbl);
        return foo();
}

比特流pbsout中包含了97Bytes的数据,压缩后的sdata全局变量只包含52Bytes的数据。
但是因为数据量小还要加上全局变量stabl内包含的符号表所以体现不出压缩的优势。

0xAA55 发表于 2020-11-7 23:41:16

我还以为你自己实现了Huffman压缩算法呢

usr 发表于 2020-11-8 08:46:36

0xAA55 发表于 2020-11-7 23:41
我还以为你自己实现了Huffman压缩算法呢

霍夫曼算法在StoneValley库里面实现了,而且是开源的。要把实现代码贴出来吗?

watermelon 发表于 2020-11-8 08:57:15

new_starter 发表于 2020-11-8 08:46
霍夫曼算法在StoneValley库里面实现了,而且是开源的。要把实现代码贴出来吗? ...

站长说的是你自己实现的Huffman算法......
就像我这样:https://www.0xaa55.com/thread-16843-1-1.html
页: [1]
查看完整版本: 【C】使用霍夫曼树压缩数据