3 Star 0 Fork 0

thor/zipzap

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
encoder.c 5.47 KB
一键复制 编辑 原始数据 按行查看 历史
thor 提交于 2014-06-01 13:25 +08:00 . opt
#include "encoder.h"
#include "defs.h"
#include "dict.h"
#define BUFSIZE 1024
#define SMALL_CW(cw) ((cw >> 7) == 0)
#define MAX_OFF 64
#define END_CODE 0
#define CLR_CODE 1
static int table_count = 0;
static int cw_max_bit = 0;
static int bit_change_count = 0;
static long long bytes_read = 0;
static long long bytes_write = 0;
static int hit_count[DICT_SIZE] = {};
static void enc_init();
static void enc_insert();
static void enc_finish();
static TrieNode *root;
static char *lookup_table[DICT_SIZE] = {};
static double compress_ratio = 1.0;
static void
enc_init()
{
if (verbose >= 1)
printf("refresh the dict\n");
root = trie_init();
table_count = 2;
trie_count = 2;
char d = 0;
int i;
for (i = 0; i < 256; i++) {
d = i;
enc_insert(root, &d, 1);
}
cw_max_bit = 9;
bit_change_count = (1 << cw_max_bit) + 1;
}
static void
enc_insert(TrieNode *node, char *data, int size)
{
if (table_count >= DICT_SIZE)
return;
if (size == 1)
trie_insert_c(node, *data);
else
trie_insert(node, data, size);
table_count++;
if (table_count == bit_change_count) {
cw_max_bit += 1;
bit_change_count = (1 << cw_max_bit) + 1;
}
}
static void
enc_insert_i(TrieNode *node, unsigned idx)
{
if (table_count >= DICT_SIZE)
return;
trie_insert_i(node, idx);
table_count++;
if (table_count == bit_change_count) {
cw_max_bit += 1;
bit_change_count = (1 << cw_max_bit) + 1;
}
}
static void
enc_finish()
{
trie_destroy(root);
}
//
// Write cw to file, but instead of write immediatly, write it to a buffer.
// If the buffer approaches its maximum size, write it to a file.
// If flush != 0, just flush contents in the buf to file.
//
static void
enc_writech(FILE *file, char ch, int flush)
{
static char buf[BUFSIZE];
static int ptr = 0;
bytes_write++;
if (flush) {
fwrite(buf, sizeof(char), ptr, file);
ptr = 0;
} else {
buf[ptr++] = ch;
if (ptr == BUFSIZE) {
fwrite(buf, sizeof(char), BUFSIZE, file);
ptr = 0;
}
}
}
//
// Write the lower len bit of byte to file
//
static void
enc_writebit(FILE *file, int data, int len, int flush)
{
static int bptr = 0;
static char buf = 0;
if (flush)
enc_writech(file, buf, 1);
int to_write = MIN(8-bptr, len);
char mask = (1<<to_write)-1;
buf |= (mask & data) << bptr;
bptr += to_write;
if (bptr == 8) {
enc_writech(file, buf, 0);
bptr = 0;
buf = 0;
}
// Not finished yet
if (to_write < len) {
data >>= to_write;
enc_writebit(file, data, len - to_write, flush);
}
}
//
// Write cw according to its length - variable coding.
//
static void
enc_writecw(FILE *file, int cw)
{
static int CW[MAX_OFF] = {};
static int cw_buf_ptr = 0;
static int cw_buf_full = 0;
char c = 0;
int i = 0;
hit_count[cw] += 1;
if (verbose == 1) {
printf("cw: %d\t\twidth: %d\tdictsize: %d\n", cw, cw_max_bit, table_count);
}
enc_writebit(file, cw, cw_max_bit, 0);
return;
// Index use 6 bit.
if (cw >> 6 == 0) {
// Start with 10
enc_writebit(file, 0x2, 2, 0);
enc_writebit(file, cw, 6, 0);
} else {
// Start with 0
int found = 0;
for (i = cw_buf_ptr-1; i >= MAX(0, i - MAX_OFF); i--) {
if (CW[i % MAX_OFF] == cw) {
found = 1;
break;
}
}
// found = 0;
// This cw is repeated
if (found) {
// Start with 11
int off = cw_buf_ptr - i;
enc_writebit(file, 0x11, 2, 0);
enc_writebit(file, c, 6, 0);
} else {
// Start with 0
enc_writebit(file, 0, 1, 0);
enc_writebit(file, cw, cw_max_bit, 0);
CW[cw_buf_ptr % MAX_OFF] = cw;
cw_buf_ptr++;
if (!cw_buf_full && cw_buf_ptr == MAX_OFF)
cw_buf_full = 1;
if (cw_buf_full)
cw_buf_ptr = cw_buf_ptr % MAX_OFF + MAX_OFF;
}
}
}
static int
enc_readbyte(FILE *file, char *result)
{
static char buf[BUFSIZE];
// ptr indicates buffer is not fresh, need to update
static int ptr = -1;
static int end = BUFSIZE;
if (ptr < 0) {
end = fread(buf, 1, BUFSIZE, file);
if (end == 0)
return -1;
ptr = 0;
}
// printf("2 %d %p\n", ptr, result);
*result = buf[ptr++];
bytes_read++;
if (ptr == end)
ptr = -1;
return 0;
}
//
// Read a file, encode it, and output.
// RETURNS
// 0 on success.
// < 0 on error.
//
int
encode(const char *inputfile, const char *outputfile)
{
FILE *infile = fopen(inputfile, "r");
FILE *outfile = fopen(outputfile, "wb");
TrieNode *node;
TrieNode *prev = NULL;
char P[BUFSIZE+1] = {};
char C;
int Pp;
enc_init();
enc_readbyte(infile, &C);
P[0] = C;
Pp = 1;
unsigned idx = C & 0xff;
prev = (TrieNode *)root->next[idx];
while (1) {
int r = enc_readbyte(infile, &C);
unsigned idx = C & 0xff;
// EOF
if (r < 0) {
enc_writecw(outfile, prev->num);
break;
} else {
P[Pp] = C;
node = (TrieNode *)prev->next[idx];
if (node == NULL) {
enc_writecw(outfile, prev->num);
enc_insert(prev, &C, 1);
if (table_count >= DICT_SIZE) {
if ((double)bytes_write/bytes_read > 0.7) {
enc_writecw(outfile, CLR_CODE);
enc_finish();
enc_init();
}
}
P[0] = C;
Pp = 1;
prev = (TrieNode *)root->next[idx];
} else {
if (Pp == BUFSIZE) {
enc_writecw(outfile, prev->num);
P[0] = C;
Pp = 1;
prev = (TrieNode *)root->next[idx];
} else {
Pp++;
prev = node;
}
}
}
}
// Flush the string and output EOF
enc_writecw(outfile, END_CODE);
enc_writebit(outfile, 0, 0, -1);
enc_finish();
fclose(infile);
fclose(outfile);
printf("original size %f (KB)\n", bytes_read / 1024.0);
printf("compress to %f (KB)\n", bytes_write / 1024.0);
printf("compress ratio: %f\n", (double)bytes_write/bytes_read);
return 0;
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C
1
https://gitee.com/thor/zipzap.git
git@gitee.com:thor/zipzap.git
thor
zipzap
zipzap
master

搜索帮助