1 Star 0 Fork 0

Vantler/foremost

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
engine.c 15.92 KB
一键复制 编辑 原始数据 按行查看 历史
Jin 提交于 2016-02-05 09:21 +08:00 . first commit
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708
/* FOREMOST
*
* By Jesse Kornblum, Kris Kendall, & Nick Mikus
*
* This is a work of the US Government. In accordance with 17 USC 105,
* copyright protection is not available for any work of the US Government.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#include "main.h"
/********************************************************************************
 *Function:    user_interrupt
 *Description: Called when SIGINT/SIGTERM has been caught. Logs the interrupt,
 *             closes the input stream, cleans up the output, releases the
 *             state and input-info structures and terminates the process.
 *Return:      Does not return in practice (calls exit(-1)); the trailing
 *             return only satisfies the declared int return type.
 **********************************************************************************/
int user_interrupt(f_state *s, f_info *i)
{
    audit_msg(s, "Interrupt received at %s", current_time());

    /* RBF - Write user_interrupt */
    fclose(i->handle);
    free(i);

    /* cleanup_output() is handed the state, so it has to run before the
     * state is released; freeing s first would be a use-after-free. */
    cleanup_output(s);
    free(s);

    exit(-1);
    return FALSE;
}
/********************************************************************************
 *Function:    read_from_disk
 *Description: Allocate a buffer of `length` bytes and fill it with the data
 *             found at position `offset` (SEEK_SET) of i->handle.
 *             The stream position of i->handle is moved by this call; a
 *             caller that needs the old position must save and restore it.
 *Return:      Newly allocated buffer owned by the caller (release with
 *             free()), or NULL when the seek fails or fewer than `length`
 *             bytes could be read. Exits the process when allocation fails.
 **********************************************************************************/
unsigned char *read_from_disk(u_int64_t offset, f_info *i, u_int64_t length)
{
    u_int64_t bytesread = 0;
    unsigned char *newbuf = (unsigned char *)malloc(length * sizeof(char));

    if (!newbuf) {
        fprintf(stderr, "Ran out of memory in read_from_disk()\n");
        exit(1);
    }

    /* A failed seek would make fread() return data from the wrong place,
     * so treat it exactly like a short read. */
    if (fseeko(i->handle, offset, SEEK_SET) != 0) {
        free(newbuf);
        return NULL;
    }

    bytesread = fread(newbuf, 1, length, i->handle);
    if (bytesread != length) {
        free(newbuf);
        return NULL;
    }

    return newbuf;
}
/*
 * Modified Boyer-Moore search. Candidate positions are located with the
 * caller-supplied shift table (a shift of 0 marks a possible hit) and are
 * then verified with memwildcardcmp(), which is given the casesensitive flag.
 * Scanning begins at index start_pos, so leading data can be skipped.
 *
 * SEARCHTYPE_FORWARD / SEARCHTYPE_FORWARD_NEXT walk from the front of the
 * buffer; the index then names the LAST byte of the candidate window.
 * SEARCHTYPE_REVERSE walks from the back; the index is counted from the end
 * of the buffer and names the FIRST byte of the candidate window.
 *
 * Returns a pointer to the match inside haystack, haystack itself when
 * needle_len is 0, or NULL when nothing matches or searchtype is unknown.
 */
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len, unsigned char *haystack,
                               size_t haystack_len, size_t table[UCHAR_MAX + 1], int casesensitive,
                               int searchtype, int start_pos)
{
    size_t step = 0;
    size_t idx = start_pos;
    unsigned char *candidate;
    int forward;

    if (needle_len == 0)
        return haystack;

    forward = (searchtype == SEARCHTYPE_FORWARD || searchtype == SEARCHTYPE_FORWARD_NEXT);
    if (!forward && searchtype != SEARCHTYPE_REVERSE)
        return NULL;

    while (idx < haystack_len) {
        /* Slide until the table reports a possible hit or the buffer ends. */
        for (;;) {
            if (idx >= haystack_len)
                break;
            step = table[haystack[forward ? idx : haystack_len - idx - 1]];
            if (step == 0)
                break;
            idx += step;
        }

        /* A non-zero step here means the slide ran off the end of the
         * buffer; the loop condition then ends the search. */
        if (step != 0)
            continue;

        candidate = forward ? &haystack[idx - needle_len + 1]
                            : &haystack[haystack_len - idx - 1];
        if (memwildcardcmp(needle, candidate, needle_len, casesensitive) == 0)
            return candidate;

        idx++;
    }

    return NULL;
}
/*
 * Convenience front end for bm_search_skipn(): performs the same modified
 * Boyer-Moore search (wildcards and case-insensitivity handled by the callee)
 * with the starting index fixed at needle_len - 1.
 * Returns whatever bm_search_skipn() returns.
 */
unsigned char *bm_search(unsigned char *needle, size_t needle_len, unsigned char *haystack,
                         size_t haystack_len, size_t table[UCHAR_MAX + 1], int case_sen,
                         int searchtype)
{
    size_t first_pos = needle_len - 1;

    return bm_search_skipn(needle, needle_len, haystack, haystack_len,
                           table, case_sen, searchtype, first_pos);
}
/*
 * Prepare an input for searching: reset the read counter, derive the size in
 * megabytes, write the input length to the audit log and, when s->skip is
 * non-zero, seek past s->skip * s->block_size bytes (reducing the known total
 * length accordingly).
 */
void setup_stream(f_state *s, f_info *i)
{
    char readable[MAX_STRING_LENGTH];
    u_int64_t skip_bytes = ((u_int64_t) s->skip) * ((u_int64_t) s->block_size);

#ifdef DEBUG
    printf("s->skip=%d s->block_size=%d total=%llu\n", s->skip, s->block_size, skip_bytes);
#endif

    i->bytes_read = 0;
    i->total_megs = i->total_bytes / ONE_MEGABYTE;

    /* A total of 0 means the length could not be determined. */
    if (i->total_bytes == 0)
        audit_msg(s, "Length: Unknown");
    else
        audit_msg(s, "Length: %s (%llu bytes)", human_readable(i->total_bytes, readable), i->total_bytes);

    if (s->skip != 0) {
        audit_msg(s, "Skipping: %s (%llu bytes)", human_readable(skip_bytes, readable), skip_bytes);
        fseeko(i->handle, skip_bytes, SEEK_SET);
        if (i->total_bytes != 0)
            i->total_bytes -= skip_bytes;
    }

    audit_msg(s, " ");

#ifdef __WIN32
    i->last_read = 0;
    i->overflow_count = 0;
#endif
}
/*
 * Write the column-heading line (Num, Name with the block size, Size,
 * File Offset, Comment) to the audit log.
 */
void audit_layout(f_state *s)
{
    audit_msg(s, "Num\t %s (bs=%d)\t %10s\t %s\t %s \n",
              "Name", s->block_size, "Size", "File Offset", "Comment");
}
/*
 * Debug helper: print the bs bytes at ind as a table of 4-byte values
 * (decoded by htoi() with FOREMOST_LITTLE_ENDIAN), ten values per row,
 * framed by separator lines.
 */
void dumpInd(unsigned char *ind, int bs)
{
    int printed;

    printf("\n/*******************************/\n");

    for (printed = 0; bs > 0; printed++, bs -= 4, ind += 4) {
        /* start a new row before every tenth value */
        if (printed % 10 == 0)
            printf("\n");
        printf("%4u ", htoi(ind, FOREMOST_LITTLE_ENDIAN));
    }

    printf("\n/*******************************/\n");
}
/********************************************************************************
 *Function:    ind_block
 *Description: Check whether the block that starts 12 * bs bytes after foundat
 *             looks like an indirect block: a run of 4-byte little-endian
 *             block numbers that each increase by exactly one, followed only
 *             by zero entries up to (but not including) the last entry of
 *             the block.
 *Params:      foundat - start of the candidate file data
 *             buflen  - number of valid bytes available at foundat
 *             bs      - block size to test, in bytes
 *Return:      TRUE/FALSE. FALSE is also returned when the buffer does not
 *             contain the whole candidate block.
 **********************************************************************************/
int ind_block(unsigned char *foundat, u_int64_t buflen, int bs)
{
    unsigned char *temp = foundat;
    int jump = 12 * bs;
    unsigned int block = 0;
    unsigned int block2 = 0;
    unsigned int dif = 0;
    int i = 0;
    int numbers = (bs / 4) - 1;

    /* The loops below read entries up to byte offset jump + bs, so the whole
     * candidate block must be inside the buffer (the old "+ 16" test let the
     * reads run past the end of the buffer). bs < 4 leaves no entry to test. */
    if (bs < 4 || buflen < (u_int64_t) jump + (u_int64_t) bs)
        return FALSE;

    while (i < numbers)
    {
        block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
        if (block == 0)
            break;

        i++;
        block2 = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
        if (block2 == 0)
            break;

        /* Neighbouring entries must be consecutive block numbers. */
        dif = block2 - block;
        if (dif != 1)
        {
#ifdef DEBUG
            printf("Failure, dif!=1\n");
            printf("\tblock1:=%u, block2:=%u dif=%u\n", block, block2, dif);
#endif
            return FALSE;
        }

#ifdef DEBUG
        printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif);
#endif
    }

    /* The very first entry was zero: nothing that looks like a block list. */
    if (i == 0)
        return FALSE;

    /*Check if the rest of the bytes are zero'd out */
    for (i = i + 1; i < numbers; i++)
    {
        block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
        if (block != 0)
            return FALSE;
    }

    return TRUE;
}
/********************************************************************************
*Function: search_chunk
*Description: Analyze the given chunk by running each defined search spec on it.
* For every one of the s->num_builtin entries of search_spec the chunk is
* scanned for the spec's header; each hit is handed to extract_file().
*Params: s - program state (modes, block size, number of search specs)
* buf - chunk data; may be rearranged temporarily while an indirect
* block is cut out, and is put back (zero-filled) afterwards
* i - input info; i->handle is used to re-read data from disk
* chunk_size - number of valid bytes in buf
* f_offset - running offset of this chunk as counted by the caller; it is
* added to the in-chunk offset for extract_file(),
* write_to_disk() and read_from_disk()
*Return: TRUE/FALSE (the visible code always returns TRUE)
**********************************************************************************/
int search_chunk(f_state *s, unsigned char *buf, f_info *i, u_int64_t chunk_size, u_int64_t f_offset)
{
u_int64_t c_offset = 0;
//u_int64_t foundat_off = 0;
//u_int64_t buf_off = 0;
unsigned char *foundat = buf;
unsigned char *current_pos = NULL;
unsigned char *header_pos = NULL;
unsigned char *newbuf = NULL;
unsigned char *ind_ptr = NULL;
u_int64_t current_buflen = chunk_size;
/* Block sizes tried, in this order, by the indirect-block detection */
int tryBS[3] = { 4096, 1024, 512 };
unsigned char *extractbuf = NULL;
u_int64_t file_size = 0;
s_spec *needle = NULL;
int j = 0;
int bs = 0;
int rem = 0;
int x = 0;
int found_ind = FALSE;
off_t saveme;
//char comment[32];
/* One full pass over the chunk per search spec */
for (j = 0; j < s->num_builtin; j++)
{
needle = &search_spec[j];
foundat = buf; /*reset the buffer for the next search spec*/
#ifdef DEBUG
printf(" SEARCHING FOR %s's\n", needle->suffix);
#endif
bs = 0;
current_buflen = chunk_size;
/* foundat is the search cursor; it becomes NULL when this spec is done */
while (foundat)
{
/* Reset the per-hit state left over from the previous iteration */
needle->written = FALSE;
found_ind = FALSE;
memset(needle->comment, 0, COMMENT_LENGTH - 1);
/* Stop once the cursor has reached or passed the end of the valid data */
if (chunk_size <= (foundat - buf)) {
#ifdef DEBUG
printf("avoided seg fault in search_chunk()\n");
#endif
foundat = NULL;
break;
}
current_buflen = chunk_size - (foundat - buf);
//if((foundat-buf)< 1 ) break;
#ifdef DEBUG
//foundat_off=foundat;
//buf_off=buf;
//printf("current buf:=%llu (foundat-buf)=%llu \n", current_buflen, (u_int64_t) (foundat_off - buf_off));
#endif
if (signal_caught == SIGTERM || signal_caught == SIGINT)
{
/* user_interrupt() ends in exit(), so the two statements after it are not reached */
user_interrupt(s, i);
printf("Cleaning up.\n");
signal_caught = 0;
}
if (get_mode(s, mode_quick)) /*RUN QUICK SEARCH*/
{
#ifdef DEBUG
//printf("quick mode is on\n");
#endif
/*Check if we are not on a block head, adjust if so*/
rem = (foundat - buf) % s->block_size;
if (rem != 0)
{
foundat += (s->block_size - rem);
}
/* NOTE(review): after the alignment above foundat is not re-checked against
 * the end of the chunk before it is compared - confirm this cannot overrun */
if (memwildcardcmp(needle->header, foundat, needle->header_len, needle->case_sen
) != 0)
{
/*No match, jump to the next block*/
if (current_buflen > s->block_size)
{
foundat += s->block_size;
continue;
}
else /*We are out of buffer lets go to the next search spec*/
{
foundat = NULL;
break;
}
}
header_pos = foundat;
}
else /**********RUN STANDARD SEARCH********************/
{
/* Search from the cursor to the end of the chunk; NULL means no more headers */
foundat = bm_search(needle->header,
needle->header_len,
foundat,
current_buflen, //How much to search through
needle->header_bm_table,
needle->case_sen, //casesensative
SEARCHTYPE_FORWARD);
header_pos = foundat;
}
/* NOTE(review): foundat is a pointer, so the ">= 0" part adds nothing to the NULL test */
if (foundat != NULL && foundat >= 0) /*We got something, run the appropriate heuristic to find the EOF*/
{
current_buflen = chunk_size - (foundat - buf);
if (get_mode(s, mode_ind_blk))
{
#ifdef DEBUG
printf("ind blk detection on\n");
#endif
//dumpInd(foundat+12*1024,1024);
/* Try each block size; the first one for which ind_block() succeeds and
 * enough buffer remains wins and ends the loop */
for (x = 0; x < 3; x++)
{
bs = tryBS[x];
if (ind_block(foundat, current_buflen, bs))
{
if (get_mode(s, mode_verbose))
{
sprintf(needle->comment, " (IND BLK bs:=%d)", bs);
}
//dumpInd(foundat+12*bs,bs);
#ifdef DEBUG
printf("performing mem move\n");
#endif
if(current_buflen > 13 * bs)//Make sure we have enough buffer
{
/* Cut the block at foundat + 12*bs out of the data by shifting the tail
 * down over it. NOTE(review): memmove() returns its destination, so this
 * failure test is not expected to trigger */
if (!memmove(foundat + 12 * bs, foundat + 13 * bs, current_buflen - 13 * bs))
break;
found_ind = TRUE;
#ifdef DEBUG
printf("performing mem move complete\n");
#endif
/* Remember where the block was removed and account for the shorter data */
ind_ptr = foundat + 12 * bs;
current_buflen -= bs;
chunk_size -= bs;
break;
}
}
}
}
c_offset = (foundat - buf);
current_pos = foundat;
/*Now lets analyze the file and see if we can determine its size*/
// printf("c_offset=%llu %x %x %llx\n", c_offset,foundat,buf,c_offset);
/* The return value replaces the search cursor; NULL ends the loop for this spec
 * unless the else-branch further down resets it */
foundat = extract_file(s, c_offset, foundat, current_buflen, needle, f_offset);
#ifdef DEBUG
if (foundat == NULL)
{
printf("Foundat == NULL!!!\n");
}
#endif
if (get_mode(s, mode_write_all))
{
if (needle->written == FALSE)
{
/*write every header we find*/
/* Dump at most needle->max_len bytes, limited to what is left in the chunk */
if (current_buflen >= needle->max_len)
{
file_size = needle->max_len;
}
else
{
file_size = current_buflen;
}
sprintf(needle->comment, " (Header dump)");
/* NOTE(review): the malloc() result is used without a NULL check */
extractbuf = (unsigned char *)malloc(file_size * sizeof(char));
memcpy(extractbuf, header_pos, file_size);
write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
free(extractbuf);
}
}
else if (!foundat) /*Should we search further?*/
{
/*We couldn't determine where the file ends, now lets check to see
* if we should try again
*/
if (current_buflen < needle->max_len) /*We need to bridge the gap*/
{
#ifdef DEBUG
printf(" Bridge the gap\n");
#endif
/* read_from_disk() moves the stream position, so remember it first */
saveme = ftello(i->handle);
/*grow the buffer and try to extract again*/
newbuf = read_from_disk(c_offset + f_offset, i, needle->max_len);
if (newbuf == NULL)
break;
/* The result is stored in current_pos, which is not read again in this
 * function; foundat stays NULL, so the while loop ends after this pass */
current_pos = extract_file(s,
c_offset,
newbuf,
needle->max_len,
needle,
f_offset);
/*Lets put the fp back*/
fseeko(i->handle, saveme, SEEK_SET);
free(newbuf);
}
else
{
foundat = header_pos; /*reset the foundat pointer to the location of the last header*/
foundat += needle->header_len + 1; /*jump past the header*/
}
}
}
if (found_ind)
{
/*Put the ind blk back in, re-arrange the buffer so that the future blks names come out correct*/
#ifdef DEBUG
printf("Replacing the ind block\n");
#endif
/*This is slow, should we do this??????*/
/* Shift the tail back up by one block to reopen the gap at ind_ptr */
if (!memmove(ind_ptr + 1 * bs, ind_ptr, current_buflen - 13 * bs))
break;
/* NOTE(review): the gap is zero-filled (bs - 1 bytes); the original contents
 * of the removed block are not restored */
memset(ind_ptr, 0, bs - 1);
chunk_size += bs;
memset(needle->comment, 0, COMMENT_LENGTH - 1);
}
} //end while
}
return TRUE;
}
/********************************************************************************
 *Function:    search_stream
 *Description: Analyze the input by reading one chunk at a time
 *             (s->chunk_size * MEGABYTE bytes) from i->handle and passing it
 *             to search_chunk. Progress marks are written to stderr unless
 *             quiet mode is set. Exits the process when the chunk buffer
 *             cannot be allocated.
 *Return:      TRUE/FALSE (the visible code always returns FALSE)
 **********************************************************************************/
int search_stream(f_state *s, f_info *i)
{
    u_int64_t bytesread = 0;
    u_int64_t f_offset = 0;
    u_int64_t chunk_size = ((u_int64_t) s->chunk_size) * MEGABYTE;
    unsigned char *buf = (unsigned char *)malloc(sizeof(char) * chunk_size);

    /* Without this check a failed allocation would be handed to fread(). */
    if (buf == NULL) {
        fprintf(stderr, "Ran out of memory in search_stream()\n");
        exit(1);
    }

    setup_stream(s, i);
    audit_layout(s);

#ifdef DEBUG
    printf("\n\t READING THE FILE INTO MEMORY\n");
#endif

    while ((bytesread = fread(buf, 1, chunk_size, i->handle)) > 0)
    {
        if (signal_caught == SIGTERM || signal_caught == SIGINT)
        {
            user_interrupt(s, i);
            printf("Cleaning up.\n");
            signal_caught = 0;
        }

#ifdef DEBUG
        printf("\n\tbytes_read:=%llu\n", bytesread);
#endif

        /* f_offset counts the bytes handed to search_chunk so far. */
        search_chunk(s, buf, i, bytesread, f_offset);
        f_offset += bytesread;

        if (!get_mode(s, mode_quiet))
        {
            fprintf(stderr, "*");
            //displayPosition(s,i,f_offset);
        }

        /*FIX ME***
         * We should jump back and make sure we didn't miss any headers that are
         * bridged between chunks. What is the best way to do this?\
         */
    }

    if (!get_mode(s, mode_quiet))
    {
        fprintf(stderr, "|\n");
    }

#ifdef DEBUG
    printf("\n\tDONE READING bytes_read:=%llu\n", bytesread);
#endif

    if (signal_caught == SIGTERM || signal_caught == SIGINT)
    {
        user_interrupt(s, i);
        printf("Cleaning up.\n");
        signal_caught = 0;
    }

    free(buf);
    return FALSE;
}
/*
 * Announce the start of work on an input: a "Processing:" line on stderr
 * (suppressed in quiet mode), then a divider, the file name and the start
 * time in the audit log.
 */
void audit_start(f_state *s, f_info *i)
{
    if (get_mode(s, mode_quiet) == 0)
        fprintf(stderr, "Processing: %s\n|", i->file_name);

    audit_msg(s, FOREMOST_DIVIDER);
    audit_msg(s, "File: %s", i->file_name);
    audit_msg(s, "Start: %s", current_time());
}
/*
 * Record the finish time in the audit log. The input-info argument is
 * accepted for symmetry with audit_start() but is not used here.
 */
void audit_finish(f_state *s, f_info *i)
{
    audit_msg(s,
              "Finish: %s",
              current_time());
}
/*
 * Process the input named by s->input_file: resolve the path, open the file,
 * determine its size, run search_stream() over it and write the start/finish
 * audit entries.
 *
 * Returns FALSE on success and TRUE when the path cannot be resolved, memory
 * cannot be allocated or the file cannot be opened. Everything allocated here
 * is released on every return path.
 */
int process_file(f_state *s)
{
    //printf("processing file\n");
    f_info *i = NULL;
    char temp[PATH_MAX];
    int open_errno = 0;

    /* Resolve the path before allocating anything, so this error path has
     * nothing to release. */
    if ((realpath(s->input_file, temp)) == NULL)
    {
        print_error(s, s->input_file, strerror(errno));
        return TRUE;
    }

    i = (f_info *)malloc(sizeof(f_info));
    if (i == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_file()\n");
        return TRUE;
    }

    i->file_name = strdup(s->input_file);
    if (i->file_name == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_file()\n");
        free(i);
        return TRUE;
    }

    i->is_stdin = FALSE;
    audit_start(s, i);

    // printf("opening file %s\n",i->file_name);
#if defined(__LINUX)
#ifdef DEBUG
    printf("Using 64 bit fopen\n");
#endif
    i->handle = fopen64(i->file_name, "rb");
#elif defined(__WIN32)
    /*I would like to be able to read from
     * physical devices in Windows, have played
     * with different options to fopen and the
     * dd src says you need write access on WinXP
     * but nothing seems to work*/
    i->handle = fopen(i->file_name, "rb");
#else
    i->handle = fopen(i->file_name, "rb");
#endif

    if (i->handle == NULL)
    {
        /* Keep the open error: the first report may overwrite errno before
         * the second one reads it. */
        open_errno = errno;
        print_error(s, s->input_file, strerror(open_errno));
        audit_msg(s, "Error: %s", strerror(open_errno));
        free(i->file_name);
        free(i);
        return TRUE;
    }

    i->total_bytes = find_file_size(i->handle);
    search_stream(s, i);
    audit_finish(s, i);

    fclose(i->handle);
    /* file_name came from strdup(); release it as process_stdin() does. */
    free(i->file_name);
    free(i);
    return FALSE;
}
/*
 * Process data arriving on standard input: set up an input-info record named
 * "stdin" (s->input_file is set to "stdin" as well), write the start audit
 * entry and run search_stream() over the stream.
 *
 * Returns FALSE on success and TRUE when memory cannot be allocated.
 */
int process_stdin(f_state *s)
{
    f_info *i = (f_info *)malloc(sizeof(f_info));

    if (i == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_stdin()\n");
        return TRUE;
    }

    i->file_name = strdup("stdin");
    if (i->file_name == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_stdin()\n");
        free(i);
        return TRUE;
    }

    s->input_file = "stdin";
    i->handle = stdin;
    i->is_stdin = TRUE;

    /* We can't compute the size of this stream, we just ignore it*/
    i->total_bytes = 0;

    audit_start(s, i);
    search_stream(s, i);

    free(i->file_name);
    free(i);
    return FALSE;
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C
1
https://gitee.com/vantler/foremost.git
git@gitee.com:vantler/foremost.git
vantler
foremost
foremost
master

搜索帮助