Ai
1 Star 0 Fork 0

收拾旧河山/ extmem-c-database-query-processing

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.c 34.29 KB
一键复制 编辑 原始数据 按行查看 历史
收拾旧河山 提交于 2022-12-26 20:38 +08:00 . done
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <stdbool.h>
# include "extmem.h"
#define tuples_per_blk 7 // 每个块存放的元组个数
#define R_blk_begin 1 // R关系元组数据存放的起始磁盘块号
#define R_blk_size 16 // R关系元组数据存储占用的磁盘块数
#define S_blk_begin 17 // S关系元组数据存放的起始磁盘块号
#define S_blk_size 32 // S关系元组数据存储占用的磁盘块数
#define R_tuple_num (R_blk_size * tuples_per_blk) // R关系元组数据个数
#define S_tuple_num (S_blk_size * tuples_per_blk) // S关系元组数据个数
#define buf_size 520 // 缓冲区中的缓存块总数
#define blk_size 64 // 缓存块大小
#define blks_per_set 6 // TPMMS每个集合最大块数
/*--------------------------------------------------------------------------for debug begin--------------------------------------------------------------------------*/
// 打印一个数组
int print_array(int* arr, int len){
printf("arr: ");
for(int i=0; i<=len-1; ++i){
printf("%d ",arr[i]);
}
printf("\n");
}
/*--------------------------------------------------------------------------for debug end--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------utils begin--------------------------------------------------------------------------*/
// 获取当前块的后继块号地址
int getNextAddr(unsigned char *blkPtr) {
char str[5];
memcpy(str, blkPtr + 7 * 8, 4);
return atoi(str);
}
// 获得块内第offset个元组
void read_tuple_from_blk(unsigned char *blkPtr, int off, int*t1, int* t2){
char str[5];
memcpy(str, blkPtr+off*8, 4);
*t1 = atoi(str);
memcpy(str, blkPtr+off*8+4, 4);
*t2 = atoi(str);
}
// 将元组写入块内第offset个位置
void write_tuple_into_blk(unsigned char *blkPtr, int off, int t1, int t2){
sprintf(blkPtr + off * 8, "%d", t1);
sprintf(blkPtr + off * 8 + 4, "%d", t2);
}
// 解析一个块 其内容存入元组数组
void read_tuples_from_blk(unsigned char *blkPtr, int* t1, int* t2) {
char str[5];
for(int i=0; i<tuples_per_blk; ++i){
read_tuple_from_blk(blkPtr, i, &t1[i], &t2[i]);
}
}
// 交换两个变量的值
void swap(int* a, int* b){
int temp;
temp=*a;
*a=*b;
*b=temp;
}
// 对元组数组进行升序排序 返回升序序列中的排列顺序
// order[i] = j 表示排序后数组的第j个元素在原数组中排第i个
void sort_tuples(int* t1, int* order, int length)
{
for(int i=0; i<length; ++i){
order[i] = i;
}
for(int j=0; j<length; j++){
for(int i=0; i<length-1-j; i++)
if(t1[i]>t1[i+1])
{
swap(&t1[i], &t1[i+1]);
swap(&order[i], &order[i+1]);
}
}
}
/*--------------------------------------------------------------------------utils end--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------core algorithm begin--------------------------------------------------------------------------*/
/**
* 在关系S中基于线性搜索的关系选择算法
*
* target:待寻找的目标值S.C
* did: 结果要存回的磁盘块号的起始地址
*
* return: 找到的元组个数
*/
int linear_search_relation_select(int target, int did) {
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *read_blk = getNewBlockInBuffer(&buf);
unsigned char *write_blk = getNewBlockInBuffer(&buf);
int find_cnt = 0; // 遍历过的块数量
int match_cnt = 0; // 匹配的元组数量
int write_cnt = 0; // 结果写回磁盘的数据块数
int read_addr = S_blk_begin; // 当前的read_blk块地址
int write_addr = did; // 当前的write_blk块地址
int Sc, Sd; // S属性的两个字段
// 逐块查找
while (++find_cnt <= S_blk_size){
printf("Read data block %d\n", read_addr);
// 读取当前块
if ((read_blk = readBlockFromDisk(read_addr, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// 遍历块内元组
for(int i=0; i<tuples_per_blk; ++i){
// 读入当前元组的S.C、S.D
read_tuple_from_blk(read_blk, i, &Sc, &Sd);
// 匹配 则将其写入写回块
if(Sc == target){
printf("(X=%d, Y=%d)\n", Sc, Sd);
// 把这个元组写入writr_blk的offset位置
int offset = match_cnt % tuples_per_blk;
write_tuple_into_blk(write_blk, offset, Sc, Sd);
// 如果write_blk内已经存满了7个元组 此时应该将其写回磁盘了
if(++match_cnt % tuples_per_blk == 0){
// 在块最后写入下一块的地址
sprintf(write_blk + 7 * 8, "%d", write_addr+1);
// 将块写回磁盘
if (writeBlockToDisk(write_blk, write_addr, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
++write_addr;
// 将缓冲区写入磁盘后 会自动释放掉write_blk 因此要重新获取一次 并刷新一下块
write_blk = getNewBlockInBuffer(&buf);
memset(write_blk, 0, 64);
}
}
}
// 获取下一个块地址并释放当前块
read_addr = getNextAddr(read_blk);
freeBlockInBuffer(read_blk, &buf);
}
// 将当前write_blk中的剩余的元组写回磁盘
if(match_cnt % tuples_per_blk != 0 ){
// 在块最后写入下一块的地址
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
}
printf("Results written to disk: ");
for(int i=did; i<=write_addr; ++i){
printf("%d ", i);
}
printf("\n");
printf("The number of matched tuples is %d\n\n", match_cnt);
printf("The number of IO is %ld.\n", buf.numIO);
// 释放磁盘块
freeBlockInBuffer(write_blk, &buf);
freeBuffer(&buf);
return 0;
}
/**
* 两阶段多路归并排序算法(TPMMS)
*
* read_addr: 待排序关系的起始块地址
* read_size: 待排序关系的块数
* write_size: 排序结果写回块起始地址
*
* return: 排序成功返回0 否则返回-1
*/
int two_phase_multiway_merge_sort(int read_addr, int read_size, int write_addr){
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *write_blk = getNewBlockInBuffer(&buf);
unsigned char *read_blk[blks_per_set];
int rest_blk_cnt = read_size; // 剩余待排序块数
write_addr += 100; // 外排序结果存在位置 100偏移值只是为了方便
/*------------内排序------------*/
// printf("\n\ninner sort start!\n");
while(rest_blk_cnt > 0){
// 获取本轮需要排序的块数
int cur_blk_cnt = (rest_blk_cnt > blks_per_set ? blks_per_set : rest_blk_cnt );
// 读取本轮每一个待排序块 并将每个块内元组排序好
for(int i=0; i<cur_blk_cnt; ++i){
if ((read_blk[i] = readBlockFromDisk(read_addr, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// 将块数据解析到元组数组
int t1[tuples_per_blk];
int t2[tuples_per_blk];
read_tuples_from_blk(read_blk[i], t1, t2);
// 升序排序元组并获得顺序
int order[tuples_per_blk];
sort_tuples(t1, order, tuples_per_blk);
// 将排序结果写回这个read块
for(int j=0; j<tuples_per_blk; ++j){
write_tuple_into_blk(read_blk[i], j, t1[j], t2[order[j]]);
}
read_addr = getNextAddr(read_blk[i]);
}
// 多路归并排序中每一路(每个块)的指针
int way_ptr[cur_blk_cnt];
memset(way_ptr, 0, sizeof(int) * cur_blk_cnt);
int rest_tuple_cnt = cur_blk_cnt * tuples_per_blk; // 记录剩余未归并的元组数
int write_offset = 0; // 记录write_blk块的下一个空闲位置
// 开始多路归并内排序
while(rest_tuple_cnt > 0){
int min_tuple_val = 1000; // 当前元组索引字段的最小值
int min_tuple_idx = -1; // 最小元组的块索引
int t1, t2; // 存放当前元组的临时变量
// 遍历找最小值
for(int i=0; i<cur_blk_cnt; ++i){
if(way_ptr[i] >= tuples_per_blk){
continue;
}
read_tuple_from_blk(read_blk[i], way_ptr[i], &t1, &t2);
if(t1 < min_tuple_val){
min_tuple_val = t1;
min_tuple_idx = i;
}
}
// 把最小元组值写进write_blk
read_tuple_from_blk(read_blk[min_tuple_idx], way_ptr[min_tuple_idx], &t1, &t2);
write_tuple_into_blk(write_blk, write_offset, t1, t2);
// printf("inner sort- minimum tuple value of this round: (%d, %d)\n", t1, t2);
// 如果write_blk写满了则将其写入磁盘
if(++write_offset >= tuples_per_blk){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0){
perror("Writing Block Failed!\n");
return -1;
}
// printf("inner sort- write into disk block: %d\n", write_addr-1);
write_offset = 0;
write_blk = getNewBlockInBuffer(&buf);
}
--rest_tuple_cnt;
way_ptr[min_tuple_idx] += 1;
}
// 释放本轮所有块
for(int i=0; i<cur_blk_cnt; ++i){
freeBlockInBuffer(read_blk[i], &buf);
}
// 更新剩余块计数
rest_blk_cnt -= blks_per_set;
// printf("next round.\n\n");
}
// printf("inner sort finished : %d, %d\n", read_addr, write_addr);
// 外排序 例如关系R: 16个块 每个子集6个块 一共3个子集
rest_blk_cnt = read_size; // 记录当前未排好序的块数
int set_num = read_size / blks_per_set + 1; // 统计子集的数目
unsigned char *set_blk[set_num]; // 每个子集当前读到的块
int last_set_blks = read_size % blks_per_set; // 最后一个子集的块数
int start_addr = write_addr - read_size; // 内排序结果存放在磁盘的起始位置
write_addr = start_addr - 100; // 外排序结果存放在磁盘的起始位置
int blk_ptr[set_num]; // 每个子集下一个待读取块的地址
int tuple_ptr[set_num]; // 指向每个子集当前块中下一个要进行比较的元组的位置
int write_offset = 0; // 记录write_blk块的下一个空闲位置
int blk_finished[set_num]; // 记录每个子集的块是否读完了
int all_finished[set_num]; // 记录每个子集的所有元组是否归并完了
int blk_init[set_num]; // 各个块是否初始化
memset(blk_finished , 0, sizeof(int)*set_num);
memset(all_finished , 0, sizeof(int)*set_num);
memset(blk_init , 0, sizeof(int)*set_num);
for(int i=0; i<set_num; ++i){
blk_ptr[i] = i * blks_per_set + start_addr;
tuple_ptr[i] = tuples_per_blk;
}
// printf("\n\nexternal sort start!\n");
while (rest_blk_cnt > 0){
// 当子集的tuple_ptr为0时,需要为这个子集读入下一个块
for(int i=0; i<set_num; ++i){
// 当前子集已读取过的块数
int blk_num = blk_ptr[i] - ( i * blks_per_set + start_addr);
// 如果当前子集的所有块都被读完了则跳过
if(i == set_num - 1 && blk_num >= last_set_blks){
blk_finished[i] = 1;
continue;
}
else if(blk_num >= blks_per_set){
blk_finished[i] = 1;
continue;
}
if(tuple_ptr[i] == tuples_per_blk){
if(blk_init[i]){
freeBlockInBuffer(set_blk[i], &buf);
}
// 读入这个块并更新块指针
if ((set_blk[i] = readBlockFromDisk(blk_ptr[i]++, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
blk_init[i] = 1;
tuple_ptr[i] = 0;
// printf("external sort- read the disk block: %d\n", blk_ptr[i]-1);
}
}
int min_tuple_val = 1000; // 当前元组索引字段的最小值
int min_tuple_idx = -1; // 最小元组的块索引
int t1, t2; // 存放当前元组的临时变量
// 遍历找最小值
for(int i=0; i<set_num; ++i){
// 跳过已经归并结束的子集
if(all_finished[i]){
continue;
}
read_tuple_from_blk(set_blk[i], tuple_ptr[i], &t1, &t2);
if(t1 < min_tuple_val){
min_tuple_val = t1;
min_tuple_idx = i;
}
}
// 把最小元组值写进write_blk
read_tuple_from_blk(set_blk[min_tuple_idx], tuple_ptr[min_tuple_idx], &t1, &t2);
write_tuple_into_blk(write_blk, write_offset, t1, t2);
// printf("external sort- minimum tuple value of this round: (%d, %d)\n", t1, t2);
if(++tuple_ptr[min_tuple_idx] >= tuples_per_blk){
// 如果一个子集的所有块都读完了, 且这个子集当前块也读完了 说明全读完了
if(blk_finished[min_tuple_idx]){
all_finished[min_tuple_idx] = 1;
}
}
// 如果write_blk写满了则将其写入磁盘
if(++write_offset >= tuples_per_blk){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0){
perror("Writing Block Failed!\n");
return -1;
}
printf("external sort- write data into disk block: %d\n", write_addr-1);
write_offset = 0;
write_blk = getNewBlockInBuffer(&buf);
--rest_blk_cnt;
}
}
// printf("\n\nexternal sort finished!\n");
freeBuffer(&buf);
return 0;
}
/**
* 为升序序列关系创建索引文件
*
* read_addr: 关系的起始块地址
* read_size: 关系的块数
* write_size: 索引文件结果写回块起始地址
*
* return: 返回0为成功 -1为失败
*/
int create_index(int read_addr, int read_size, int write_addr){
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *read_blk;
unsigned char *write_blk = getNewBlockInBuffer(&buf);
int find_cnt = 0; // 遍历过的块数量
int write_offset = 0; // write_blk中下一个待写的位置
for(int i=0; i<read_size; ++i){
// 读取当前块
if ((read_blk = readBlockFromDisk(read_addr, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
int t1,t2;
// 读取块的最后一个元组 将索引字段+块地址写入write_blk
read_tuple_from_blk(read_blk, 6, &t1, &t2);
write_tuple_into_blk(write_blk, write_offset, t1, read_addr);
printf("index %d: (%d, %d)\n", i+1, t1, read_addr);
// 块满了就写回磁盘
if(++write_offset >= tuples_per_blk){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
write_blk = getNewBlockInBuffer(&buf);
printf("create index - write data into block: %d\n", write_addr - 1);
write_offset = 0;
}
// 获取下一个块的地址
read_addr = getNextAddr(read_blk);
freeBlockInBuffer(read_blk, &buf);
}
// 将write_blk最后剩下的也写入磁盘
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
printf("create index - write data into block: %d\n", write_addr - 1);
return 0;
}
/**
* 在关系S中基于索引的关系选择算法
*
* target:待寻找的目标值S.C
* did: 结果要存回的磁盘块号的起始地址
* idx_addr: 索引文件起始地址
*
* return: 找到的元组个数
*/
int indexed_search_relation_select(int target, int did, int idx_addr){
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *read_blk;
unsigned char *write_blk = getNewBlockInBuffer(&buf);
int idx_cnt = 0; // 遍历过的索引块数量
int match_cnt = 0; // 匹配的元组数量
int write_cnt = 0; // 结果写回磁盘的数据块数
int read_addr = idx_addr; // 当前的read_blk块地址
int write_addr = did; // 当前的write_blk块地址
int Sc, Sd; // S属性的两个字段
int idx_size = S_blk_size / tuples_per_blk + 1;
for(int i=0; i<idx_size; ++i){
printf("Read index block %d\n", read_addr);
// 读取当前块
if ((read_blk = readBlockFromDisk(read_addr, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// 把当前索引块的数据读出来
int t1[tuples_per_blk];
int t2[tuples_per_blk];
read_tuples_from_blk(read_blk, t1, t2);
// 不处理找不到的情况!!
for(int j=0; j<tuples_per_blk; ++j){
if(t1[j] >= target){
read_addr = t2[j];
freeBlockInBuffer(read_blk, &buf);
// 一直找到不匹配的为止
while (true){
printf("Read data block %d\n", read_addr);
// 读取当前块
if ((read_blk = readBlockFromDisk(read_addr, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// 遍历块内元组
for(int k=0; k<tuples_per_blk; ++k){
// 读入当前元组的S.C、S.D
read_tuple_from_blk(read_blk, k, &Sc, &Sd);
// 匹配 则将其写入写回块
if(Sc == target){
printf("(X=%d, Y=%d)\n", Sc, Sd);
// 把这个元组写入writr_blk的offset位置
int offset = match_cnt % tuples_per_blk;
write_tuple_into_blk(write_blk, offset, Sc, Sd);
// 如果write_blk内已经存满了7个元组 此时应该将其写回磁盘了
if(++match_cnt % tuples_per_blk == 0){
// 在块最后写入下一块的地址
sprintf(write_blk + 7 * 8, "%d", write_addr+1);
// 将块写回磁盘
if (writeBlockToDisk(write_blk, write_addr, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
++write_addr;
// 将缓冲区写入磁盘后 会自动释放掉write_blk 因此要重新获取一次 并刷新一下块
write_blk = getNewBlockInBuffer(&buf);
memset(write_blk, 0, 64);
}
}
else{
// 将当前write_blk中的剩余的元组写回磁盘
if(match_cnt % tuples_per_blk != 0 ){
// 在块最后写入下一块的地址
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
}
printf("Results written to disk: ");
for(int l=did; l<=write_addr; ++l){
printf("%d ", l);
}
printf("\n");
printf("The number of matched tuples is %d\n\n", match_cnt);
printf("The number of IO is %ld.\n", buf.numIO);
// 释放磁盘块
freeBlockInBuffer(write_blk, &buf);
freeBlockInBuffer(read_blk, &buf);
freeBuffer(&buf);
return 0;
}
}
read_addr = getNextAddr(read_blk);
freeBlockInBuffer(read_blk, &buf);
}
}
}
read_addr = getNextAddr(read_blk);
freeBlockInBuffer(read_blk, &buf);
}
return -1;
}
/**
* 基于排序的连接算法 select S.C, S.D, R.A, R.B from S inner join R on S.C = R.A
*
* write_addr: 结果存放在的磁盘块位置
*
* return: -1为失败 0为成功
*/
int sort_merge_join(int write_addr){
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *R_read_blk;
unsigned char *S_read_blk;
unsigned char *write_blk = getNewBlockInBuffer(&buf);
if ((R_read_blk = readBlockFromDisk(200, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
if ((S_read_blk = readBlockFromDisk(216, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", 200);
// printf("Read data block %d\n", 216);
int R_tuple_ptr = 0;
int S_tuple_ptr = 0;
int R_blk_ptr = 200;
int S_blk_ptr = 216;
int write_offset = 0;
int match_flag = 0;
int temp_ptr = 0;
int R_is_over = 0;
int join_cnt = 0;
while (true) {
R_blk_ptr = 200 + (R_tuple_ptr / tuples_per_blk);
S_blk_ptr = 216 + (S_tuple_ptr / tuples_per_blk);
// 如果当前内存块不包含指定的元组号 则将新的块替换进内存
if(R_blk_ptr != (getNextAddr(R_read_blk) - 1)){
freeBlockInBuffer(R_read_blk, &buf);
if ((R_read_blk = readBlockFromDisk(R_blk_ptr, &buf)) == NULL){
perror("Reading R Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", R_blk_ptr);
}
if(S_blk_ptr != (getNextAddr(S_read_blk) - 1)){
freeBlockInBuffer(S_read_blk, &buf);
if ((S_read_blk = readBlockFromDisk(S_blk_ptr, &buf)) == NULL){
perror("Reading S Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", S_blk_ptr);
}
int R_blk_offset = R_tuple_ptr % tuples_per_blk;
int S_blk_offset = S_tuple_ptr % tuples_per_blk;
int Sc,Sd,Ra,Rb;
read_tuple_from_blk(R_read_blk, R_blk_offset, &Ra, &Rb);
read_tuple_from_blk(S_read_blk, S_blk_offset, &Sc, &Sd);
// 特殊处理出口临界情况
if(R_blk_ptr > 215){
Ra = 999;
}
// 此后都不会发生匹配了
if(Sc > 140){
// 临界情况处理 最后回溯时会多匹配一次 将那次记录抹去即可
write_tuple_into_blk(write_blk, 4, 0, 0);
write_tuple_into_blk(write_blk, 5, 0, 0);
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
write_blk = getNewBlockInBuffer(&buf);
printf("write data into block: %d\n", write_addr - 1);
write_offset = 0;
printf("\n\nThe number of joining is %d\n", join_cnt);
printf("The number of IO is %ld\n", buf.numIO);
return 0;
}
if(Sc > Ra){
++R_tuple_ptr;
}
else if(Sc < Ra){
// 回溯
if(match_flag){
R_tuple_ptr = temp_ptr;
match_flag = 0;
}
++S_tuple_ptr;
}
else{
write_tuple_into_blk(write_blk, write_offset++, Sc, Sd);
write_tuple_into_blk(write_blk, write_offset++, Ra, Rb);
printf("(Sc=%d, Sd=%d, Ra=%d, Rb=%d)\n", Sc, Sd, Ra, Rb);
++join_cnt;
if(!match_flag){
temp_ptr = R_tuple_ptr;
}
++R_tuple_ptr;
match_flag = 1;
}
// 读满了一个块则写入磁盘
if(write_offset >= 6){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
write_blk = getNewBlockInBuffer(&buf);
printf("write data into block: %d\n", write_addr - 1);
write_offset = 0;
}
}
}
/**
* 基于排序的集合交运算 select S.C, S.D, R.A, R.B from S inner join R on S.C = R.A
*
* write_addr: 结果存放在的磁盘块位置
*
* return: -1为失败 0为成功
*/
int sort_merge_intersect(int write_addr){
// 初始化缓冲区
Buffer buf;
if (!initBuffer(buf_size, blk_size, &buf))
{
perror("Buffer Initialization Failed!\n");
return -1;
}
// 初始化读写的缓存块
unsigned char *R_read_blk;
unsigned char *S_read_blk;
unsigned char *write_blk = getNewBlockInBuffer(&buf);
if ((R_read_blk = readBlockFromDisk(200, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
if ((S_read_blk = readBlockFromDisk(216, &buf)) == NULL){
perror("Reading Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", 200);
// printf("Read data block %d\n", 216);
int R_tuple_ptr = 0;
int S_tuple_ptr = 0;
int R_blk_ptr = 200;
int S_blk_ptr = 216;
int write_offset = 0;
int match_flag = 0;
int temp_ptr = 0;
int R_is_over = 0;
int intersect_cnt = 0;
while (true) {
R_blk_ptr = 200 + (R_tuple_ptr / tuples_per_blk);
S_blk_ptr = 216 + (S_tuple_ptr / tuples_per_blk);
// 如果当前内存块不包含指定的元组号 则将新的块替换进内存
if(R_blk_ptr != (getNextAddr(R_read_blk) - 1)){
freeBlockInBuffer(R_read_blk, &buf);
if ((R_read_blk = readBlockFromDisk(R_blk_ptr, &buf)) == NULL){
perror("Reading R Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", R_blk_ptr);
}
if(S_blk_ptr != (getNextAddr(S_read_blk) - 1)){
freeBlockInBuffer(S_read_blk, &buf);
if ((S_read_blk = readBlockFromDisk(S_blk_ptr, &buf)) == NULL){
perror("Reading S Block Failed!\n");
return -1;
}
// printf("Read data block %d\n", S_blk_ptr);
}
int R_blk_offset = R_tuple_ptr % tuples_per_blk;
int S_blk_offset = S_tuple_ptr % tuples_per_blk;
int Sc,Sd,Ra,Rb;
read_tuple_from_blk(R_read_blk, R_blk_offset, &Ra, &Rb);
read_tuple_from_blk(S_read_blk, S_blk_offset, &Sc, &Sd);
// 特殊处理出口临界情况
if(R_blk_ptr > 215){
Ra = 999;
}
// 此后都不会发生匹配了
if(Sc > 140){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
write_blk = getNewBlockInBuffer(&buf);
printf("write data into block: %d\n", write_addr - 1);
write_offset = 0;
printf("\n\nThe intersection of S and R has %d tuples.\n", intersect_cnt);
printf("The number of IO is %ld\n", buf.numIO);
return 0;
}
if(Sc < Ra){
++S_tuple_ptr;
if(match_flag){
R_tuple_ptr = temp_ptr;
match_flag = 0;
}
}
else if(Sc > Ra){
++R_tuple_ptr;
}
else if(Sc == Ra){
if(!match_flag){
temp_ptr = R_tuple_ptr;
}
match_flag = 1;
if(Sd == Rb){
write_tuple_into_blk(write_blk, write_offset++, Sc, Sd);
printf("(X=%d, Y=%d)\n",Sc,Sd);
++intersect_cnt;
// ++S_tuple_ptr;
}
++R_tuple_ptr;
}
// 读满了一个块则写入磁盘
if(write_offset >= tuples_per_blk){
sprintf(write_blk + 7 * 8, "%d", write_addr + 1);
if (writeBlockToDisk(write_blk, write_addr++, &buf) != 0)
{
perror("Writing Block Failed!\n");
return -1;
}
write_blk = getNewBlockInBuffer(&buf);
printf("write data into block: %d\n", write_addr - 1);
write_offset = 0;
}
}
}
/*--------------------------------------------------------------------------core algorithm end--------------------------------------------------------------------------*/
int main() {
/*--------------------------------------------------------------------------task 1--------------------------------------------------------------------------*/
printf("----------------------------------------------------------------\n");
printf("Selection algorithm based on linear search S.C=128:\n");
printf("----------------------------------------------------------------\n");
linear_search_relation_select(128, 100);
/*--------------------------------------------------------------------------task 2--------------------------------------------------------------------------*/
printf("\n\n----------------------------------------------------------------\n");
printf("Two Phase Multiway Merge Sort:\n");
printf("----------------------------------------------------------------\n");
printf("\nSort R\n");
two_phase_multiway_merge_sort(R_blk_begin, R_blk_size, 200);
printf("\nSort S\n");
two_phase_multiway_merge_sort(S_blk_begin, S_blk_size, 216);
/*--------------------------------------------------------------------------task 3--------------------------------------------------------------------------*/
printf("\n\n----------------------------------------------------------------\n");
printf("Create indexs for S:\n");
printf("----------------------------------------------------------------\n");
create_index(216, S_blk_size, 250);
printf("\n\n----------------------------------------------------------------\n");
printf("Selection algorithm based on indexs S.C=128:\n");
printf("----------------------------------------------------------------\n");
indexed_search_relation_select(128, 120, 250);
printf("\n\n----------------------------------------------------------------\n");
printf("Sort-Merge-Join select S.C, S.D, R.A, R.B from S inner join R on S.C = R.A:\n");
printf("----------------------------------------------------------------\n");
sort_merge_join(400);
printf("\n\n----------------------------------------------------------------\n");
printf("Sort-Merge-Intersect :\n");
printf("----------------------------------------------------------------\n");
sort_merge_intersect(600);
return 0;
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Hmount/extmem-c-database-query-processing.git
git@gitee.com:Hmount/extmem-c-database-query-processing.git
Hmount
extmem-c-database-query-processing
extmem-c-database-query-processing
master

搜索帮助