Ai
1 Star 0 Fork 0

王晓铎/htslib

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
hts_expr.c 27.83 KB
一键复制 编辑 原始数据 按行查看 历史
Andrew Whitwham 提交于 2024-08-15 22:18 +08:00 . Copyright updates for summer 2024. (#1822)
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
/* hts_expr.c -- filter expression parsing and processing.
Copyright (C) 2020-2022, 2024 Genome Research Ltd.
Author: James Bonfield <jkb@sanger.ac.uk>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notices and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
// TODO:
// - ?: operator for conditionals?
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdint.h>
#include <float.h>
#include <regex.h>
#include <math.h>
#include "htslib/hts_expr.h"
#include "htslib/hts_log.h"
#include "textutils_internal.h"
// Could also cache hts_expr_val_t stack here for kstring reuse?
#define MAX_REGEX 10
struct hts_filter_t {
char *str;
int parsed;
int curr_regex, max_regex;
regex_t preg[MAX_REGEX];
};
/*
* This is designed to be mostly C like with mostly same the precedence rules,
* with the exception of bit operators (widely considered as a mistake in C).
* It's not full C (eg no bit-shifting), but good enough for our purposes.
*
* Supported syntax, in order of precedence:
*
* Grouping: (, ), eg "(1+2)*3"
* Values: integers, floats, strings or variables
* Unary ops: +, -, !, ~ eg -10 +10, !10 (0), ~5 (bitwise not)
* Math ops: *, /, % [TODO: add // for floor division?]
* Math ops: +, -
* Bit-wise: &, ^, | [NB as 3 precedence levels, in that order]
* Conditionals: >, >=, <, <=,
* Equality: ==, !=, =~, !~
* Boolean: &&, ||
*/
// Skip to start of term
static char *ws(char *str) {
while (*str && (*str == ' ' || *str == '\t'))
str++;
return str;
}
static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res);
/*
* Simple functions operating on strings only.
* length, min, max, avg.
*
* All return 0 on success,
* -1 on failure
*/
static int expr_func_length(hts_expr_val_t *res) {
if (!res->is_str)
return -1;
res->is_str = 0;
res->d = res->s.l;
return 0;
}
static int expr_func_min(hts_expr_val_t *res) {
if (!res->is_str)
return -1;
size_t l = res->s.l;
int v = INT_MAX;
const uint8_t *x = (uint8_t *)res->s.s;
for (l = 0; l < res->s.l; l++)
if (v > x[l])
v = x[l];
res->is_str = 0;
res->d = v == INT_MAX ? NAN : v;
return 0;
}
static int expr_func_max(hts_expr_val_t *res) {
if (!res->is_str)
return -1;
size_t l = res->s.l;
int v = INT_MIN;
const uint8_t *x = (uint8_t *)res->s.s;
for (l = 0; l < res->s.l; l++)
if (v < x[l])
v = x[l];
res->is_str = 0;
res->d = v == INT_MIN ? NAN : v;
return 0;
}
static int expr_func_avg(hts_expr_val_t *res) {
if (!res->is_str)
return -1;
size_t l = res->s.l;
double v = 0;
const uint8_t *x = (uint8_t *)res->s.s;
for (l = 0; l < res->s.l; l++)
v += x[l];
if (l)
v /= l;
res->is_str = 0;
res->d = v;
return 0;
}
/*
* functions: FUNC(expr).
* Note for simplicity of parsing, the "(" must immediately follow FUNC,
* so "FUNC (x)" is invalid.
*/
static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
int func_ok = -1;
switch (*str) {
case 'a':
if (strncmp(str, "avg(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = expr_func_avg(res);
}
break;
case 'd':
if (strncmp(str, "default(", 8) == 0) {
if (expression(filt, data, fn, str+8, end, res)) return -1;
if (**end != ',')
return -1;
(*end)++;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
if (expression(filt, data, fn, ws(*end), end, &val)) return -1;
func_ok = 1;
if (!hts_expr_val_existsT(res)) {
kstring_t swap = res->s;
*res = val;
val.s = swap;
hts_expr_val_free(&val);
}
}
break;
case 'e':
if (strncmp(str, "exists(", 7) == 0) {
if (expression(filt, data, fn, str+7, end, res)) return -1;
func_ok = 1;
res->is_true = res->d = hts_expr_val_existsT(res);
res->is_str = 0;
} else if (strncmp(str, "exp(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = 1;
res->d = exp(res->d);
res->is_str = 0;
if (isnan(res->d))
hts_expr_val_undef(res);
}
break;
case 'l':
if (strncmp(str, "length(", 7) == 0) {
if (expression(filt, data, fn, str+7, end, res)) return -1;
func_ok = expr_func_length(res);
} else if (strncmp(str, "log(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = 1;
res->d = log(res->d);
res->is_str = 0;
if (isnan(res->d))
hts_expr_val_undef(res);
}
break;
case 'm':
if (strncmp(str, "min(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = expr_func_min(res);
} else if (strncmp(str, "max(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = expr_func_max(res);
}
break;
case 'p':
if (strncmp(str, "pow(", 4) == 0) {
if (expression(filt, data, fn, str+4, end, res)) return -1;
func_ok = 1;
if (**end != ',')
return -1;
(*end)++;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
if (expression(filt, data, fn, ws(*end), end, &val)) return -1;
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else if (res->is_str || val.is_str) {
hts_expr_val_free(&val); // arith on strings
return -1;
} else {
func_ok = 1;
res->d = pow(res->d, val.d);
hts_expr_val_free(&val);
res->is_str = 0;
}
if (isnan(res->d))
hts_expr_val_undef(res);
}
break;
case 's':
if (strncmp(str, "sqrt(", 5) == 0) {
if (expression(filt, data, fn, str+5, end, res)) return -1;
func_ok = 1;
res->d = sqrt(res->d);
res->is_str = 0;
if (isnan(res->d))
hts_expr_val_undef(res);
}
break;
}
if (func_ok < 0)
return -1;
str = ws(*end);
if (*str != ')') {
fprintf(stderr, "Missing ')'\n");
return -1;
}
*end = str+1;
return 0;
}
/*
* simple_expr
* : identifier
* | constant
* | string
* | func_expr
* | '(' expression ')'
*/
static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
// Main recursion step
str = ws(str);
if (*str == '(') {
if (expression(filt, data, fn, str+1, end, res)) return -1;
str = ws(*end);
if (*str != ')') {
fprintf(stderr, "Missing ')'\n");
return -1;
}
*end = str+1;
return 0;
}
// Otherwise a basic element.
int fail = 0;
double d = hts_str2dbl(str, end, &fail);
if (str != *end) {
res->is_str = 0;
res->d = d;
} else {
// Not valid floating point syntax.
// TODO: add function call names in here; len(), sqrt(), pow(), etc
if (*str == '"') {
res->is_str = 1;
char *e = str+1;
int backslash = 0;
while (*e && *e != '"') {
if (*e == '\\')
backslash=1, e+=1+(e[1]!='\0');
else
e++;
}
kputsn(str+1, e-(str+1), ks_clear(&res->s));
if (backslash) {
size_t i, j;
for (i = j = 0; i < res->s.l; i++) {
res->s.s[j++] = res->s.s[i];
if (res->s.s[i] == '\\') {
switch (res->s.s[++i]) {
case '"': res->s.s[j-1] = '"'; break;
case '\\':res->s.s[j-1] = '\\'; break;
case 't': res->s.s[j-1] = '\t'; break;
case 'n': res->s.s[j-1] = '\n'; break;
case 'r': res->s.s[j-1] = '\r'; break;
default: res->s.s[j++] = res->s.s[i];
}
}
}
res->s.s[j] = 0;
res->s.l = j;
}
if (*e != '"')
return -1;
*end = e+1;
} else if (fn) {
// Try lookup as variable, if not as function
if (fn(data, str, end, res) == 0)
return 0;
else
return func_expr(filt, data, fn, str, end, res);
} else {
return -1;
}
}
return 0;
}
/*
* unary_expr
* : simple_expr
* | '+' simple_expr
* | '-' simple_expr
* | '!' unary_expr // higher precedence
* | '~' unary_expr // higher precedence
*/
static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
int err;
str = ws(str);
if (*str == '+' || *str == '-') {
err = simple_expr(filt, data, fn, str+1, end, res);
if (!hts_expr_val_exists(res)) {
hts_expr_val_undef(res);
} else {
err |= res->is_str;
if (*str == '-')
res->d = -res->d;
res->is_true = res->d != 0;
}
} else if (*str == '!') {
err = unary_expr(filt, data, fn, str+1, end, res);
if (res->is_true) {
// Any explicitly true value becomes false
res->d = res->is_true = 0;
} else if (!hts_expr_val_exists(res)) {
// We can also still negate undef values by toggling the
// is_true override value.
res->d = res->is_true = !res->is_true;
} else if (res->is_str) {
// !null = true, !"foo" = false, NOTE: !"" = false also
res->d = res->is_true = (res->s.s == NULL);
} else {
res->d = !(int64_t)res->d;
res->is_true = res->d != 0;
}
res->is_str = 0;
} else if (*str == '~') {
err = unary_expr(filt, data, fn, str+1, end, res);
if (!hts_expr_val_exists(res)) {
hts_expr_val_undef(res);
} else {
err |= res->is_str;
if (!hts_expr_val_exists(res)) {
hts_expr_val_undef(res);
} else {
res->d = ~(int64_t)res->d;
res->is_true = res->d != 0;
}
}
} else {
err = simple_expr(filt, data, fn, str, end, res);
}
return err ? -1 : 0;
}
/*
* mul_expr
* : unary_expr (
* '*' unary_expr
* | '/' unary_expr
* | '%' unary_expr
* )*
*/
static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (unary_expr(filt, data, fn, str, end, res))
return -1;
str = *end;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
while (*str) {
str = ws(str);
if (*str == '*' || *str == '/' || *str == '%') {
if (unary_expr(filt, data, fn, str+1, end, &val)) return -1;
if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
hts_expr_val_undef(res);
} else if (val.is_str || res->is_str) {
hts_expr_val_free(&val);
return -1; // arith on strings
}
}
if (*str == '*')
res->d *= val.d;
else if (*str == '/')
res->d /= val.d;
else if (*str == '%') {
if (val.d)
res->d = (int64_t)res->d % (int64_t)val.d;
else
hts_expr_val_undef(res);
} else
break;
res->is_true = hts_expr_val_exists(res) && (res->d != 0);
str = *end;
}
hts_expr_val_free(&val);
return 0;
}
/*
* add_expr
* : mul_expr (
* '+' mul_expr
* | '-' mul_expr
* )*
*/
static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (mul_expr(filt, data, fn, str, end, res))
return -1;
str = *end;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
while (*str) {
str = ws(str);
int undef = 0;
if (*str == '+' || *str == '-') {
if (mul_expr(filt, data, fn, str+1, end, &val)) return -1;
if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
undef = 1;
} else if (val.is_str || res->is_str) {
hts_expr_val_free(&val);
return -1; // arith on strings
}
}
if (*str == '+')
res->d += val.d;
else if (*str == '-')
res->d -= val.d;
else
break;
if (undef)
hts_expr_val_undef(res);
else
res->is_true = res->d != 0;
str = *end;
}
hts_expr_val_free(&val);
return 0;
}
/*
* bitand_expr
* : add_expr
* | bitand_expr '&' add_expr
*/
static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (add_expr(filt, data, fn, str, end, res)) return -1;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
int undef = 0;
for (;;) {
str = ws(*end);
if (*str == '&' && str[1] != '&') {
if (add_expr(filt, data, fn, str+1, end, &val)) return -1;
if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
undef = 1;
} else if (res->is_str || val.is_str) {
hts_expr_val_free(&val);
return -1;
} else {
res->is_true =
(res->d = ((int64_t)res->d & (int64_t)val.d)) != 0;
}
} else {
break;
}
}
hts_expr_val_free(&val);
if (undef)
hts_expr_val_undef(res);
return 0;
}
/*
* bitxor_expr
* : bitand_expr
* | bitxor_expr '^' bitand_expr
*/
static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (bitand_expr(filt, data, fn, str, end, res)) return -1;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
int undef = 0;
for (;;) {
str = ws(*end);
if (*str == '^') {
if (bitand_expr(filt, data, fn, str+1, end, &val)) return -1;
if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
undef = 1;
} else if (res->is_str || val.is_str) {
hts_expr_val_free(&val);
return -1;
} else {
res->is_true =
(res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0;
}
} else {
break;
}
}
hts_expr_val_free(&val);
if (undef)
hts_expr_val_undef(res);
return 0;
}
/*
* bitor_expr
* : bitxor_expr
* | bitor_expr '|' bitxor_expr
*/
static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (bitxor_expr(filt, data, fn, str, end, res)) return -1;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
int undef = 0;
for (;;) {
str = ws(*end);
if (*str == '|' && str[1] != '|') {
if (bitxor_expr(filt, data, fn, str+1, end, &val)) return -1;
if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
undef = 1;
} else if (res->is_str || val.is_str) {
hts_expr_val_free(&val);
return -1;
} else {
res->is_true =
(res->d = ((int64_t)res->d | (int64_t)val.d)) != 0;
}
} else {
break;
}
}
hts_expr_val_free(&val);
if (undef)
hts_expr_val_undef(res);
return 0;
}
/*
* cmp_expr
* : bitor_expr
* | cmp_expr '<=' bitor_expr
* | cmp_expr '<' bitor_expr
* | cmp_expr '>=' bitor_expr
* | cmp_expr '>' bitor_expr
*/
static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (bitor_expr(filt, data, fn, str, end, res)) return -1;
str = ws(*end);
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
int err = 0, cmp_done = 0;
if (*str == '>' && str[1] == '=') {
cmp_done = 1;
err = cmp_expr(filt, data, fn, str+2, end, &val);
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true=res->d
= res->is_str && res->s.s && val.is_str && val.s.s
? strcmp(res->s.s, val.s.s) >= 0
: !res->is_str && !val.is_str && res->d >= val.d;
res->is_str = 0;
}
} else if (*str == '>') {
cmp_done = 1;
err = cmp_expr(filt, data, fn, str+1, end, &val);
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true=res->d
= res->is_str && res->s.s && val.is_str && val.s.s
? strcmp(res->s.s, val.s.s) > 0
: !res->is_str && !val.is_str && res->d > val.d;
res->is_str = 0;
}
} else if (*str == '<' && str[1] == '=') {
cmp_done = 1;
err = cmp_expr(filt, data, fn, str+2, end, &val);
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true=res->d
= res->is_str && res->s.s && val.is_str && val.s.s
? strcmp(res->s.s, val.s.s) <= 0
: !res->is_str && !val.is_str && res->d <= val.d;
res->is_str = 0;
}
} else if (*str == '<') {
cmp_done = 1;
err = cmp_expr(filt, data, fn, str+1, end, &val);
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true=res->d
= res->is_str && res->s.s && val.is_str && val.s.s
? strcmp(res->s.s, val.s.s) < 0
: !res->is_str && !val.is_str && res->d < val.d;
res->is_str = 0;
}
}
if (cmp_done && (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)))
hts_expr_val_undef(res);
hts_expr_val_free(&val);
return err ? -1 : 0;
}
/*
* eq_expr
* : cmp_expr
* | eq_expr '==' cmp_expr
* | eq_expr '!=' cmp_expr
* | eq_expr '=~' cmp_expr
* | eq_expr '!~' cmp_expr
*/
static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (cmp_expr(filt, data, fn, str, end, res)) return -1;
str = ws(*end);
int err = 0, eq_done = 0;
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
// numeric vs numeric comparison is as expected
// string vs string comparison is as expected
// numeric vs string is false
if (str[0] == '=' && str[1] == '=') {
eq_done = 1;
if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
res->is_true = res->d = 0;
} else {
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true = res->d = res->is_str
? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0)
: !res->is_str && !val.is_str && res->d == val.d;
}
}
res->is_str = 0;
} else if (str[0] == '!' && str[1] == '=') {
eq_done = 1;
if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
res->is_true = res->d = 0;
} else {
if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
hts_expr_val_undef(res);
} else {
res->is_true = res->d = res->is_str
? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1)
: res->is_str != val.is_str || res->d != val.d;
}
}
res->is_str = 0;
} else if ((str[0] == '=' && str[1] == '~') ||
(str[0] == '!' && str[1] == '~')) {
eq_done = 1;
err = eq_expr(filt, data, fn, str+2, end, &val);
if (!val.is_str || !res->is_str) {
hts_expr_val_free(&val);
return -1;
}
if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) {
regex_t preg_, *preg;
if (filt->curr_regex >= filt->max_regex) {
// Compile regex if not seen before
if (filt->curr_regex >= MAX_REGEX) {
preg = &preg_;
} else {
preg = &filt->preg[filt->curr_regex];
filt->max_regex++;
}
int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB);
if (ec != 0) {
char errbuf[1024];
regerror(ec, preg, errbuf, 1024);
fprintf(stderr, "Failed regex: %.1024s\n", errbuf);
hts_expr_val_free(&val);
return -1;
}
} else {
preg = &filt->preg[filt->curr_regex];
}
res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0
? *str == '=' // matcn
: *str == '!'; // no-match
if (preg == &preg_)
regfree(preg);
filt->curr_regex++;
} else {
// nul regexp or input is considered false
res->is_true = 0;
}
res->is_str = 0;
}
if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res)))
hts_expr_val_undef(res);
hts_expr_val_free(&val);
return err ? -1 : 0;
}
/*
* and_expr
* : eq_expr
* | and_expr 'and' eq_expr
* | and_expr 'or' eq_expr
*/
static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
if (eq_expr(filt, data, fn, str, end, res)) return -1;
for (;;) {
hts_expr_val_t val = HTS_EXPR_VAL_INIT;
str = ws(*end);
if (str[0] == '&' && str[1] == '&') {
if (eq_expr(filt, data, fn, str+2, end, &val)) return -1;
if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) {
hts_expr_val_undef(res);
res->d = 0;
} else {
res->is_true = res->d =
(res->is_true || (res->is_str && res->s.s) || res->d) &&
(val.is_true || (val.is_str && val.s.s) || val.d);
res->is_str = 0;
}
} else if (str[0] == '|' && str[1] == '|') {
if (eq_expr(filt, data, fn, str+2, end, &val)) return -1;
if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) {
// neither defined
hts_expr_val_undef(res);
res->d = 0;
} else if (!hts_expr_val_existsT(res) &&
!(val.is_true || (val.is_str && val.s.s ) || val.d)) {
// LHS undef and RHS false
hts_expr_val_undef(res);
res->d = 0;
} else if (!hts_expr_val_existsT(&val) &&
!(res->is_true || (res->is_str && res->s.s) || res->d)){
// RHS undef and LHS false
hts_expr_val_undef(res);
res->d = 0;
} else {
res->is_true = res->d =
res->is_true || (res->is_str && res->s.s) || res->d ||
val.is_true || (val.is_str && val.s.s ) || val.d;
res->is_str = 0;
}
} else {
break;
}
hts_expr_val_free(&val);
}
return 0;
}
static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
char *str, char **end, hts_expr_val_t *res) {
return and_expr(filt, data, fn, str, end, res);
}
hts_filter_t *hts_filter_init(const char *str) {
hts_filter_t *f = calloc(1, sizeof(*f));
if (!f) return NULL;
// Oversize to permit faster comparisons with memcmp over strcmp
size_t len = strlen(str)+100;
if (!(f->str = malloc(len))) {
free(f);
return NULL;
}
strcpy(f->str, str);
return f;
}
void hts_filter_free(hts_filter_t *filt) {
if (!filt)
return;
int i;
for (i = 0; i < filt->max_regex; i++)
regfree(&filt->preg[i]);
free(filt->str);
free(filt);
}
static int hts_filter_eval_(hts_filter_t *filt,
void *data, hts_expr_sym_func *fn,
hts_expr_val_t *res) {
char *end = NULL;
filt->curr_regex = 0;
if (expression(filt, data, fn, filt->str, &end, res))
return -1;
if (end && *ws(end)) {
fprintf(stderr, "Unable to parse expression at %s\n", filt->str);
return -1;
}
// Strings evaluate to true. An empty string is also true, but an
// absent (null) string is false, unless overriden by is_true. An
// empty string has kstring length of zero, but a pointer as it's
// nul-terminated.
if (res->is_str) {
res->is_true |= res->s.s != NULL;
res->d = res->is_true;
} else if (hts_expr_val_exists(res)) {
res->is_true |= res->d != 0;
}
return 0;
}
int hts_filter_eval(hts_filter_t *filt,
void *data, hts_expr_sym_func *fn,
hts_expr_val_t *res) {
if (res->s.l != 0 || res->s.m != 0 || res->s.s != NULL) {
// As *res is cleared below, it's not safe to call this function
// with res->s.s set, as memory would be leaked. It's also not
// possible to know is res was initialised correctly, so in
// either case we fail.
hts_log_error("Results structure must be cleared before calling this function");
return -1;
}
memset(res, 0, sizeof(*res));
return hts_filter_eval_(filt, data, fn, res);
}
int hts_filter_eval2(hts_filter_t *filt,
void *data, hts_expr_sym_func *fn,
hts_expr_val_t *res) {
ks_free(&res->s);
memset(res, 0, sizeof(*res));
return hts_filter_eval_(filt, data, fn, res);
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/xdds/htslib.git
git@gitee.com:xdds/htslib.git
xdds
htslib
htslib
master

搜索帮助