diff --git a/include/re/re.h b/include/re/re.h index f876f635a..deab6caed 100644 --- a/include/re/re.h +++ b/include/re/re.h @@ -46,8 +46,6 @@ enum re_errno { RE_EERRNO = 1 | RE_MISC, RE_EBADDIALECT = 2 | RE_MISC, RE_EBADGROUP = 3 | RE_MISC, - RE_EUNSUPCAPTUR = 4 | RE_MISC, - RE_EUNSUPPPCRE = 5 | RE_MISC, RE_ENEGRANGE = 0 | RE_MARK | RE_GROUP, RE_ENEGCOUNT = 1 | RE_MARK | RE_GROUP, diff --git a/src/libre/ast_analysis.c b/src/libre/ast_analysis.c index 24db8b6d7..03d6d79b7 100644 --- a/src/libre/ast_analysis.c +++ b/src/libre/ast_analysis.c @@ -783,7 +783,7 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) "%s: LITERAL: rejecting non-optional newline match after $ as unsupported\n", __func__); set_flags(n, AST_FLAG_UNSATISFIABLE); - return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + return AST_ANALYSIS_ERROR_UNSUPPORTED; } } break; @@ -1010,8 +1010,7 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) all_set_past_always_consuming &= child_env.past_always_consuming; any_sat = 1; } - } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE - || res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { + } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED) { continue; } else { return res; @@ -1201,7 +1200,7 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) "%s: SUBTRACT: rejecting non-optional newline match after $ as unsupported\n", __func__); set_flags(n, AST_FLAG_UNSATISFIABLE); - return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + return AST_ANALYSIS_ERROR_UNSUPPORTED; } if (res != AST_ANALYSIS_OK) { @@ -1287,9 +1286,9 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) if (env->followed_by_consuming) { if (env->followed_by_consuming_newline) { LOG(3 - LOG_ANCHORING, - "%s: END anchor & followed_by_consuming, returning UNSUPPORTED_PCRE\n", + "%s: END anchor & followed_by_consuming, returning UNSUPPORTED\n", __func__); - return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + return 
AST_ANALYSIS_ERROR_UNSUPPORTED; } else { LOG(3 - LOG_ANCHORING, "%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n", @@ -1446,9 +1445,8 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) any_set_followed_by_consuming_newline |= child_env.followed_by_consuming_newline; any_sat = 1; } - } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE - || res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { - LOG(3 - LOG_ANCHORING, "%s: got res of UNSUPPORTED_*, bubbling up\n", __func__); + } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED) { + LOG(3 - LOG_ANCHORING, "%s: got res of UNSUPPORTED, bubbling up\n", __func__); return res; } else { return res; @@ -1883,7 +1881,7 @@ analysis_iter_repetition(struct ast_expr *n, struct ast_expr *outermost_repeat_p * * An example input that triggers this is '^(($)|)+$' . */ set_flags(n, AST_FLAG_UNSATISFIABLE); - return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + return AST_ANALYSIS_ERROR_UNSUPPORTED; } } @@ -1994,7 +1992,7 @@ analysis_iter_repetition(struct ast_expr *n, struct ast_expr *outermost_repeat_p && repeat_plus_ancestor->u.repeat.max == AST_COUNT_UNBOUNDED); LOG(3 - LOG_REPETITION_CASES, "%s: not yet implemented, skipping\n", __func__); - /* return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; */ + /* return AST_ANALYSIS_ERROR_UNSUPPORTED; */ } res = analysis_iter_repetition(n->u.group.e, outermost_repeat_parent, diff --git a/src/libre/ast_analysis.h b/src/libre/ast_analysis.h index 5390cce57..0628bd127 100644 --- a/src/libre/ast_analysis.h +++ b/src/libre/ast_analysis.h @@ -31,8 +31,7 @@ enum ast_analysis_res { AST_ANALYSIS_ERROR_NULL = -1, AST_ANALYSIS_ERROR_MEMORY = -2, - AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE = -3, - AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE = -4 + AST_ANALYSIS_ERROR_UNSUPPORTED = -3 }; enum ast_analysis_res diff --git a/src/libre/dialect/comp.h b/src/libre/dialect/comp.h index 7841f776b..d223ab90e 100644 --- a/src/libre/dialect/comp.h +++ b/src/libre/dialect/comp.h @@ -17,7 +17,7 @@ 
typedef struct ast * re_dialect_parse_fun(re_getchar_fun *getchar, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err); + struct re_err *err, struct re_pos *end); re_dialect_parse_fun parse_re_literal; re_dialect_parse_fun parse_re_glob; diff --git a/src/libre/dialect/glob/parser.c b/src/libre/dialect/glob/parser.c index 0de67cf26..746220d4c 100644 --- a/src/libre/dialect/glob/parser.c +++ b/src/libre/dialect/glob/parser.c @@ -648,7 +648,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -661,6 +661,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -714,6 +715,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -779,6 +783,6 @@ ZL0:; return NULL; } -#line 783 "src/libre/dialect/glob/parser.c" +#line 787 "src/libre/dialect/glob/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/glob/parser.h b/src/libre/dialect/glob/parser.h index cfcce16b7..c0ce31ed6 100644 --- a/src/libre/dialect/glob/parser.h +++ b/src/libre/dialect/glob/parser.h @@ -28,7 +28,7 @@ extern void p_re__glob(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/glob/parser.h" diff --git a/src/libre/dialect/like/parser.c b/src/libre/dialect/like/parser.c index eec930c0f..6543095e3 100644 --- a/src/libre/dialect/like/parser.c +++ b/src/libre/dialect/like/parser.c @@ -648,7 +648,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos 
*end) { struct ast *ast; @@ -661,6 +661,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -714,6 +715,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -779,6 +783,6 @@ ZL0:; return NULL; } -#line 783 "src/libre/dialect/like/parser.c" +#line 787 "src/libre/dialect/like/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/like/parser.h b/src/libre/dialect/like/parser.h index 4c7949714..8563a04a6 100644 --- a/src/libre/dialect/like/parser.h +++ b/src/libre/dialect/like/parser.h @@ -28,7 +28,7 @@ extern void p_re__like(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/like/parser.h" diff --git a/src/libre/dialect/literal/parser.c b/src/libre/dialect/literal/parser.c index 7b25cbc6b..f52793a0c 100644 --- a/src/libre/dialect/literal/parser.c +++ b/src/libre/dialect/literal/parser.c @@ -561,7 +561,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -574,6 +574,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -627,6 +628,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -692,6 +696,6 @@ ZL0:; return NULL; } -#line 696 "src/libre/dialect/literal/parser.c" +#line 700 "src/libre/dialect/literal/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/literal/parser.h b/src/libre/dialect/literal/parser.h index b1c20e4bb..672d23e8e 100644 --- 
a/src/libre/dialect/literal/parser.h +++ b/src/libre/dialect/literal/parser.h @@ -28,7 +28,7 @@ extern void p_re__literal(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/literal/parser.h" diff --git a/src/libre/dialect/native/parser.c b/src/libre/dialect/native/parser.c index 7aa79896c..05a3ff0e7 100644 --- a/src/libre/dialect/native/parser.c +++ b/src/libre/dialect/native/parser.c @@ -3057,7 +3057,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -3070,6 +3070,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -3123,6 +3124,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -3188,6 +3192,6 @@ ZL0:; return NULL; } -#line 3192 "src/libre/dialect/native/parser.c" +#line 3196 "src/libre/dialect/native/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/native/parser.h b/src/libre/dialect/native/parser.h index c833c9629..8b1c4d866 100644 --- a/src/libre/dialect/native/parser.h +++ b/src/libre/dialect/native/parser.h @@ -28,7 +28,7 @@ extern void p_re__native(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/native/parser.h" diff --git a/src/libre/dialect/pcre/parser.c b/src/libre/dialect/pcre/parser.c index 12987d311..2400a0812 100644 --- a/src/libre/dialect/pcre/parser.c +++ b/src/libre/dialect/pcre/parser.c @@ -4341,7 +4341,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int 
overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -4354,6 +4354,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -4407,6 +4408,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -4472,6 +4476,6 @@ ZL0:; return NULL; } -#line 4476 "src/libre/dialect/pcre/parser.c" +#line 4480 "src/libre/dialect/pcre/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/pcre/parser.h b/src/libre/dialect/pcre/parser.h index 17a017297..a0543aebe 100644 --- a/src/libre/dialect/pcre/parser.h +++ b/src/libre/dialect/pcre/parser.h @@ -28,7 +28,7 @@ extern void p_re__pcre(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/pcre/parser.h" diff --git a/src/libre/dialect/sql/parser.c b/src/libre/dialect/sql/parser.c index dd60efe03..078b8f67d 100644 --- a/src/libre/dialect/sql/parser.c +++ b/src/libre/dialect/sql/parser.c @@ -2054,7 +2054,7 @@ ZL0:; DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -2067,6 +2067,7 @@ ZL0:; struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -2120,6 +2121,9 @@ ZL0:; DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { @@ -2185,6 +2189,6 @@ ZL0:; return NULL; } -#line 2189 "src/libre/dialect/sql/parser.c" +#line 2193 "src/libre/dialect/sql/parser.c" /* END OF FILE */ diff --git a/src/libre/dialect/sql/parser.h b/src/libre/dialect/sql/parser.h 
index cbbd5b111..e9850510a 100644 --- a/src/libre/dialect/sql/parser.h +++ b/src/libre/dialect/sql/parser.h @@ -28,7 +28,7 @@ extern void p_re__sql(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1204 "src/libre/parser.act" +#line 1208 "src/libre/parser.act" #line 35 "src/libre/dialect/sql/parser.h" diff --git a/src/libre/parser.act b/src/libre/parser.act index 0156fbf08..d8d0e2ac4 100644 --- a/src/libre/parser.act +++ b/src/libre/parser.act @@ -1070,7 +1070,7 @@ DIALECT_PARSE(re_getchar_fun *f, void *opaque, const struct fsm_options *opt, enum re_flags flags, int overlap, - struct re_err *err) + struct re_err *err, struct re_pos *end) { struct ast *ast; @@ -1083,6 +1083,7 @@ struct LX_STATE *lx; assert(f != NULL); + assert(end != NULL); ast = ast_new(); @@ -1136,6 +1137,9 @@ DIALECT_ENTRY(&flags, lex_state, act_state, err, &ast->expr); + /* we output this for reporting errors found through AST analysis */ + mark(end, &lx->end); + lx->free(lx->buf_opaque); if (err->e != RE_ESUCCESS) { diff --git a/src/libre/re.c b/src/libre/re.c index 499bbf194..2923b7d3d 100644 --- a/src/libre/re.c +++ b/src/libre/re.c @@ -98,6 +98,7 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, const struct dialect *m; struct ast *ast = NULL; enum ast_analysis_res res; + struct re_pos end; assert(getc != NULL); @@ -109,7 +110,7 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, flags |= m->flags; - ast = m->parse(getc, opaque, opt, flags, m->overlap, err); + ast = m->parse(getc, opaque, opt, flags, m->overlap, err, &end); if (ast == NULL) { return NULL; @@ -122,25 +123,8 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, /* Do a complete pass over the AST, filling in other details. 
*/ res = ast_analysis(ast, flags); - if (res < 0) { - ast_free(ast); - if (err != NULL) { - if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { - err->e = RE_EUNSUPPPCRE; - } else if (res == AST_ANALYSIS_ERROR_MEMORY) { - /* This case comes up during fuzzing. */ - if (err->e == RE_ESUCCESS) { - err->e = RE_EERRNO; - errno = ENOMEM; - } - } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE) { - err->e = RE_EUNSUPCAPTUR; - } else if (err->e == RE_ESUCCESS) { - err->e = RE_EERRNO; - } - } - return NULL; + goto error; } if (unsatisfiable != NULL) { @@ -148,6 +132,47 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, } return ast; + +error: + + ast_free(ast); + + if (err == NULL) { + return NULL; + } + + switch (res) { + case AST_ANALYSIS_ERROR_MEMORY: + /* This case comes up during fuzzing. */ + if (err->e == RE_ESUCCESS) { + err->e = RE_EERRNO; + errno = ENOMEM; + } + break; + + case AST_ANALYSIS_ERROR_UNSUPPORTED: + err->e = RE_EUNSUPPORTED; + + /* + * We can't tag AST nodes with re_pos, because it's + * also possible to construct an AST from an .fsm file. + * We detect RE_EUNSUPPORTED from annotations (e.g. nullable) + * on arbitrary nodes. So at best we could tag an expr. + * But since in general we'd need to fabricate a pos anyway, + * I'm blaming the entire expression here. 
+ */ + err->start.byte = 0; + err->end.byte = end.byte; + break; + + default: + if (err->e == RE_ESUCCESS) { + err->e = RE_EERRNO; + } + break; + } + + return NULL; } struct fsm * diff --git a/src/libre/strerror.c b/src/libre/strerror.c index d66e750a4..009d61df2 100644 --- a/src/libre/strerror.c +++ b/src/libre/strerror.c @@ -20,8 +20,6 @@ re_strerror(enum re_errno e) case RE_EERRNO: return strerror(errno); case RE_EBADDIALECT: return "Bad dialect"; case RE_EBADGROUP: return "Bad group"; - case RE_EUNSUPCAPTUR: return "Cannot support captures in this case"; - case RE_EUNSUPPPCRE: return "Unsupported PCRE edge case"; case RE_ENEGRANGE: return "Negative group range"; case RE_ENEGCOUNT: return "Negative count range"; diff --git a/tests/pcre/out48.err b/tests/pcre/out48.err index b03d96d7e..d96141a09 100644 --- a/tests/pcre/out48.err +++ b/tests/pcre/out48.err @@ -1 +1 @@ -tests/pcre/in48.re: Unsupported PCRE edge case +tests/pcre/in48.re:1-11: Unsupported operator diff --git a/tests/pcre/out49.err b/tests/pcre/out49.err index d22e4cd6d..b92cc933b 100644 --- a/tests/pcre/out49.err +++ b/tests/pcre/out49.err @@ -1 +1 @@ -tests/pcre/in49.re: Unsupported PCRE edge case +tests/pcre/in49.re:1-17: Unsupported operator