47 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
54 ptrdiff_t
len = end - s;
70 c = *a; *a = *b; *b = c;
105 if (m == 0)
return 0;
118 if (bs[i] != 0)
return 0;
125 onig_is_prelude(
void)
155 buf->
alloc = (
unsigned int)size;
161 #ifdef USE_SUBEXP_CALL
190 size = uslist->
alloc * 2;
212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
214 add_state_check_num(
regex_t* reg,
int num)
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
370 if (empty_info != 0) {
381 if (empty_info != 0) {
395 #ifdef USE_SUBEXP_CALL
416 for (i = 0; i < n; i++) {
425 regex_t* reg ARG_UNUSED,
int ignore_case)
436 len += mb_len * str_len;
465 int rlen, r,
len, prev_len, slen, ambig;
471 if (sn->
end <= sn->
s)
482 for (; p < sn->
end; ) {
484 if (len == prev_len) {
504 if (sn->
end <= sn->
s)
513 int r,
len, prev_len, slen, ambig;
519 if (sn->
end <= sn->
s)
526 prev_len =
enclen(enc, p, end);
531 len =
enclen(enc, p, end);
532 if (len == prev_len) {
552 if (sn->
end <= sn->
s)
561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
657 #define REPEAT_RANGE_ALLOC 4
731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
732 #define CKN_ON (ckn > 0)
734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
739 int len, mod_tlen, cklen;
745 if (tlen < 0)
return tlen;
753 if (qn->
greedy && infinite) {
766 if (infinite && qn->
lower <= 1) {
784 else if (qn->
upper == 0) {
791 if (qn->
lower == 0) {
793 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
810 len += SIZE_OP_STATE_CHECK;
825 if (tlen < 0)
return tlen;
839 r = add_state_check_num(reg, ckn);
858 r = add_state_check_num(reg, ckn);
869 if (infinite && qn->
lower <= 1) {
871 if (qn->
lower == 1) {
880 r = add_state_check_num(reg, ckn);
895 if (qn->
lower == 0) {
904 r = add_state_check_num(reg, ckn);
907 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
913 else if (qn->
upper == 0) {
923 if (qn->
lower == 0) {
927 r = add_state_check_num(reg, ckn);
943 r = add_state_check_num(reg, ckn);
962 r = add_state_check_num(reg, ckn);
978 if (tlen < 0)
return tlen;
982 if (qn->
greedy && infinite) {
1001 len = tlen * qn->
lower;
1018 else if (!infinite && qn->
greedy &&
1021 len = tlen * qn->
lower;
1043 if (tlen < 0)
return tlen;
1064 if (empty_info != 0)
1133 else if (!infinite && qn->
greedy &&
1141 for (i = 0; i < n; i++) {
1173 if (tlen < 0)
return tlen;
1220 if (tlen < 0)
return tlen;
1225 switch (node->
type) {
1227 #ifdef USE_SUBEXP_CALL
1255 if (tlen < 0)
return tlen;
1257 len = tlen * qn->
lower
1283 switch (node->
type) {
1285 #ifdef USE_SUBEXP_CALL
1315 #ifdef USE_SUBEXP_CALL
1348 if (len < 0)
return len;
1384 if (tlen < 0)
return tlen;
1387 switch (node->
type) {
1414 switch (node->
type) {
1424 #ifdef USE_WORD_BEGIN_END
1439 if (len < 0)
return len;
1504 if (r < 0)
return r;
1543 #ifdef USE_BACKREF_WITH_LEVEL
1560 #ifdef USE_SUBEXP_CALL
1643 switch (
NCTYPE(node)->ctype) {
1667 #ifdef USE_BACKREF_WITH_LEVEL
1676 goto add_bacref_mems;
1711 #ifdef USE_BACKREF_WITH_LEVEL
1717 for (i = br->
back_num - 1; i >= 0; i--) {
1725 #ifdef USE_SUBEXP_CALL
1727 r = compile_call(
NCALL(node), reg);
1745 fprintf(stderr,
"compile_tree: undefined node type %d\n",
NTYPE(node));
1753 #ifdef USE_NAMED_GROUP
1759 Node* node = *plink;
1761 switch (
NTYPE(node)) {
1765 r = noname_disable_map(&(
NCAR(node)), map, counter);
1771 Node** ptarget = &(
NQTFR(node)->target);
1772 Node* old = *ptarget;
1773 r = noname_disable_map(ptarget, map, counter);
1788 r = noname_disable_map(&(en->
target), map, counter);
1794 r = noname_disable_map(plink, map, counter);
1798 r = noname_disable_map(&(en->
target), map, counter);
1810 r = noname_disable_map(&(an->
target), map, counter);
1826 int i, pos, n, old_num;
1839 for (i = 0, pos = 0; i < old_num; i++) {
1856 switch (
NTYPE(node)) {
1860 r = renumber_by_map(
NCAR(node), map);
1864 r = renumber_by_map(
NQTFR(node)->target, map);
1867 r = renumber_by_map(
NENCLOSE(node)->target, map);
1871 r = renumber_node_backref(node, map);
1882 r = renumber_by_map(an->
target, map);
1896 numbered_ref_check(
Node* node)
1900 switch (
NTYPE(node)) {
1904 r = numbered_ref_check(
NCAR(node));
1908 r = numbered_ref_check(
NQTFR(node)->target);
1911 r = numbered_ref_check(
NENCLOSE(node)->target);
1929 int r,
i, pos, counter;
1935 for (i = 1; i <= env->
num_mem; i++) {
1939 r = noname_disable_map(root, map, &counter);
1940 if (r != 0)
return r;
1942 r = renumber_by_map(*root, map);
1943 if (r != 0)
return r;
1945 for (i = 1, pos = 1; i <= env->
num_mem; i++) {
1946 if (map[i].new_val > 0) {
1967 #ifdef USE_SUBEXP_CALL
1975 for (i = 0; i < uslist->
num; i++) {
1987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1989 quantifiers_memory_node_info(
Node* node)
1993 switch (
NTYPE(node)) {
1999 v = quantifiers_memory_node_info(
NCAR(node));
2005 #ifdef USE_SUBEXP_CALL
2011 r = quantifiers_memory_node_info(
NCALL(node)->target);
2018 if (qn->
upper != 0) {
2019 r = quantifiers_memory_node_info(qn->
target);
2034 r = quantifiers_memory_node_info(en->
target);
2063 switch (
NTYPE(node)) {
2076 for (i = 1; i < br->
back_num; i++) {
2080 if (*min > tmin) *min = tmin;
2085 #ifdef USE_SUBEXP_CALL
2100 if (r == 0) *min += tmin;
2112 if (y == node) *min = tmin;
2113 else if (*min > tmin) *min = tmin;
2121 *min = sn->
end - sn->
s;
2138 if (qn->
lower > 0) {
2151 #ifdef USE_SUBEXP_CALL
2186 switch (
NTYPE(node)) {
2198 if (r == 0 && *max < tmax) *max = tmax;
2205 *max = sn->
end - sn->
s;
2229 for (i = 0; i < br->
back_num; i++) {
2233 if (*max < tmax) *max = tmax;
2238 #ifdef USE_SUBEXP_CALL
2251 if (qn->
upper != 0) {
2253 if (r == 0 && *max != 0) {
2268 #ifdef USE_SUBEXP_CALL
2296 #define GET_CHAR_LEN_VARLEN -1
2297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2308 switch (
NTYPE(node)) {
2347 while (s < sn->
end) {
2367 #ifdef USE_SUBEXP_CALL
2390 #ifdef USE_SUBEXP_CALL
2457 tmp = x; x = y; y = tmp;
2477 switch (
NCTYPE(y)->ctype) {
2479 if (
NCTYPE(y)->not == 0) {
2553 switch (
NCTYPE(y)->ctype) {
2558 return !(
NCTYPE(y)->not);
2586 for (i = 0, p = ys->
s, q = xs->
s; (
OnigDistance)i < len; i++, p++, q++) {
2587 if (*p != *q)
return 1;
2611 switch (
NTYPE(node)) {
2615 #ifdef USE_SUBEXP_CALL
2635 if (sn->
end <= sn->
s)
2650 if (qn->
lower > 0) {
2719 if ((en->
type & enclose_mask) == 0)
2728 if ((type & anchor_mask) == 0)
2733 type_mask, enclose_mask, anchor_mask);
2742 #ifdef USE_SUBEXP_CALL
2744 #define RECURSION_EXIST 1
2745 #define RECURSION_INFINITE 2
2748 subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2763 ret = subexp_inf_recursive_check(
NCAR(x), env, head);
2764 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2768 if (ret != 0)
return ret;
2769 if (min != 0) head = 0;
2778 r = RECURSION_EXIST;
2780 ret = subexp_inf_recursive_check(
NCAR(node), env, head);
2781 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2788 r = subexp_inf_recursive_check(
NQTFR(node)->target, env, head);
2789 if (r == RECURSION_EXIST) {
2790 if (
NQTFR(node)->lower == 0) r = 0;
2802 r = subexp_inf_recursive_check(an->
target, env, head);
2809 r = subexp_inf_recursive_check(
NCALL(node)->target, env, head);
2816 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2819 r = subexp_inf_recursive_check(
NENCLOSE(node)->target, env, head);
2832 subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
2842 r = subexp_inf_recursive_check_trav(
NCAR(node), env);
2847 r = subexp_inf_recursive_check_trav(
NQTFR(node)->target, env);
2858 r = subexp_inf_recursive_check_trav(an->
target, env);
2870 r = subexp_inf_recursive_check(en->
target, env, 1);
2874 r = subexp_inf_recursive_check_trav(en->
target, env);
2887 subexp_recursive_check(
Node* node)
2891 switch (
NTYPE(node)) {
2895 r |= subexp_recursive_check(
NCAR(node));
2900 r = subexp_recursive_check(
NQTFR(node)->target);
2911 r = subexp_recursive_check(an->
target);
2918 r = subexp_recursive_check(
NCALL(node)->target);
2929 r = subexp_recursive_check(
NENCLOSE(node)->target);
2943 subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
2945 #define FOUND_CALLED_NODE 1
2957 ret = subexp_recursive_check_trav(
NCAR(node), env);
2958 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2959 else if (ret < 0)
return ret;
2965 r = subexp_recursive_check_trav(
NQTFR(node)->target, env);
2966 if (
NQTFR(node)->upper == 0) {
2967 if (r == FOUND_CALLED_NODE)
2968 NQTFR(node)->is_refered = 1;
2980 r = subexp_recursive_check_trav(an->
target, env);
2993 r = subexp_recursive_check(en->
target);
2998 r = subexp_recursive_check_trav(en->
target, env);
3000 r |= FOUND_CALLED_NODE;
3021 r = setup_subexp_call(
NCAR(node), env);
3027 r = setup_subexp_call(
NCAR(node), env);
3032 r = setup_subexp_call(
NQTFR(node)->target, env);
3035 r = setup_subexp_call(
NENCLOSE(node)->target, env);
3046 #ifdef USE_NAMED_GROUP
3059 #ifdef USE_NAMED_GROUP
3072 #ifdef USE_NAMED_GROUP
3106 r = setup_subexp_call(an->
target, env);
3127 Node *head, *np, *insert_node;
3129 int anc_type = an->
type;
3142 NCAR(np) = insert_node;
3185 #ifdef USE_QTFR_PEEK_NEXT
3193 if (qn->
lower <= 1) {
3227 UChar *sbuf, *ebuf, *sp;
3233 sbuf_size = (end - sn->
s) * 2;
3236 ebuf = sbuf + sbuf_size;
3243 for (i = 0; i <
len; i++) {
3247 sp = sbuf + sbuf_size;
3249 ebuf = sbuf + sbuf_size;
3293 int r,
i, j,
len, varlen;
3294 Node *anode, *var_anode, *snode, *xnode, *an;
3300 for (i = 0; i < item_num; i++) {
3301 if (items[i].byte_len != slen) {
3312 if (
IS_NULL(xnode))
goto mem_err;
3313 NCAR(var_anode) = xnode;
3316 if (
IS_NULL(anode))
goto mem_err;
3317 NCAR(xnode) = anode;
3325 if (
IS_NULL(snode))
goto mem_err;
3327 NCAR(anode) = snode;
3329 for (i = 0; i < item_num; i++) {
3331 if (
IS_NULL(snode))
goto mem_err;
3333 for (j = 0; j < items[
i].
code_len; j++) {
3341 if (r != 0)
goto mem_err2;
3349 if (items[i].byte_len != slen) {
3379 NCDR(var_anode) = an;
3403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3405 int r, n,
len, alt_num;
3407 Node *top_root, *root, *snode, *prev_node;
3415 if (start >= end)
return 0;
3418 top_root = root = prev_node = snode =
NULL_NODE;
3442 if (
IS_NULL(snode))
goto mem_err;
3452 if (r != 0)
goto err;
3467 if (r < 0)
goto mem_err;
3470 top_root = prev_node;
3479 root =
NCAR(prev_node);
3500 if (r != 0)
goto mem_err;
3523 top_root = (
IS_NOT_NULL(top_root) ? top_root : prev_node);
3537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3539 #define CEC_THRES_NUM_BIG_REPEAT 512
3540 #define CEC_INFINITE_NUM 0x7fffffff
3542 #define CEC_IN_INFINITE_REPEAT (1<<0)
3543 #define CEC_IN_FINITE_REPEAT (1<<1)
3544 #define CEC_CONT_BIG_REPEAT (1<<2)
3558 r = setup_comb_exp_check(
NCAR(node), r, env);
3568 ret = setup_comb_exp_check(
NCAR(node), state, env);
3576 int child_state =
state;
3583 if (qn->
upper > 1) {
3585 child_state |= CEC_IN_FINITE_REPEAT;
3598 child_state =
state;
3607 if (state & CEC_IN_FINITE_REPEAT) {
3608 qn->comb_exp_check_num = -1;
3612 var_num = CEC_INFINITE_NUM;
3613 child_state |= CEC_IN_INFINITE_REPEAT;
3619 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3620 add_state |= CEC_CONT_BIG_REPEAT;
3622 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3623 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3624 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3625 if (qn->comb_exp_check_num == 0) {
3626 env->num_comb_exp_check++;
3627 qn->comb_exp_check_num = env->num_comb_exp_check;
3628 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3629 env->comb_exp_max_regnum = env->curr_max_regnum;
3634 r = setup_comb_exp_check(target, child_state, env);
3646 if (env->curr_max_regnum < en->
regnum)
3647 env->curr_max_regnum = en->
regnum;
3649 r = setup_comb_exp_check(en->
target, state, env);
3654 r = setup_comb_exp_check(en->
target, state, env);
3660 #ifdef USE_SUBEXP_CALL
3663 env->has_recursion = 1;
3665 r = setup_comb_exp_check(
NCALL(node)->target, state, env);
3677 #define IN_ALT (1<<0)
3678 #define IN_NOT (1<<1)
3679 #define IN_REPEAT (1<<2)
3680 #define IN_VAR_REPEAT (1<<3)
3731 #ifdef USE_SUBEXP_CALL
3743 for (i = 0; i < br->
back_num; i++) {
3747 #ifdef USE_BACKREF_WITH_LEVEL
3772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3773 r = quantifiers_memory_node_info(target);
3781 if (r == 0 && d == 0) {
3800 #define EXPAND_STRING_MAX_LENGTH 100
3810 for (i = 0; i < n; i++) {
3820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3891 #define ALLOWED_TYPE_IN_LB \
3892 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3893 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3898 #define ALLOWED_ANCHOR_IN_LB \
3899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3900 #define ALLOWED_ANCHOR_IN_LB_NOT \
3901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3907 if (r < 0)
return r;
3910 if (r != 0)
return r;
3920 if (r < 0)
return r;
3923 if (r != 0)
return r;
3942 UChar skip[],
int** int_skip)
3950 for (i = 0; i < len - 1; i++)
3951 skip[s[i]] = len - 1 - i;
3960 for (i = 0; i < len - 1; i++)
3961 (*int_skip)[s[
i]] = (int)(len - 1 - i);
3966 #define OPT_EXACT_MAXLEN 24
4019 static const short int ByteValTable[] = {
4020 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4022 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4023 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4024 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4025 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4026 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4027 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4030 if (i < (
int )(
sizeof(ByteValTable)/
sizeof(ByteValTable[0]))) {
4034 return (
int )ByteValTable[
i];
4044 static const short int dist_vals[] = {
4045 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4046 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4047 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4048 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4049 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4050 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4051 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4052 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4053 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4054 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4062 if (d <
sizeof(dist_vals)/
sizeof(dist_vals[0]))
4064 return (
int )dist_vals[d];
4072 if (v2 <= 0)
return -1;
4073 if (v1 <= 0)
return 1;
4078 if (v2 > v1)
return 1;
4079 if (v2 < v1)
return -1;
4081 if (d2->
min < d1->
min)
return 1;
4082 if (d2->
min > d1->
min)
return -1;
4162 if (left_len == 0) {
4167 if (right_len == 0) {
4254 for (i = to->
len; p < end; ) {
4255 len =
enclen(enc, p, end);
4257 for (j = 0; j < len && p < end; j++)
4277 len =
enclen(enc, p, end);
4279 for (j = 0; j < len && p < end; j++)
4291 if (add->
len == 0 || to->
len == 0) {
4301 for (i = 0; i < to->
len && i < add->
len; ) {
4302 if (to->
s[i] != add->
s[i])
break;
4305 for (j = 1; j <
len; j++) {
4306 if (to->
s[i+j] != add->
s[i+j])
break;
4312 if (! add->
reach_end || i < add->len || i < to->len) {
4337 else if (v1 <= 2 && v2 <= 2) {
4342 if (now->
len > 1) v1 += 5;
4343 if (alt->
len > 1) v2 += 5;
4359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4390 if (map->
map[c] == 0) {
4408 if (n < 0)
return n;
4410 for (i = 0; i < n; i++) {
4421 const int z = 1<<15;
4426 if (now->
value == 0) {
4431 v1 = z / now->
value;
4432 v2 = z / alt->
value;
4440 #define COMP_EM_BASE 20
4443 if (m->
value <= 0)
return -1;
4505 int exb_reach, exm_reach;
4533 else if (exm_reach) {
4574 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4696 switch (
NCTYPE(node)->ctype) {
4698 if (
NCTYPE(node)->not != 0) {
4731 switch (
NANCHOR(node)->type) {
4749 else if (nopt.
exm.
len > 0)
4784 for (i = 1; i < br->
back_num; i++) {
4789 if (min > tmin) min = tmin;
4790 if (max < tmax) max = tmax;
4796 #ifdef USE_SUBEXP_CALL
4829 if (qn->
lower > 0) {
4833 for (i = 2; i <= qn->
lower &&
4837 if (i < qn->lower) {
4878 #ifdef USE_SUBEXP_CALL
4910 if (!onig_is_prelude()) fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
4925 if (e->
len == 0)
return 0;
4944 if (e->
len >= 3 || (e->
len >= 2 && allow_reverse)) {
4949 reg->
optimize = (allow_reverse != 0
4992 static void print_optimize_info(
FILE* f,
regex_t* reg);
5044 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5045 if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5071 fprintf(fp,
"\nPATTERN: /");
5081 fprintf(fp,
" 0x%04x ", (
int )code);
5084 fputc((
int )code, fp);
5087 p +=
enclen(enc, p, end);
5092 fputc((
int )*s, fp);
5097 fprintf(fp,
"/ (%s)\n", enc->
name);
5117 print_anchor(
FILE* f,
int anchor)
5124 fprintf(f,
"begin-buf");
5128 if (q) fprintf(f,
", ");
5130 fprintf(f,
"begin-line");
5133 if (q) fprintf(f,
", ");
5135 fprintf(f,
"begin-pos");
5138 if (q) fprintf(f,
", ");
5140 fprintf(f,
"end-buf");
5143 if (q) fprintf(f,
", ");
5145 fprintf(f,
"semi-end-buf");
5148 if (q) fprintf(f,
", ");
5150 fprintf(f,
"end-line");
5153 if (q) fprintf(f,
", ");
5155 fprintf(f,
"anychar-star");
5158 if (q) fprintf(f,
", ");
5159 fprintf(f,
"anychar-star-pl");
5168 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5169 "EXACT_IC",
"MAP" };
5171 fprintf(f,
"optimize: %s\n", on[reg->
optimize]);
5172 fprintf(f,
" anchor: "); print_anchor(f, reg->
anchor);
5178 fprintf(f,
" sub anchor: "); print_anchor(f, reg->
sub_anchor);
5185 fprintf(f,
"exact: [");
5186 for (p = reg->
exact; p < reg->exact_end; p++) {
5195 if (reg->
map[i]) n++;
5197 fprintf(f,
"map: n=%d\n", n);
5202 if (reg->
map[i] != 0) {
5203 if (c > 0) fputs(
", ", f);
5209 fprintf(f,
"%d", i);
5230 #ifdef USE_NAMED_GROUP
5248 size_t size =
sizeof(
regex_t);
5259 #define REGEX_TRANSFER(to,from) do {\
5260 (to)->state = ONIG_STATE_MODIFY;\
5261 onig_free_body(to);\
5262 xmemcpy(to, from, sizeof(regex_t));\
5274 #define REGEX_CHAIN_HEAD(reg) do {\
5275 while (IS_NOT_NULL((reg)->chain)) {\
5276 (reg) = (reg)->chain;\
5308 static void print_compiled_byte_code_list
P_((
FILE* f,
regex_t* reg));
5310 #ifdef ONIG_DEBUG_PARSE_TREE
5311 static void print_tree
P_((
FILE* f,
Node* node));
5316 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5318 #define COMPILE_INIT_SIZE 20
5324 #ifdef USE_SUBEXP_CALL
5335 if (!onig_is_prelude()) print_enc_string(stderr, reg->
enc, pattern, pattern_end);
5338 if (reg->
alloc == 0) {
5339 init_size = (pattern_end - pattern) * 2;
5342 if (r != 0)
goto end;
5352 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5357 if (r != 0)
goto err;
5359 #ifdef ONIG_DEBUG_PARSE_TREE
5361 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5362 if (!onig_is_prelude()) {
5363 print_tree(stderr, root);
5368 #ifdef USE_NAMED_GROUP
5374 r = disable_noname_group_capture(&root, reg, &scan_env);
5376 r = numbered_ref_check(root);
5378 if (r != 0)
goto err;
5382 #ifdef USE_SUBEXP_CALL
5384 r = unset_addr_list_init(&uslist, scan_env.
num_call);
5385 if (r != 0)
goto err;
5387 r = setup_subexp_call(root, &scan_env);
5388 if (r != 0)
goto err_unset;
5389 r = subexp_recursive_check_trav(root, &scan_env);
5390 if (r < 0)
goto err_unset;
5391 r = subexp_inf_recursive_check_trav(root, &scan_env);
5392 if (r != 0)
goto err_unset;
5401 if (r != 0)
goto err_unset;
5403 #ifdef ONIG_DEBUG_PARSE_TREE
5404 if (!onig_is_prelude()) print_tree(stderr, root);
5417 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5423 setup_comb_exp_check(root, 0, &scan_env);
5424 #ifdef USE_SUBEXP_CALL
5425 if (scan_env.has_recursion != 0) {
5426 scan_env.num_comb_exp_check = 0;
5430 if (scan_env.comb_exp_max_regnum > 0) {
5432 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5434 scan_env.num_comb_exp_check = 0;
5445 #ifndef ONIG_DONT_OPTIMIZE
5447 if (r != 0)
goto err_unset;
5458 #ifdef USE_SUBEXP_CALL
5460 r = unset_addr_list_fix(&uslist, reg);
5461 unset_addr_list_end(&uslist);
5475 #ifdef USE_SUBEXP_CALL
5477 unset_addr_list_end(&uslist);
5482 #ifdef ONIG_DEBUG_COMPILE
5483 #ifdef USE_NAMED_GROUP
5484 if (!onig_is_prelude()) onig_print_names(stderr, reg);
5486 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5494 #ifdef USE_SUBEXP_CALL
5496 unset_addr_list_end(&uslist);
5502 einfo->
enc = scan_env.
enc;
5514 #ifdef USE_RECOMPILE_API
5523 r =
onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5567 (reg)->syntax = syntax;
5568 (reg)->optimize = 0;
5570 (reg)->int_map = (
int* )
NULL;
5571 (reg)->int_map_backward = (
int* )
NULL;
5577 (reg)->name_table = (
void* )
NULL;
5579 (reg)->case_fold_flag = case_fold_flag;
5623 if (onig_inited != 0)
5634 #ifdef ONIG_DEBUG_STATISTICS
5635 onig_statistics_init();
5648 #ifdef ONIG_DEBUG_STATISTICS
5649 if (!onig_is_prelude()) onig_print_statistics(stderr);
5652 #ifdef USE_SHARED_CCLASS_TABLE
5656 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5677 for (low = 0, high = n; low < high; ) {
5678 x = (low + high) >> 1;
5679 if (code > data[x * 2 + 1])
5685 return ((low < n && code >= data[low * 2]) ? 1 : 0);
5693 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5729 #define ARG_SPECIAL -1
5731 #define ARG_RELADDR 1
5732 #define ARG_ABSADDR 2
5733 #define ARG_LENGTH 3
5734 #define ARG_MEMNUM 4
5735 #define ARG_OPTION 5
5736 #define ARG_STATE_CHECK 6
5738 OnigOpInfoType OnigOpInfo[] = {
5740 {
OP_END,
"end", ARG_NON },
5796 {
OP_JUMP,
"jump", ARG_RELADDR },
5797 {
OP_PUSH,
"push", ARG_RELADDR },
5798 {
OP_POP,
"pop", ARG_NON },
5820 {
OP_CALL,
"call", ARG_ABSADDR },
5827 "state-check-anychar-ml*", ARG_STATE_CHECK },
5836 for (i = 0; OnigOpInfo[
i].opcode >= 0; i++) {
5837 if (opcode == OnigOpInfo[i].opcode)
5838 return OnigOpInfo[
i].name;
5844 op2arg_type(
int opcode)
5848 for (i = 0; OnigOpInfo[
i].opcode >= 0; i++) {
5849 if (opcode == OnigOpInfo[i].opcode)
5850 return OnigOpInfo[
i].arg_type;
5856 Indent(
FILE* f,
int indent)
5859 for (i = 0; i < indent; i++)
putc(
' ', f);
5866 while (len-- > 0) { fputc(*s++, f); }
5872 int x = len * mb_len;
5874 fprintf(f,
":%d:", len);
5875 while (x-- > 0) { fputc(*s++, f); }
5890 fprintf(f,
"[%s", op2name(*bp));
5891 arg_type = op2arg_type(*bp);
5892 if (arg_type != ARG_SPECIAL) {
5899 fprintf(f,
":(%d)", addr);
5903 fprintf(f,
":(%d)", addr);
5907 fprintf(f,
":%d", len);
5912 fprintf(f,
":%d", mem);
5918 fprintf(f,
":%d", option);
5922 case ARG_STATE_CHECK:
5925 fprintf(f,
":%d", scn);
5934 p_string(f, 1, bp++);
break;
5936 p_string(f, 2, bp); bp += 2;
break;
5938 p_string(f, 3, bp); bp += 3;
break;
5940 p_string(f, 4, bp); bp += 4;
break;
5942 p_string(f, 5, bp); bp += 5;
break;
5945 p_len_string(f, len, 1, bp);
5950 p_string(f, 2, bp); bp += 2;
break;
5952 p_string(f, 4, bp); bp += 4;
break;
5954 p_string(f, 6, bp); bp += 6;
break;
5957 p_len_string(f, len, 2, bp);
5962 p_len_string(f, len, 3, bp);
5971 fprintf(f,
":%d:%d:", mb_len, len);
5973 while (n-- > 0) { fputc(*bp++, f); }
5978 len =
enclen(enc, bp, bpend);
5979 p_string(f, len, bp);
5984 p_len_string(f, len, 1, bp);
5991 fprintf(f,
":%d", n);
5997 fprintf(f,
":%d", n);
6004 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6009 fprintf(f,
":%d:%d", (
int )code, len);
6018 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6023 fprintf(f,
":%d:%d:%d", n, (
int )code, len);
6031 n = bitset_on_num(cc->
bs);
6032 fprintf(f,
":%"PRIuPTR
":%d", (
uintptr_t)cc, n);
6039 fprintf(f,
":%d", mem);
6046 for (i = 0; i <
len; i++) {
6048 if (i > 0) fputs(
", ", f);
6049 fprintf(f,
"%d", mem);
6059 fprintf(f,
":%d", option);
6061 fprintf(f,
":%d", level);
6065 for (i = 0; i <
len; i++) {
6067 if (i > 0) fputs(
", ", f);
6068 fprintf(f,
"%d", mem);
6080 fprintf(f,
":%d:%d", mem, addr);
6088 fprintf(f,
":(%d)", addr);
6095 fprintf(f,
":%d", len);
6101 fprintf(f,
":%d:(%d)", len, addr);
6110 fprintf(f,
":%d:(%d)", scn, addr);
6114 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6119 if (nextp) *nextp =
bp;
6123 print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
6129 fprintf(f,
"code length: %d\n", reg->
used);
6136 fprintf(f,
"\n%ld:", bp-reg->
p);
6138 fprintf(f,
" %ld:", bp-reg->
p);
6147 print_indent_tree(
FILE* f,
Node* node,
int indent)
6149 int i,
type, container_p = 0;
6155 fprintf(f,
"ERROR: null node!!!\n");
6164 fprintf(f,
"<list:%"PRIxPTR
">\n", (
intptr_t)node);
6166 fprintf(f,
"<alt:%"PRIxPTR
">\n", (
intptr_t)node);
6168 print_indent_tree(f,
NCAR(node), indent + add);
6170 if (
NTYPE(node) != type) {
6171 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n",
NTYPE(node));
6174 print_indent_tree(f,
NCAR(node), indent + add);
6179 fprintf(f,
"<string%s:%"PRIxPTR
">",
6181 for (p =
NSTR(node)->s; p <
NSTR(node)->end; p++) {
6182 if (*p >= 0x20 && *p < 0x7f)
6185 fprintf(f,
" 0x%02x", *p);
6191 fprintf(f,
"<cclass:%"PRIxPTR
">", (
intptr_t)node);
6195 for (i = 0; i < (int)bbuf->
used; i++) {
6196 if (i > 0) fprintf(f,
",");
6197 fprintf(f,
"%0x", bbuf->
p[i]);
6203 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (
intptr_t)node);
6204 switch (
NCTYPE(node)->ctype) {
6206 if (
NCTYPE(node)->not != 0)
6207 fputs(
"not word", f);
6213 fprintf(f,
"ERROR: undefined ctype.\n");
6219 fprintf(f,
"<anychar:%"PRIxPTR
">", (
intptr_t)node);
6223 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (
intptr_t)node);
6224 switch (
NANCHOR(node)->type) {
6225 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6226 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6227 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6228 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6229 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6230 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6234 #ifdef USE_WORD_BEGIN_END
6244 fprintf(f,
"ERROR: undefined anchor type.\n");
6254 fprintf(f,
"<backref:%"PRIxPTR
">", (
intptr_t)node);
6255 for (i = 0; i < br->
back_num; i++) {
6256 if (i > 0) fputs(
", ", f);
6257 fprintf(f,
"%d", p[i]);
6262 #ifdef USE_SUBEXP_CALL
6266 fprintf(f,
"<call:%"PRIxPTR
">", (
intptr_t)node);
6273 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (
intptr_t)node,
6275 (
NQTFR(node)->greedy ?
"" :
"?"));
6276 print_indent_tree(f,
NQTFR(node)->target, indent + add);
6280 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (
intptr_t)node);
6283 fprintf(f,
"option:%d\n",
NENCLOSE(node)->option);
6284 print_indent_tree(f,
NENCLOSE(node)->target, indent + add);
6287 fprintf(f,
"memory:%d",
NENCLOSE(node)->regnum);
6290 fprintf(f,
"stop-bt");
6297 print_indent_tree(f,
NENCLOSE(node)->target, indent + add);
6301 fprintf(f,
"print_indent_tree: undefined node type %d\n",
NTYPE(node));
6309 if (container_p) print_indent_tree(f,
NANCHOR(node)->target, indent + add);
6315 #ifdef ONIG_DEBUG_PARSE_TREE
6319 print_indent_tree(f, node, 0);