Ruby  1.9.3p392(2013-02-22revision39386)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in the
15  * documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
33 
34 extern OnigCaseFoldType
36 {
38 }
39 
40 extern int
42 {
43  OnigDefaultCaseFoldFlag = case_fold_flag;
44  return 0;
45 }
46 
47 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
48 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
49 #endif
50 
51 static UChar*
53 {
54  ptrdiff_t len = end - s;
55 
56  if (len > 0) {
57  UChar* r = (UChar* )xmalloc(len + 1);
59  xmemcpy(r, s, len);
60  r[len] = (UChar )0;
61  return r;
62  }
63  else return NULL;
64 }
65 
66 static void
68 {
69  Node c;
70  c = *a; *a = *b; *b = c;
71 
72  if (NTYPE(a) == NT_STR) {
73  StrNode* sn = NSTR(a);
74  if (sn->capa == 0) {
75  size_t len = sn->end - sn->s;
76  sn->s = sn->buf;
77  sn->end = sn->s + len;
78  }
79  }
80 
81  if (NTYPE(b) == NT_STR) {
82  StrNode* sn = NSTR(b);
83  if (sn->capa == 0) {
84  size_t len = sn->end - sn->s;
85  sn->s = sn->buf;
86  sn->end = sn->s + len;
87  }
88  }
89 }
90 
91 static OnigDistance
93 {
96  else {
97  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
98  else return ONIG_INFINITE_DISTANCE;
99  }
100 }
101 
102 static OnigDistance
104 {
105  if (m == 0) return 0;
106 
107  if (d < ONIG_INFINITE_DISTANCE / m)
108  return d * m;
109  else
110  return ONIG_INFINITE_DISTANCE;
111 }
112 
113 static int
115 {
116  int i;
117  for (i = 0; i < (int )BITSET_SIZE; i++) {
118  if (bs[i] != 0) return 0;
119  }
120  return 1;
121 }
122 
123 #ifdef ONIG_DEBUG
124 static int
125 onig_is_prelude(void)
126 {
127  return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
128 }
129 
130 static int
131 bitset_on_num(BitSetRef bs)
132 {
133  int i, n;
134 
135  n = 0;
136  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
137  if (BITSET_AT(bs, i)) n++;
138  }
139  return n;
140 }
141 #endif
142 
143 extern int
145 {
146  if (size <= 0) {
147  size = 0;
148  buf->p = NULL;
149  }
150  else {
151  buf->p = (UChar* )xmalloc(size);
152  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
153  }
154 
155  buf->alloc = (unsigned int)size;
156  buf->used = 0;
157  return 0;
158 }
159 
160 
161 #ifdef USE_SUBEXP_CALL
162 
163 static int
164 unset_addr_list_init(UnsetAddrList* uslist, int size)
165 {
166  UnsetAddr* p;
167 
168  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
170  uslist->num = 0;
171  uslist->alloc = size;
172  uslist->us = p;
173  return 0;
174 }
175 
176 static void
177 unset_addr_list_end(UnsetAddrList* uslist)
178 {
179  if (IS_NOT_NULL(uslist->us))
180  xfree(uslist->us);
181 }
182 
183 static int
184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
185 {
186  UnsetAddr* p;
187  int size;
188 
189  if (uslist->num >= uslist->alloc) {
190  size = uslist->alloc * 2;
191  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
193  uslist->alloc = size;
194  uslist->us = p;
195  }
196 
197  uslist->us[uslist->num].offset = offset;
198  uslist->us[uslist->num].target = node;
199  uslist->num++;
200  return 0;
201 }
202 #endif /* USE_SUBEXP_CALL */
203 
204 
205 static int
206 add_opcode(regex_t* reg, int opcode)
207 {
208  BBUF_ADD1(reg, opcode);
209  return 0;
210 }
211 
212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
213 static int
214 add_state_check_num(regex_t* reg, int num)
215 {
217 
218  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
219  return 0;
220 }
221 #endif
222 
223 static int
224 add_rel_addr(regex_t* reg, int addr)
225 {
226  RelAddrType ra = (RelAddrType )addr;
227 
228  BBUF_ADD(reg, &ra, SIZE_RELADDR);
229  return 0;
230 }
231 
232 static int
233 add_abs_addr(regex_t* reg, int addr)
234 {
235  AbsAddrType ra = (AbsAddrType )addr;
236 
237  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
238  return 0;
239 }
240 
241 static int
243 {
244  LengthType l = (LengthType )len;
245 
246  BBUF_ADD(reg, &l, SIZE_LENGTH);
247  return 0;
248 }
249 
250 static int
251 add_mem_num(regex_t* reg, int num)
252 {
253  MemNumType n = (MemNumType )num;
254 
255  BBUF_ADD(reg, &n, SIZE_MEMNUM);
256  return 0;
257 }
258 
259 static int
260 add_pointer(regex_t* reg, void* addr)
261 {
262  PointerType ptr = (PointerType )addr;
263 
264  BBUF_ADD(reg, &ptr, SIZE_POINTER);
265  return 0;
266 }
267 
268 static int
270 {
271  BBUF_ADD(reg, &option, SIZE_OPTION);
272  return 0;
273 }
274 
275 static int
276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277 {
278  int r;
279 
280  r = add_opcode(reg, opcode);
281  if (r) return r;
282  r = add_rel_addr(reg, addr);
283  return r;
284 }
285 
286 static int
288 {
289  BBUF_ADD(reg, bytes, len);
290  return 0;
291 }
292 
293 static int
295 {
296  BBUF_ADD(reg, bs, SIZE_BITSET);
297  return 0;
298 }
299 
300 static int
301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302 {
303  int r;
304 
305  r = add_opcode(reg, opcode);
306  if (r) return r;
307  r = add_option(reg, option);
308  return r;
309 }
310 
311 static int compile_length_tree(Node* node, regex_t* reg);
312 static int compile_tree(Node* node, regex_t* reg);
313 
314 
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318 
319 static int
320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
321 {
322  int op;
323 
324  if (ignore_case) {
325  switch (str_len) {
326  case 1: op = OP_EXACT1_IC; break;
327  default: op = OP_EXACTN_IC; break;
328  }
329  }
330  else {
331  switch (mb_len) {
332  case 1:
333  switch (str_len) {
334  case 1: op = OP_EXACT1; break;
335  case 2: op = OP_EXACT2; break;
336  case 3: op = OP_EXACT3; break;
337  case 4: op = OP_EXACT4; break;
338  case 5: op = OP_EXACT5; break;
339  default: op = OP_EXACTN; break;
340  }
341  break;
342 
343  case 2:
344  switch (str_len) {
345  case 1: op = OP_EXACTMB2N1; break;
346  case 2: op = OP_EXACTMB2N2; break;
347  case 3: op = OP_EXACTMB2N3; break;
348  default: op = OP_EXACTMB2N; break;
349  }
350  break;
351 
352  case 3:
353  op = OP_EXACTMB3N;
354  break;
355 
356  default:
357  op = OP_EXACTMBN;
358  break;
359  }
360  }
361  return op;
362 }
363 
364 static int
365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
366 {
367  int r;
368  int saved_num_null_check = reg->num_null_check;
369 
370  if (empty_info != 0) {
372  if (r) return r;
373  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
374  if (r) return r;
375  reg->num_null_check++;
376  }
377 
378  r = compile_tree(node, reg);
379  if (r) return r;
380 
381  if (empty_info != 0) {
382  if (empty_info == NQ_TARGET_IS_EMPTY)
383  r = add_opcode(reg, OP_NULL_CHECK_END);
384  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388 
389  if (r) return r;
390  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
391  }
392  return r;
393 }
394 
395 #ifdef USE_SUBEXP_CALL
396 static int
397 compile_call(CallNode* node, regex_t* reg)
398 {
399  int r;
400 
401  r = add_opcode(reg, OP_CALL);
402  if (r) return r;
403  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
404  node->target);
405  if (r) return r;
406  r = add_abs_addr(reg, 0 /*dummy addr.*/);
407  return r;
408 }
409 #endif
410 
411 static int
412 compile_tree_n_times(Node* node, int n, regex_t* reg)
413 {
414  int i, r;
415 
416  for (i = 0; i < n; i++) {
417  r = compile_tree(node, reg);
418  if (r) return r;
419  }
420  return 0;
421 }
422 
423 static int
424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
425  regex_t* reg ARG_UNUSED, int ignore_case)
426 {
427  int len;
428  int op = select_str_opcode(mb_len, str_len, ignore_case);
429 
430  len = SIZE_OPCODE;
431 
432  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
433  if (IS_NEED_STR_LEN_OP_EXACT(op))
434  len += SIZE_LENGTH;
435 
436  len += mb_len * str_len;
437  return len;
438 }
439 
440 static int
441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
442  regex_t* reg, int ignore_case)
443 {
444  int op = select_str_opcode(mb_len, str_len, ignore_case);
445  add_opcode(reg, op);
446 
447  if (op == OP_EXACTMBN)
448  add_length(reg, mb_len);
449 
450  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
451  if (op == OP_EXACTN_IC)
452  add_length(reg, mb_len * str_len);
453  else
454  add_length(reg, str_len);
455  }
456 
457  add_bytes(reg, s, mb_len * str_len);
458  return 0;
459 }
460 
461 
462 static int
464 {
465  int rlen, r, len, prev_len, slen, ambig;
466  OnigEncoding enc = reg->enc;
467  UChar *p, *prev;
468  StrNode* sn;
469 
470  sn = NSTR(node);
471  if (sn->end <= sn->s)
472  return 0;
473 
474  ambig = NSTRING_IS_AMBIG(node);
475 
476  p = prev = sn->s;
477  prev_len = enclen(enc, p, sn->end);
478  p += prev_len;
479  slen = 1;
480  rlen = 0;
481 
482  for (; p < sn->end; ) {
483  len = enclen(enc, p, sn->end);
484  if (len == prev_len) {
485  slen++;
486  }
487  else {
488  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
489  rlen += r;
490  prev = p;
491  slen = 1;
492  prev_len = len;
493  }
494  p += len;
495  }
496  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
497  rlen += r;
498  return rlen;
499 }
500 
501 static int
503 {
504  if (sn->end <= sn->s)
505  return 0;
506 
507  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
508 }
509 
510 static int
512 {
513  int r, len, prev_len, slen, ambig;
514  OnigEncoding enc = reg->enc;
515  UChar *p, *prev, *end;
516  StrNode* sn;
517 
518  sn = NSTR(node);
519  if (sn->end <= sn->s)
520  return 0;
521 
522  end = sn->end;
523  ambig = NSTRING_IS_AMBIG(node);
524 
525  p = prev = sn->s;
526  prev_len = enclen(enc, p, end);
527  p += prev_len;
528  slen = 1;
529 
530  for (; p < end; ) {
531  len = enclen(enc, p, end);
532  if (len == prev_len) {
533  slen++;
534  }
535  else {
536  r = add_compile_string(prev, prev_len, slen, reg, ambig);
537  if (r) return r;
538 
539  prev = p;
540  slen = 1;
541  prev_len = len;
542  }
543 
544  p += len;
545  }
546  return add_compile_string(prev, prev_len, slen, reg, ambig);
547 }
548 
549 static int
551 {
552  if (sn->end <= sn->s)
553  return 0;
554 
555  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
556 }
557 
558 static int
560 {
561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
562  add_length(reg, mbuf->used);
563  return add_bytes(reg, mbuf->p, mbuf->used);
564 #else
565  int r, pad_size;
567 
568  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
569  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
570  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
571 
572  r = add_bytes(reg, mbuf->p, mbuf->used);
573 
574  /* padding for return value from compile_length_cclass_node() to be fix. */
575  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
576  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
577  return r;
578 #endif
579 }
580 
581 static int
583 {
584  int len;
585 
586  if (IS_NCCLASS_SHARE(cc)) {
587  len = SIZE_OPCODE + SIZE_POINTER;
588  return len;
589  }
590 
591  if (IS_NULL(cc->mbuf)) {
592  len = SIZE_OPCODE + SIZE_BITSET;
593  }
594  else {
595  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
596  len = SIZE_OPCODE;
597  }
598  else {
599  len = SIZE_OPCODE + SIZE_BITSET;
600  }
601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
602  len += SIZE_LENGTH + cc->mbuf->used;
603 #else
604  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
605 #endif
606  }
607 
608  return len;
609 }
610 
611 static int
613 {
614  int r;
615 
616  if (IS_NCCLASS_SHARE(cc)) {
618  r = add_pointer(reg, cc);
619  return r;
620  }
621 
622  if (IS_NULL(cc->mbuf)) {
623  if (IS_NCCLASS_NOT(cc))
625  else
626  add_opcode(reg, OP_CCLASS);
627 
628  r = add_bitset(reg, cc->bs);
629  }
630  else {
631  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
632  if (IS_NCCLASS_NOT(cc))
634  else
635  add_opcode(reg, OP_CCLASS_MB);
636 
637  r = add_multi_byte_cclass(cc->mbuf, reg);
638  }
639  else {
640  if (IS_NCCLASS_NOT(cc))
642  else
644 
645  r = add_bitset(reg, cc->bs);
646  if (r) return r;
647  r = add_multi_byte_cclass(cc->mbuf, reg);
648  }
649  }
650 
651  return r;
652 }
653 
654 static int
655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
656 {
657 #define REPEAT_RANGE_ALLOC 4
658 
660 
661  if (reg->repeat_range_alloc == 0) {
664  reg->repeat_range = p;
666  }
667  else if (reg->repeat_range_alloc <= id) {
668  int n;
671  sizeof(OnigRepeatRange) * n);
673  reg->repeat_range = p;
674  reg->repeat_range_alloc = n;
675  }
676  else {
677  p = reg->repeat_range;
678  }
679 
680  p[id].lower = lower;
681  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
682  return 0;
683 }
684 
685 static int
686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
687  regex_t* reg)
688 {
689  int r;
690  int num_repeat = reg->num_repeat;
691 
692  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
693  if (r) return r;
694  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
695  reg->num_repeat++;
696  if (r) return r;
697  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
698  if (r) return r;
699 
700  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
701  if (r) return r;
702 
703  r = compile_tree_empty_check(qn->target, reg, empty_info);
704  if (r) return r;
705 
706  if (
707 #ifdef USE_SUBEXP_CALL
708  reg->num_call > 0 ||
709 #endif
712  }
713  else {
715  }
716  if (r) return r;
717  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
718  return r;
719 }
720 
721 static int
723 {
724  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
725  NTYPE(qn->target) == NT_CANY)
726  return 1;
727  else
728  return 0;
729 }
730 
731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
732 #define CKN_ON (ckn > 0)
733 
734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
735 
736 static int
738 {
739  int len, mod_tlen, cklen;
740  int ckn;
741  int infinite = IS_REPEAT_INFINITE(qn->upper);
742  int empty_info = qn->target_empty_info;
743  int tlen = compile_length_tree(qn->target, reg);
744 
745  if (tlen < 0) return tlen;
746 
747  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
748 
749  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
750 
751  /* anychar repeat */
752  if (NTYPE(qn->target) == NT_CANY) {
753  if (qn->greedy && infinite) {
754  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
755  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
756  else
757  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
758  }
759  }
760 
761  if (empty_info != 0)
762  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
763  else
764  mod_tlen = tlen;
765 
766  if (infinite && qn->lower <= 1) {
767  if (qn->greedy) {
768  if (qn->lower == 1)
769  len = SIZE_OP_JUMP;
770  else
771  len = 0;
772 
773  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
774  }
775  else {
776  if (qn->lower == 0)
777  len = SIZE_OP_JUMP;
778  else
779  len = 0;
780 
781  len += mod_tlen + SIZE_OP_PUSH + cklen;
782  }
783  }
784  else if (qn->upper == 0) {
785  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
786  len = SIZE_OP_JUMP + tlen;
787  else
788  len = 0;
789  }
790  else if (qn->upper == 1 && qn->greedy) {
791  if (qn->lower == 0) {
792  if (CKN_ON) {
793  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
794  }
795  else {
796  len = SIZE_OP_PUSH + tlen;
797  }
798  }
799  else {
800  len = tlen;
801  }
802  }
803  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
804  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
805  }
806  else {
807  len = SIZE_OP_REPEAT_INC
808  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
809  if (CKN_ON)
810  len += SIZE_OP_STATE_CHECK;
811  }
812 
813  return len;
814 }
815 
816 static int
818 {
819  int r, mod_tlen;
820  int ckn;
821  int infinite = IS_REPEAT_INFINITE(qn->upper);
822  int empty_info = qn->target_empty_info;
823  int tlen = compile_length_tree(qn->target, reg);
824 
825  if (tlen < 0) return tlen;
826 
827  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
828 
829  if (is_anychar_star_quantifier(qn)) {
830  r = compile_tree_n_times(qn->target, qn->lower, reg);
831  if (r) return r;
832  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
833  if (IS_MULTILINE(reg->options))
835  else
837  if (r) return r;
838  if (CKN_ON) {
839  r = add_state_check_num(reg, ckn);
840  if (r) return r;
841  }
842 
843  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
844  }
845  else {
846  if (IS_MULTILINE(reg->options)) {
847  r = add_opcode(reg, (CKN_ON ?
849  : OP_ANYCHAR_ML_STAR));
850  }
851  else {
852  r = add_opcode(reg, (CKN_ON ?
854  : OP_ANYCHAR_STAR));
855  }
856  if (r) return r;
857  if (CKN_ON)
858  r = add_state_check_num(reg, ckn);
859 
860  return r;
861  }
862  }
863 
864  if (empty_info != 0)
865  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
866  else
867  mod_tlen = tlen;
868 
869  if (infinite && qn->lower <= 1) {
870  if (qn->greedy) {
871  if (qn->lower == 1) {
872  r = add_opcode_rel_addr(reg, OP_JUMP,
873  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
874  if (r) return r;
875  }
876 
877  if (CKN_ON) {
879  if (r) return r;
880  r = add_state_check_num(reg, ckn);
881  if (r) return r;
882  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
883  }
884  else {
885  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
886  }
887  if (r) return r;
888  r = compile_tree_empty_check(qn->target, reg, empty_info);
889  if (r) return r;
890  r = add_opcode_rel_addr(reg, OP_JUMP,
891  -(mod_tlen + (int )SIZE_OP_JUMP
892  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
893  }
894  else {
895  if (qn->lower == 0) {
896  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
897  if (r) return r;
898  }
899  r = compile_tree_empty_check(qn->target, reg, empty_info);
900  if (r) return r;
901  if (CKN_ON) {
903  if (r) return r;
904  r = add_state_check_num(reg, ckn);
905  if (r) return r;
906  r = add_rel_addr(reg,
907  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
908  }
909  else
910  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
911  }
912  }
913  else if (qn->upper == 0) {
914  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
915  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
916  if (r) return r;
917  r = compile_tree(qn->target, reg);
918  }
919  else
920  r = 0;
921  }
922  else if (qn->upper == 1 && qn->greedy) {
923  if (qn->lower == 0) {
924  if (CKN_ON) {
926  if (r) return r;
927  r = add_state_check_num(reg, ckn);
928  if (r) return r;
929  r = add_rel_addr(reg, tlen);
930  }
931  else {
932  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
933  }
934  if (r) return r;
935  }
936 
937  r = compile_tree(qn->target, reg);
938  }
939  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
940  if (CKN_ON) {
942  if (r) return r;
943  r = add_state_check_num(reg, ckn);
944  if (r) return r;
945  r = add_rel_addr(reg, SIZE_OP_JUMP);
946  }
947  else {
949  }
950 
951  if (r) return r;
952  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
953  if (r) return r;
954  r = compile_tree(qn->target, reg);
955  }
956  else {
957  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
958  if (CKN_ON) {
959  if (r) return r;
960  r = add_opcode(reg, OP_STATE_CHECK);
961  if (r) return r;
962  r = add_state_check_num(reg, ckn);
963  }
964  }
965  return r;
966 }
967 
968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
969 
970 static int
972 {
973  int len, mod_tlen;
974  int infinite = IS_REPEAT_INFINITE(qn->upper);
975  int empty_info = qn->target_empty_info;
976  int tlen = compile_length_tree(qn->target, reg);
977 
978  if (tlen < 0) return tlen;
979 
980  /* anychar repeat */
981  if (NTYPE(qn->target) == NT_CANY) {
982  if (qn->greedy && infinite) {
983  if (IS_NOT_NULL(qn->next_head_exact))
984  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
985  else
986  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
987  }
988  }
989 
990  if (empty_info != 0)
991  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
992  else
993  mod_tlen = tlen;
994 
995  if (infinite &&
996  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
997  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
998  len = SIZE_OP_JUMP;
999  }
1000  else {
1001  len = tlen * qn->lower;
1002  }
1003 
1004  if (qn->greedy) {
1005  if (IS_NOT_NULL(qn->head_exact))
1006  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1007  else if (IS_NOT_NULL(qn->next_head_exact))
1008  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1009  else
1010  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1011  }
1012  else
1013  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1014  }
1015  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1016  len = SIZE_OP_JUMP + tlen;
1017  }
1018  else if (!infinite && qn->greedy &&
1019  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1021  len = tlen * qn->lower;
1022  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1023  }
1024  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1025  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1026  }
1027  else {
1028  len = SIZE_OP_REPEAT_INC
1029  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1030  }
1031 
1032  return len;
1033 }
1034 
1035 static int
1037 {
1038  int i, r, mod_tlen;
1039  int infinite = IS_REPEAT_INFINITE(qn->upper);
1040  int empty_info = qn->target_empty_info;
1041  int tlen = compile_length_tree(qn->target, reg);
1042 
1043  if (tlen < 0) return tlen;
1044 
1045  if (is_anychar_star_quantifier(qn)) {
1046  r = compile_tree_n_times(qn->target, qn->lower, reg);
1047  if (r) return r;
1048  if (IS_NOT_NULL(qn->next_head_exact)) {
1049  if (IS_MULTILINE(reg->options))
1051  else
1053  if (r) return r;
1054  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1055  }
1056  else {
1057  if (IS_MULTILINE(reg->options))
1058  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1059  else
1060  return add_opcode(reg, OP_ANYCHAR_STAR);
1061  }
1062  }
1063 
1064  if (empty_info != 0)
1065  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1066  else
1067  mod_tlen = tlen;
1068 
1069  if (infinite &&
1070  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1071  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1072  if (qn->greedy) {
1073  if (IS_NOT_NULL(qn->head_exact))
1075  else if (IS_NOT_NULL(qn->next_head_exact))
1077  else
1079  }
1080  else {
1082  }
1083  if (r) return r;
1084  }
1085  else {
1086  r = compile_tree_n_times(qn->target, qn->lower, reg);
1087  if (r) return r;
1088  }
1089 
1090  if (qn->greedy) {
1091  if (IS_NOT_NULL(qn->head_exact)) {
1093  mod_tlen + SIZE_OP_JUMP);
1094  if (r) return r;
1095  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097  if (r) return r;
1098  r = add_opcode_rel_addr(reg, OP_JUMP,
1099  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100  }
1101  else if (IS_NOT_NULL(qn->next_head_exact)) {
1103  mod_tlen + SIZE_OP_JUMP);
1104  if (r) return r;
1105  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1106  r = compile_tree_empty_check(qn->target, reg, empty_info);
1107  if (r) return r;
1108  r = add_opcode_rel_addr(reg, OP_JUMP,
1109  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1110  }
1111  else {
1112  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1113  if (r) return r;
1114  r = compile_tree_empty_check(qn->target, reg, empty_info);
1115  if (r) return r;
1116  r = add_opcode_rel_addr(reg, OP_JUMP,
1117  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1118  }
1119  }
1120  else {
1121  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1122  if (r) return r;
1123  r = compile_tree_empty_check(qn->target, reg, empty_info);
1124  if (r) return r;
1125  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1126  }
1127  }
1128  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1129  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1130  if (r) return r;
1131  r = compile_tree(qn->target, reg);
1132  }
1133  else if (!infinite && qn->greedy &&
1134  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1136  int n = qn->upper - qn->lower;
1137 
1138  r = compile_tree_n_times(qn->target, qn->lower, reg);
1139  if (r) return r;
1140 
1141  for (i = 0; i < n; i++) {
1142  r = add_opcode_rel_addr(reg, OP_PUSH,
1143  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1144  if (r) return r;
1145  r = compile_tree(qn->target, reg);
1146  if (r) return r;
1147  }
1148  }
1149  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1151  if (r) return r;
1152  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1153  if (r) return r;
1154  r = compile_tree(qn->target, reg);
1155  }
1156  else {
1157  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1158  }
1159  return r;
1160 }
1161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162 
1163 static int
1165 {
1166  int tlen;
1167  OnigOptionType prev = reg->options;
1168 
1169  reg->options = node->option;
1170  tlen = compile_length_tree(node->target, reg);
1171  reg->options = prev;
1172 
1173  if (tlen < 0) return tlen;
1174 
1175  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1177  + tlen + SIZE_OP_SET_OPTION;
1178  }
1179  else
1180  return tlen;
1181 }
1182 
1183 static int
1185 {
1186  int r;
1187  OnigOptionType prev = reg->options;
1188 
1189  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1190  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1191  if (r) return r;
1192  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1193  if (r) return r;
1194  r = add_opcode(reg, OP_FAIL);
1195  if (r) return r;
1196  }
1197 
1198  reg->options = node->option;
1199  r = compile_tree(node->target, reg);
1200  reg->options = prev;
1201 
1202  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1203  if (r) return r;
1204  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1205  }
1206  return r;
1207 }
1208 
1209 static int
1211 {
1212  int len;
1213  int tlen;
1214 
1215  if (node->type == ENCLOSE_OPTION)
1216  return compile_length_option_node(node, reg);
1217 
1218  if (node->target) {
1219  tlen = compile_length_tree(node->target, reg);
1220  if (tlen < 0) return tlen;
1221  }
1222  else
1223  tlen = 0;
1224 
1225  switch (node->type) {
1226  case ENCLOSE_MEMORY:
1227 #ifdef USE_SUBEXP_CALL
1228  if (IS_ENCLOSE_CALLED(node)) {
1229  len = SIZE_OP_MEMORY_START_PUSH + tlen
1231  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1232  len += (IS_ENCLOSE_RECURSION(node)
1234  else
1235  len += (IS_ENCLOSE_RECURSION(node)
1237  }
1238  else
1239 #endif
1240  {
1241  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1243  else
1244  len = SIZE_OP_MEMORY_START;
1245 
1246  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1248  }
1249  break;
1250 
1253  QtfrNode* qn = NQTFR(node->target);
1254  tlen = compile_length_tree(qn->target, reg);
1255  if (tlen < 0) return tlen;
1256 
1257  len = tlen * qn->lower
1258  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1259  }
1260  else {
1262  }
1263  break;
1264 
1265  default:
1266  return ONIGERR_TYPE_BUG;
1267  break;
1268  }
1269 
1270  return len;
1271 }
1272 
1273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1274 
1275 static int
1277 {
1278  int r, len;
1279 
1280  if (node->type == ENCLOSE_OPTION)
1281  return compile_option_node(node, reg);
1282 
1283  switch (node->type) {
1284  case ENCLOSE_MEMORY:
1285 #ifdef USE_SUBEXP_CALL
1286  if (IS_ENCLOSE_CALLED(node)) {
1287  r = add_opcode(reg, OP_CALL);
1288  if (r) return r;
1290  node->state |= NST_ADDR_FIXED;
1291  r = add_abs_addr(reg, (int )node->call_addr);
1292  if (r) return r;
1293  len = compile_length_tree(node->target, reg);
1295  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1296  len += (IS_ENCLOSE_RECURSION(node)
1298  else
1299  len += (IS_ENCLOSE_RECURSION(node)
1301 
1302  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1303  if (r) return r;
1304  }
1305 #endif
1306  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1307  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1308  else
1309  r = add_opcode(reg, OP_MEMORY_START);
1310  if (r) return r;
1311  r = add_mem_num(reg, node->regnum);
1312  if (r) return r;
1313  r = compile_tree(node->target, reg);
1314  if (r) return r;
1315 #ifdef USE_SUBEXP_CALL
1316  if (IS_ENCLOSE_CALLED(node)) {
1317  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1318  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1320  else
1321  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1323 
1324  if (r) return r;
1325  r = add_mem_num(reg, node->regnum);
1326  if (r) return r;
1327  r = add_opcode(reg, OP_RETURN);
1328  }
1329  else
1330 #endif
1331  {
1332  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1333  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1334  else
1335  r = add_opcode(reg, OP_MEMORY_END);
1336  if (r) return r;
1337  r = add_mem_num(reg, node->regnum);
1338  }
1339  break;
1340 
1343  QtfrNode* qn = NQTFR(node->target);
1344  r = compile_tree_n_times(qn->target, qn->lower, reg);
1345  if (r) return r;
1346 
1347  len = compile_length_tree(qn->target, reg);
1348  if (len < 0) return len;
1349 
1351  if (r) return r;
1352  r = compile_tree(qn->target, reg);
1353  if (r) return r;
1354  r = add_opcode(reg, OP_POP);
1355  if (r) return r;
1356  r = add_opcode_rel_addr(reg, OP_JUMP,
1357  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1358  }
1359  else {
1360  r = add_opcode(reg, OP_PUSH_STOP_BT);
1361  if (r) return r;
1362  r = compile_tree(node->target, reg);
1363  if (r) return r;
1364  r = add_opcode(reg, OP_POP_STOP_BT);
1365  }
1366  break;
1367 
1368  default:
1369  return ONIGERR_TYPE_BUG;
1370  break;
1371  }
1372 
1373  return r;
1374 }
1375 
1376 static int
1378 {
1379  int len;
1380  int tlen = 0;
1381 
1382  if (node->target) {
1383  tlen = compile_length_tree(node->target, reg);
1384  if (tlen < 0) return tlen;
1385  }
1386 
1387  switch (node->type) {
1388  case ANCHOR_PREC_READ:
1389  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1390  break;
1391  case ANCHOR_PREC_READ_NOT:
1392  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1393  break;
1394  case ANCHOR_LOOK_BEHIND:
1395  len = SIZE_OP_LOOK_BEHIND + tlen;
1396  break;
1399  break;
1400 
1401  default:
1402  len = SIZE_OPCODE;
1403  break;
1404  }
1405 
1406  return len;
1407 }
1408 
1409 static int
1411 {
1412  int r, len;
1413 
1414  switch (node->type) {
1415  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1416  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1417  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1418  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1419  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1420  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1421 
1422  case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
1423  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1424 #ifdef USE_WORD_BEGIN_END
1425  case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
1426  case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
1427 #endif
1428 
1429  case ANCHOR_PREC_READ:
1430  r = add_opcode(reg, OP_PUSH_POS);
1431  if (r) return r;
1432  r = compile_tree(node->target, reg);
1433  if (r) return r;
1434  r = add_opcode(reg, OP_POP_POS);
1435  break;
1436 
1437  case ANCHOR_PREC_READ_NOT:
1438  len = compile_length_tree(node->target, reg);
1439  if (len < 0) return len;
1441  if (r) return r;
1442  r = compile_tree(node->target, reg);
1443  if (r) return r;
1444  r = add_opcode(reg, OP_FAIL_POS);
1445  break;
1446 
1447  case ANCHOR_LOOK_BEHIND:
1448  {
1449  int n;
1450  r = add_opcode(reg, OP_LOOK_BEHIND);
1451  if (r) return r;
1452  if (node->char_len < 0) {
1453  r = get_char_length_tree(node->target, reg, &n);
1455  }
1456  else
1457  n = node->char_len;
1458  r = add_length(reg, n);
1459  if (r) return r;
1460  r = compile_tree(node->target, reg);
1461  }
1462  break;
1463 
1465  {
1466  int n;
1467  len = compile_length_tree(node->target, reg);
1470  if (r) return r;
1471  if (node->char_len < 0) {
1472  r = get_char_length_tree(node->target, reg, &n);
1474  }
1475  else
1476  n = node->char_len;
1477  r = add_length(reg, n);
1478  if (r) return r;
1479  r = compile_tree(node->target, reg);
1480  if (r) return r;
1482  }
1483  break;
1484 
1485  default:
1486  return ONIGERR_TYPE_BUG;
1487  break;
1488  }
1489 
1490  return r;
1491 }
1492 
1493 static int
1495 {
1496  int len, type, r;
1497 
1498  type = NTYPE(node);
1499  switch (type) {
1500  case NT_LIST:
1501  len = 0;
1502  do {
1503  r = compile_length_tree(NCAR(node), reg);
1504  if (r < 0) return r;
1505  len += r;
1506  } while (IS_NOT_NULL(node = NCDR(node)));
1507  r = len;
1508  break;
1509 
1510  case NT_ALT:
1511  {
1512  int n;
1513 
1514  n = r = 0;
1515  do {
1516  r += compile_length_tree(NCAR(node), reg);
1517  n++;
1518  } while (IS_NOT_NULL(node = NCDR(node)));
1519  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1520  }
1521  break;
1522 
1523  case NT_STR:
1524  if (NSTRING_IS_RAW(node))
1525  r = compile_length_string_raw_node(NSTR(node), reg);
1526  else
1527  r = compile_length_string_node(node, reg);
1528  break;
1529 
1530  case NT_CCLASS:
1531  r = compile_length_cclass_node(NCCLASS(node), reg);
1532  break;
1533 
1534  case NT_CTYPE:
1535  case NT_CANY:
1536  r = SIZE_OPCODE;
1537  break;
1538 
1539  case NT_BREF:
1540  {
1541  BRefNode* br = NBREF(node);
1542 
1543 #ifdef USE_BACKREF_WITH_LEVEL
1544  if (IS_BACKREF_NEST_LEVEL(br)) {
1546  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1547  }
1548  else
1549 #endif
1550  if (br->back_num == 1) {
1551  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1553  }
1554  else {
1555  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1556  }
1557  }
1558  break;
1559 
1560 #ifdef USE_SUBEXP_CALL
1561  case NT_CALL:
1562  r = SIZE_OP_CALL;
1563  break;
1564 #endif
1565 
1566  case NT_QTFR:
1567  r = compile_length_quantifier_node(NQTFR(node), reg);
1568  break;
1569 
1570  case NT_ENCLOSE:
1571  r = compile_length_enclose_node(NENCLOSE(node), reg);
1572  break;
1573 
1574  case NT_ANCHOR:
1575  r = compile_length_anchor_node(NANCHOR(node), reg);
1576  break;
1577 
1578  default:
1579  return ONIGERR_TYPE_BUG;
1580  break;
1581  }
1582 
1583  return r;
1584 }
1585 
1586 static int
1588 {
1589  int n, type, len, pos, r = 0;
1590 
1591  type = NTYPE(node);
1592  switch (type) {
1593  case NT_LIST:
1594  do {
1595  r = compile_tree(NCAR(node), reg);
1596  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1597  break;
1598 
1599  case NT_ALT:
1600  {
1601  Node* x = node;
1602  len = 0;
1603  do {
1604  len += compile_length_tree(NCAR(x), reg);
1605  if (NCDR(x) != NULL) {
1606  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1607  }
1608  } while (IS_NOT_NULL(x = NCDR(x)));
1609  pos = reg->used + len; /* goal position */
1610 
1611  do {
1612  len = compile_length_tree(NCAR(node), reg);
1613  if (IS_NOT_NULL(NCDR(node))) {
1614  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1615  if (r) break;
1616  }
1617  r = compile_tree(NCAR(node), reg);
1618  if (r) break;
1619  if (IS_NOT_NULL(NCDR(node))) {
1620  len = pos - (reg->used + SIZE_OP_JUMP);
1621  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1622  if (r) break;
1623  }
1624  } while (IS_NOT_NULL(node = NCDR(node)));
1625  }
1626  break;
1627 
1628  case NT_STR:
1629  if (NSTRING_IS_RAW(node))
1630  r = compile_string_raw_node(NSTR(node), reg);
1631  else
1632  r = compile_string_node(node, reg);
1633  break;
1634 
1635  case NT_CCLASS:
1636  r = compile_cclass_node(NCCLASS(node), reg);
1637  break;
1638 
1639  case NT_CTYPE:
1640  {
1641  int op;
1642 
1643  switch (NCTYPE(node)->ctype) {
1644  case ONIGENC_CTYPE_WORD:
1645  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1646  else op = OP_WORD;
1647  break;
1648  default:
1649  return ONIGERR_TYPE_BUG;
1650  break;
1651  }
1652  r = add_opcode(reg, op);
1653  }
1654  break;
1655 
1656  case NT_CANY:
1657  if (IS_MULTILINE(reg->options))
1658  r = add_opcode(reg, OP_ANYCHAR_ML);
1659  else
1660  r = add_opcode(reg, OP_ANYCHAR);
1661  break;
1662 
1663  case NT_BREF:
1664  {
1665  BRefNode* br = NBREF(node);
1666 
1667 #ifdef USE_BACKREF_WITH_LEVEL
1668  if (IS_BACKREF_NEST_LEVEL(br)) {
1670  if (r) return r;
1671  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1672  if (r) return r;
1673  r = add_length(reg, br->nest_level);
1674  if (r) return r;
1675 
1676  goto add_bacref_mems;
1677  }
1678  else
1679 #endif
1680  if (br->back_num == 1) {
1681  n = br->back_static[0];
1682  if (IS_IGNORECASE(reg->options)) {
1683  r = add_opcode(reg, OP_BACKREFN_IC);
1684  if (r) return r;
1685  r = add_mem_num(reg, n);
1686  }
1687  else {
1688  switch (n) {
1689  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1690  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1691  default:
1692  r = add_opcode(reg, OP_BACKREFN);
1693  if (r) return r;
1694  r = add_mem_num(reg, n);
1695  break;
1696  }
1697  }
1698  }
1699  else {
1700  int i;
1701  int* p;
1702 
1703  if (IS_IGNORECASE(reg->options)) {
1704  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1705  }
1706  else {
1707  r = add_opcode(reg, OP_BACKREF_MULTI);
1708  }
1709  if (r) return r;
1710 
1711 #ifdef USE_BACKREF_WITH_LEVEL
1712  add_bacref_mems:
1713 #endif
1714  r = add_length(reg, br->back_num);
1715  if (r) return r;
1716  p = BACKREFS_P(br);
1717  for (i = br->back_num - 1; i >= 0; i--) {
1718  r = add_mem_num(reg, p[i]);
1719  if (r) return r;
1720  }
1721  }
1722  }
1723  break;
1724 
1725 #ifdef USE_SUBEXP_CALL
1726  case NT_CALL:
1727  r = compile_call(NCALL(node), reg);
1728  break;
1729 #endif
1730 
1731  case NT_QTFR:
1732  r = compile_quantifier_node(NQTFR(node), reg);
1733  break;
1734 
1735  case NT_ENCLOSE:
1736  r = compile_enclose_node(NENCLOSE(node), reg);
1737  break;
1738 
1739  case NT_ANCHOR:
1740  r = compile_anchor_node(NANCHOR(node), reg);
1741  break;
1742 
1743  default:
1744 #ifdef ONIG_DEBUG
1745  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1746 #endif
1747  break;
1748  }
1749 
1750  return r;
1751 }
1752 
1753 #ifdef USE_NAMED_GROUP
1754 
1755 static int
1756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1757 {
1758  int r = 0;
1759  Node* node = *plink;
1760 
1761  switch (NTYPE(node)) {
1762  case NT_LIST:
1763  case NT_ALT:
1764  do {
1765  r = noname_disable_map(&(NCAR(node)), map, counter);
1766  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1767  break;
1768 
1769  case NT_QTFR:
1770  {
1771  Node** ptarget = &(NQTFR(node)->target);
1772  Node* old = *ptarget;
1773  r = noname_disable_map(ptarget, map, counter);
1774  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1775  onig_reduce_nested_quantifier(node, *ptarget);
1776  }
1777  }
1778  break;
1779 
1780  case NT_ENCLOSE:
1781  {
1782  EncloseNode* en = NENCLOSE(node);
1783  if (en->type == ENCLOSE_MEMORY) {
1784  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1785  (*counter)++;
1786  map[en->regnum].new_val = *counter;
1787  en->regnum = *counter;
1788  r = noname_disable_map(&(en->target), map, counter);
1789  }
1790  else {
1791  *plink = en->target;
1792  en->target = NULL_NODE;
1793  onig_node_free(node);
1794  r = noname_disable_map(plink, map, counter);
1795  }
1796  }
1797  else
1798  r = noname_disable_map(&(en->target), map, counter);
1799  }
1800  break;
1801 
1802  case NT_ANCHOR:
1803  {
1804  AnchorNode* an = NANCHOR(node);
1805  switch (an->type) {
1806  case ANCHOR_PREC_READ:
1807  case ANCHOR_PREC_READ_NOT:
1808  case ANCHOR_LOOK_BEHIND:
1810  r = noname_disable_map(&(an->target), map, counter);
1811  break;
1812  }
1813  }
1814  break;
1815 
1816  default:
1817  break;
1818  }
1819 
1820  return r;
1821 }
1822 
1823 static int
1824 renumber_node_backref(Node* node, GroupNumRemap* map)
1825 {
1826  int i, pos, n, old_num;
1827  int *backs;
1828  BRefNode* bn = NBREF(node);
1829 
1830  if (! IS_BACKREF_NAME_REF(bn))
1832 
1833  old_num = bn->back_num;
1834  if (IS_NULL(bn->back_dynamic))
1835  backs = bn->back_static;
1836  else
1837  backs = bn->back_dynamic;
1838 
1839  for (i = 0, pos = 0; i < old_num; i++) {
1840  n = map[backs[i]].new_val;
1841  if (n > 0) {
1842  backs[pos] = n;
1843  pos++;
1844  }
1845  }
1846 
1847  bn->back_num = pos;
1848  return 0;
1849 }
1850 
1851 static int
1852 renumber_by_map(Node* node, GroupNumRemap* map)
1853 {
1854  int r = 0;
1855 
1856  switch (NTYPE(node)) {
1857  case NT_LIST:
1858  case NT_ALT:
1859  do {
1860  r = renumber_by_map(NCAR(node), map);
1861  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1862  break;
1863  case NT_QTFR:
1864  r = renumber_by_map(NQTFR(node)->target, map);
1865  break;
1866  case NT_ENCLOSE:
1867  r = renumber_by_map(NENCLOSE(node)->target, map);
1868  break;
1869 
1870  case NT_BREF:
1871  r = renumber_node_backref(node, map);
1872  break;
1873 
1874  case NT_ANCHOR:
1875  {
1876  AnchorNode* an = NANCHOR(node);
1877  switch (an->type) {
1878  case ANCHOR_PREC_READ:
1879  case ANCHOR_PREC_READ_NOT:
1880  case ANCHOR_LOOK_BEHIND:
1882  r = renumber_by_map(an->target, map);
1883  break;
1884  }
1885  }
1886  break;
1887 
1888  default:
1889  break;
1890  }
1891 
1892  return r;
1893 }
1894 
1895 static int
1896 numbered_ref_check(Node* node)
1897 {
1898  int r = 0;
1899 
1900  switch (NTYPE(node)) {
1901  case NT_LIST:
1902  case NT_ALT:
1903  do {
1904  r = numbered_ref_check(NCAR(node));
1905  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1906  break;
1907  case NT_QTFR:
1908  r = numbered_ref_check(NQTFR(node)->target);
1909  break;
1910  case NT_ENCLOSE:
1911  r = numbered_ref_check(NENCLOSE(node)->target);
1912  break;
1913 
1914  case NT_BREF:
1915  if (! IS_BACKREF_NAME_REF(NBREF(node)))
1917  break;
1918 
1919  default:
1920  break;
1921  }
1922 
1923  return r;
1924 }
1925 
1926 static int
1927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1928 {
1929  int r, i, pos, counter;
1930  BitStatusType loc;
1931  GroupNumRemap* map;
1932 
1933  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
1935  for (i = 1; i <= env->num_mem; i++) {
1936  map[i].new_val = 0;
1937  }
1938  counter = 0;
1939  r = noname_disable_map(root, map, &counter);
1940  if (r != 0) return r;
1941 
1942  r = renumber_by_map(*root, map);
1943  if (r != 0) return r;
1944 
1945  for (i = 1, pos = 1; i <= env->num_mem; i++) {
1946  if (map[i].new_val > 0) {
1947  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1948  pos++;
1949  }
1950  }
1951 
1952  loc = env->capture_history;
1954  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1955  if (BIT_STATUS_AT(loc, i)) {
1957  }
1958  }
1959 
1960  env->num_mem = env->num_named;
1961  reg->num_mem = env->num_named;
1962 
1963  return onig_renumber_name_table(reg, map);
1964 }
1965 #endif /* USE_NAMED_GROUP */
1966 
1967 #ifdef USE_SUBEXP_CALL
1968 static int
1969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1970 {
1971  int i, offset;
1972  EncloseNode* en;
1973  AbsAddrType addr;
1974 
1975  for (i = 0; i < uslist->num; i++) {
1976  en = NENCLOSE(uslist->us[i].target);
1977  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1978  addr = en->call_addr;
1979  offset = uslist->us[i].offset;
1980 
1981  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1982  }
1983  return 0;
1984 }
1985 #endif
1986 
1987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1988 static int
1989 quantifiers_memory_node_info(Node* node)
1990 {
1991  int r = 0;
1992 
1993  switch (NTYPE(node)) {
1994  case NT_LIST:
1995  case NT_ALT:
1996  {
1997  int v;
1998  do {
1999  v = quantifiers_memory_node_info(NCAR(node));
2000  if (v > r) r = v;
2001  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2002  }
2003  break;
2004 
2005 #ifdef USE_SUBEXP_CALL
2006  case NT_CALL:
2007  if (IS_CALL_RECURSION(NCALL(node))) {
2008  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2009  }
2010  else
2011  r = quantifiers_memory_node_info(NCALL(node)->target);
2012  break;
2013 #endif
2014 
2015  case NT_QTFR:
2016  {
2017  QtfrNode* qn = NQTFR(node);
2018  if (qn->upper != 0) {
2019  r = quantifiers_memory_node_info(qn->target);
2020  }
2021  }
2022  break;
2023 
2024  case NT_ENCLOSE:
2025  {
2026  EncloseNode* en = NENCLOSE(node);
2027  switch (en->type) {
2028  case ENCLOSE_MEMORY:
2029  return NQ_TARGET_IS_EMPTY_MEM;
2030  break;
2031 
2032  case ENCLOSE_OPTION:
2034  r = quantifiers_memory_node_info(en->target);
2035  break;
2036  default:
2037  break;
2038  }
2039  }
2040  break;
2041 
2042  case NT_BREF:
2043  case NT_STR:
2044  case NT_CTYPE:
2045  case NT_CCLASS:
2046  case NT_CANY:
2047  case NT_ANCHOR:
2048  default:
2049  break;
2050  }
2051 
2052  return r;
2053 }
2054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2055 
2056 static int
2058 {
2059  OnigDistance tmin;
2060  int r = 0;
2061 
2062  *min = 0;
2063  switch (NTYPE(node)) {
2064  case NT_BREF:
2065  {
2066  int i;
2067  int* backs;
2068  Node** nodes = SCANENV_MEM_NODES(env);
2069  BRefNode* br = NBREF(node);
2070  if (br->state & NST_RECURSION) break;
2071 
2072  backs = BACKREFS_P(br);
2073  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2074  r = get_min_match_length(nodes[backs[0]], min, env);
2075  if (r != 0) break;
2076  for (i = 1; i < br->back_num; i++) {
2077  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2078  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2079  if (r != 0) break;
2080  if (*min > tmin) *min = tmin;
2081  }
2082  }
2083  break;
2084 
2085 #ifdef USE_SUBEXP_CALL
2086  case NT_CALL:
2087  if (IS_CALL_RECURSION(NCALL(node))) {
2088  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2089  if (IS_ENCLOSE_MIN_FIXED(en))
2090  *min = en->min_len;
2091  }
2092  else
2093  r = get_min_match_length(NCALL(node)->target, min, env);
2094  break;
2095 #endif
2096 
2097  case NT_LIST:
2098  do {
2099  r = get_min_match_length(NCAR(node), &tmin, env);
2100  if (r == 0) *min += tmin;
2101  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2102  break;
2103 
2104  case NT_ALT:
2105  {
2106  Node *x, *y;
2107  y = node;
2108  do {
2109  x = NCAR(y);
2110  r = get_min_match_length(x, &tmin, env);
2111  if (r != 0) break;
2112  if (y == node) *min = tmin;
2113  else if (*min > tmin) *min = tmin;
2114  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2115  }
2116  break;
2117 
2118  case NT_STR:
2119  {
2120  StrNode* sn = NSTR(node);
2121  *min = sn->end - sn->s;
2122  }
2123  break;
2124 
2125  case NT_CTYPE:
2126  *min = 1;
2127  break;
2128 
2129  case NT_CCLASS:
2130  case NT_CANY:
2131  *min = 1;
2132  break;
2133 
2134  case NT_QTFR:
2135  {
2136  QtfrNode* qn = NQTFR(node);
2137 
2138  if (qn->lower > 0) {
2139  r = get_min_match_length(qn->target, min, env);
2140  if (r == 0)
2141  *min = distance_multiply(*min, qn->lower);
2142  }
2143  }
2144  break;
2145 
2146  case NT_ENCLOSE:
2147  {
2148  EncloseNode* en = NENCLOSE(node);
2149  switch (en->type) {
2150  case ENCLOSE_MEMORY:
2151 #ifdef USE_SUBEXP_CALL
2152  if (IS_ENCLOSE_MIN_FIXED(en))
2153  *min = en->min_len;
2154  else {
2155  r = get_min_match_length(en->target, min, env);
2156  if (r == 0) {
2157  en->min_len = *min;
2159  }
2160  }
2161  break;
2162 #endif
2163  case ENCLOSE_OPTION:
2165  r = get_min_match_length(en->target, min, env);
2166  break;
2167  }
2168  }
2169  break;
2170 
2171  case NT_ANCHOR:
2172  default:
2173  break;
2174  }
2175 
2176  return r;
2177 }
2178 
2179 static int
2181 {
2182  OnigDistance tmax;
2183  int r = 0;
2184 
2185  *max = 0;
2186  switch (NTYPE(node)) {
2187  case NT_LIST:
2188  do {
2189  r = get_max_match_length(NCAR(node), &tmax, env);
2190  if (r == 0)
2191  *max = distance_add(*max, tmax);
2192  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2193  break;
2194 
2195  case NT_ALT:
2196  do {
2197  r = get_max_match_length(NCAR(node), &tmax, env);
2198  if (r == 0 && *max < tmax) *max = tmax;
2199  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2200  break;
2201 
2202  case NT_STR:
2203  {
2204  StrNode* sn = NSTR(node);
2205  *max = sn->end - sn->s;
2206  }
2207  break;
2208 
2209  case NT_CTYPE:
2210  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2211  break;
2212 
2213  case NT_CCLASS:
2214  case NT_CANY:
2215  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2216  break;
2217 
2218  case NT_BREF:
2219  {
2220  int i;
2221  int* backs;
2222  Node** nodes = SCANENV_MEM_NODES(env);
2223  BRefNode* br = NBREF(node);
2224  if (br->state & NST_RECURSION) {
2225  *max = ONIG_INFINITE_DISTANCE;
2226  break;
2227  }
2228  backs = BACKREFS_P(br);
2229  for (i = 0; i < br->back_num; i++) {
2230  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2231  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2232  if (r != 0) break;
2233  if (*max < tmax) *max = tmax;
2234  }
2235  }
2236  break;
2237 
2238 #ifdef USE_SUBEXP_CALL
2239  case NT_CALL:
2240  if (! IS_CALL_RECURSION(NCALL(node)))
2241  r = get_max_match_length(NCALL(node)->target, max, env);
2242  else
2243  *max = ONIG_INFINITE_DISTANCE;
2244  break;
2245 #endif
2246 
2247  case NT_QTFR:
2248  {
2249  QtfrNode* qn = NQTFR(node);
2250 
2251  if (qn->upper != 0) {
2252  r = get_max_match_length(qn->target, max, env);
2253  if (r == 0 && *max != 0) {
2254  if (! IS_REPEAT_INFINITE(qn->upper))
2255  *max = distance_multiply(*max, qn->upper);
2256  else
2257  *max = ONIG_INFINITE_DISTANCE;
2258  }
2259  }
2260  }
2261  break;
2262 
2263  case NT_ENCLOSE:
2264  {
2265  EncloseNode* en = NENCLOSE(node);
2266  switch (en->type) {
2267  case ENCLOSE_MEMORY:
2268 #ifdef USE_SUBEXP_CALL
2269  if (IS_ENCLOSE_MAX_FIXED(en))
2270  *max = en->max_len;
2271  else {
2272  r = get_max_match_length(en->target, max, env);
2273  if (r == 0) {
2274  en->max_len = *max;
2276  }
2277  }
2278  break;
2279 #endif
2280  case ENCLOSE_OPTION:
2282  r = get_max_match_length(en->target, max, env);
2283  break;
2284  }
2285  }
2286  break;
2287 
2288  case NT_ANCHOR:
2289  default:
2290  break;
2291  }
2292 
2293  return r;
2294 }
2295 
2296 #define GET_CHAR_LEN_VARLEN -1
2297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2298 
2299 /* fixed size pattern node only */
2300 static int
2301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2302 {
2303  int tlen;
2304  int r = 0;
2305 
2306  level++;
2307  *len = 0;
2308  switch (NTYPE(node)) {
2309  case NT_LIST:
2310  do {
2311  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2312  if (r == 0)
2313  *len = (int)distance_add(*len, tlen);
2314  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2315  break;
2316 
2317  case NT_ALT:
2318  {
2319  int tlen2;
2320  int varlen = 0;
2321 
2322  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2323  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2324  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2325  if (r == 0) {
2326  if (tlen != tlen2)
2327  varlen = 1;
2328  }
2329  }
2330  if (r == 0) {
2331  if (varlen != 0) {
2332  if (level == 1)
2334  else
2335  r = GET_CHAR_LEN_VARLEN;
2336  }
2337  else
2338  *len = tlen;
2339  }
2340  }
2341  break;
2342 
2343  case NT_STR:
2344  {
2345  StrNode* sn = NSTR(node);
2346  UChar *s = sn->s;
2347  while (s < sn->end) {
2348  s += enclen(reg->enc, s, sn->end);
2349  (*len)++;
2350  }
2351  }
2352  break;
2353 
2354  case NT_QTFR:
2355  {
2356  QtfrNode* qn = NQTFR(node);
2357  if (qn->lower == qn->upper) {
2358  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2359  if (r == 0)
2360  *len = (int)distance_multiply(tlen, qn->lower);
2361  }
2362  else
2363  r = GET_CHAR_LEN_VARLEN;
2364  }
2365  break;
2366 
2367 #ifdef USE_SUBEXP_CALL
2368  case NT_CALL:
2369  if (! IS_CALL_RECURSION(NCALL(node)))
2370  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2371  else
2372  r = GET_CHAR_LEN_VARLEN;
2373  break;
2374 #endif
2375 
2376  case NT_CTYPE:
2377  *len = 1;
2378  break;
2379 
2380  case NT_CCLASS:
2381  case NT_CANY:
2382  *len = 1;
2383  break;
2384 
2385  case NT_ENCLOSE:
2386  {
2387  EncloseNode* en = NENCLOSE(node);
2388  switch (en->type) {
2389  case ENCLOSE_MEMORY:
2390 #ifdef USE_SUBEXP_CALL
2391  if (IS_ENCLOSE_CLEN_FIXED(en))
2392  *len = en->char_len;
2393  else {
2394  r = get_char_length_tree1(en->target, reg, len, level);
2395  if (r == 0) {
2396  en->char_len = *len;
2398  }
2399  }
2400  break;
2401 #endif
2402  case ENCLOSE_OPTION:
2404  r = get_char_length_tree1(en->target, reg, len, level);
2405  break;
2406  default:
2407  break;
2408  }
2409  }
2410  break;
2411 
2412  case NT_ANCHOR:
2413  break;
2414 
2415  default:
2416  r = GET_CHAR_LEN_VARLEN;
2417  break;
2418  }
2419 
2420  return r;
2421 }
2422 
2423 static int
2425 {
2426  return get_char_length_tree1(node, reg, len, 0);
2427 }
2428 
2429 /* x is not included y ==> 1 : 0 */
2430 static int
2432 {
2433  int i;
2434  OnigDistance len;
2435  OnigCodePoint code;
2436  UChar *p, c;
2437  int ytype;
2438 
2439  retry:
2440  ytype = NTYPE(y);
2441  switch (NTYPE(x)) {
2442  case NT_CTYPE:
2443  {
2444  switch (ytype) {
2445  case NT_CTYPE:
2446  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2447  NCTYPE(y)->not != NCTYPE(x)->not)
2448  return 1;
2449  else
2450  return 0;
2451  break;
2452 
2453  case NT_CCLASS:
2454  swap:
2455  {
2456  Node* tmp;
2457  tmp = x; x = y; y = tmp;
2458  goto retry;
2459  }
2460  break;
2461 
2462  case NT_STR:
2463  goto swap;
2464  break;
2465 
2466  default:
2467  break;
2468  }
2469  }
2470  break;
2471 
2472  case NT_CCLASS:
2473  {
2474  CClassNode* xc = NCCLASS(x);
2475  switch (ytype) {
2476  case NT_CTYPE:
2477  switch (NCTYPE(y)->ctype) {
2478  case ONIGENC_CTYPE_WORD:
2479  if (NCTYPE(y)->not == 0) {
2480  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2481  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2482  if (BITSET_AT(xc->bs, i)) {
2483  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2484  }
2485  }
2486  return 1;
2487  }
2488  return 0;
2489  }
2490  else {
2491  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2492  if (! IS_CODE_SB_WORD(reg->enc, i)) {
2493  if (!IS_NCCLASS_NOT(xc)) {
2494  if (BITSET_AT(xc->bs, i))
2495  return 0;
2496  }
2497  else {
2498  if (! BITSET_AT(xc->bs, i))
2499  return 0;
2500  }
2501  }
2502  }
2503  return 1;
2504  }
2505  break;
2506 
2507  default:
2508  break;
2509  }
2510  break;
2511 
2512  case NT_CCLASS:
2513  {
2514  int v;
2515  CClassNode* yc = NCCLASS(y);
2516 
2517  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2518  v = BITSET_AT(xc->bs, i);
2519  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2520  (v == 0 && IS_NCCLASS_NOT(xc))) {
2521  v = BITSET_AT(yc->bs, i);
2522  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2523  (v == 0 && IS_NCCLASS_NOT(yc)))
2524  return 0;
2525  }
2526  }
2527  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2528  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2529  return 1;
2530  return 0;
2531  }
2532  break;
2533 
2534  case NT_STR:
2535  goto swap;
2536  break;
2537 
2538  default:
2539  break;
2540  }
2541  }
2542  break;
2543 
2544  case NT_STR:
2545  {
2546  StrNode* xs = NSTR(x);
2547  if (NSTRING_LEN(x) == 0)
2548  break;
2549 
2550  c = *(xs->s);
2551  switch (ytype) {
2552  case NT_CTYPE:
2553  switch (NCTYPE(y)->ctype) {
2554  case ONIGENC_CTYPE_WORD:
2555  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2556  return NCTYPE(y)->not;
2557  else
2558  return !(NCTYPE(y)->not);
2559  break;
2560  default:
2561  break;
2562  }
2563  break;
2564 
2565  case NT_CCLASS:
2566  {
2567  CClassNode* cc = NCCLASS(y);
2568 
2569  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2570  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2571  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2572  }
2573  break;
2574 
2575  case NT_STR:
2576  {
2577  UChar *q;
2578  StrNode* ys = NSTR(y);
2579  len = NSTRING_LEN(x);
2580  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2581  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2582  /* tiny version */
2583  return 0;
2584  }
2585  else {
2586  for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
2587  if (*p != *q) return 1;
2588  }
2589  }
2590  }
2591  break;
2592 
2593  default:
2594  break;
2595  }
2596  }
2597  break;
2598 
2599  default:
2600  break;
2601  }
2602 
2603  return 0;
2604 }
2605 
2606 static Node*
2607 get_head_value_node(Node* node, int exact, regex_t* reg)
2608 {
2609  Node* n = NULL_NODE;
2610 
2611  switch (NTYPE(node)) {
2612  case NT_BREF:
2613  case NT_ALT:
2614  case NT_CANY:
2615 #ifdef USE_SUBEXP_CALL
2616  case NT_CALL:
2617 #endif
2618  break;
2619 
2620  case NT_CTYPE:
2621  case NT_CCLASS:
2622  if (exact == 0) {
2623  n = node;
2624  }
2625  break;
2626 
2627  case NT_LIST:
2628  n = get_head_value_node(NCAR(node), exact, reg);
2629  break;
2630 
2631  case NT_STR:
2632  {
2633  StrNode* sn = NSTR(node);
2634 
2635  if (sn->end <= sn->s)
2636  break;
2637 
2638  if (exact != 0 &&
2639  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2640  }
2641  else {
2642  n = node;
2643  }
2644  }
2645  break;
2646 
2647  case NT_QTFR:
2648  {
2649  QtfrNode* qn = NQTFR(node);
2650  if (qn->lower > 0) {
2651  if (IS_NOT_NULL(qn->head_exact))
2652  n = qn->head_exact;
2653  else
2654  n = get_head_value_node(qn->target, exact, reg);
2655  }
2656  }
2657  break;
2658 
2659  case NT_ENCLOSE:
2660  {
2661  EncloseNode* en = NENCLOSE(node);
2662  switch (en->type) {
2663  case ENCLOSE_OPTION:
2664  {
2666 
2667  reg->options = NENCLOSE(node)->option;
2668  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2669  reg->options = options;
2670  }
2671  break;
2672 
2673  case ENCLOSE_MEMORY:
2675  n = get_head_value_node(en->target, exact, reg);
2676  break;
2677  }
2678  }
2679  break;
2680 
2681  case NT_ANCHOR:
2682  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2683  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2684  break;
2685 
2686  default:
2687  break;
2688  }
2689 
2690  return n;
2691 }
2692 
2693 static int
2694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2695 {
2696  int type, r = 0;
2697 
2698  type = NTYPE(node);
2699  if ((NTYPE2BIT(type) & type_mask) == 0)
2700  return 1;
2701 
2702  switch (type) {
2703  case NT_LIST:
2704  case NT_ALT:
2705  do {
2706  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2707  anchor_mask);
2708  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2709  break;
2710 
2711  case NT_QTFR:
2712  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2713  anchor_mask);
2714  break;
2715 
2716  case NT_ENCLOSE:
2717  {
2718  EncloseNode* en = NENCLOSE(node);
2719  if ((en->type & enclose_mask) == 0)
2720  return 1;
2721 
2722  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2723  }
2724  break;
2725 
2726  case NT_ANCHOR:
2727  type = NANCHOR(node)->type;
2728  if ((type & anchor_mask) == 0)
2729  return 1;
2730 
2731  if (NANCHOR(node)->target)
2732  r = check_type_tree(NANCHOR(node)->target,
2733  type_mask, enclose_mask, anchor_mask);
2734  break;
2735 
2736  default:
2737  break;
2738  }
2739  return r;
2740 }
2741 
2742 #ifdef USE_SUBEXP_CALL
2743 
2744 #define RECURSION_EXIST 1
2745 #define RECURSION_INFINITE 2
2746 
2747 static int
2748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2749 {
2750  int type;
2751  int r = 0;
2752 
2753  type = NTYPE(node);
2754  switch (type) {
2755  case NT_LIST:
2756  {
2757  Node *x;
2758  OnigDistance min;
2759  int ret;
2760 
2761  x = node;
2762  do {
2763  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2764  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2765  r |= ret;
2766  if (head) {
2767  ret = get_min_match_length(NCAR(x), &min, env);
2768  if (ret != 0) return ret;
2769  if (min != 0) head = 0;
2770  }
2771  } while (IS_NOT_NULL(x = NCDR(x)));
2772  }
2773  break;
2774 
2775  case NT_ALT:
2776  {
2777  int ret;
2778  r = RECURSION_EXIST;
2779  do {
2780  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2781  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2782  r &= ret;
2783  } while (IS_NOT_NULL(node = NCDR(node)));
2784  }
2785  break;
2786 
2787  case NT_QTFR:
2788  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2789  if (r == RECURSION_EXIST) {
2790  if (NQTFR(node)->lower == 0) r = 0;
2791  }
2792  break;
2793 
2794  case NT_ANCHOR:
2795  {
2796  AnchorNode* an = NANCHOR(node);
2797  switch (an->type) {
2798  case ANCHOR_PREC_READ:
2799  case ANCHOR_PREC_READ_NOT:
2800  case ANCHOR_LOOK_BEHIND:
2802  r = subexp_inf_recursive_check(an->target, env, head);
2803  break;
2804  }
2805  }
2806  break;
2807 
2808  case NT_CALL:
2809  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2810  break;
2811 
2812  case NT_ENCLOSE:
2813  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2814  return 0;
2815  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2816  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2817  else {
2819  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2821  }
2822  break;
2823 
2824  default:
2825  break;
2826  }
2827 
2828  return r;
2829 }
2830 
2831 static int
2832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2833 {
2834  int type;
2835  int r = 0;
2836 
2837  type = NTYPE(node);
2838  switch (type) {
2839  case NT_LIST:
2840  case NT_ALT:
2841  do {
2842  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2843  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2844  break;
2845 
2846  case NT_QTFR:
2847  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2848  break;
2849 
2850  case NT_ANCHOR:
2851  {
2852  AnchorNode* an = NANCHOR(node);
2853  switch (an->type) {
2854  case ANCHOR_PREC_READ:
2855  case ANCHOR_PREC_READ_NOT:
2856  case ANCHOR_LOOK_BEHIND:
2858  r = subexp_inf_recursive_check_trav(an->target, env);
2859  break;
2860  }
2861  }
2862  break;
2863 
2864  case NT_ENCLOSE:
2865  {
2866  EncloseNode* en = NENCLOSE(node);
2867 
2868  if (IS_ENCLOSE_RECURSION(en)) {
2870  r = subexp_inf_recursive_check(en->target, env, 1);
2871  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2873  }
2874  r = subexp_inf_recursive_check_trav(en->target, env);
2875  }
2876 
2877  break;
2878 
2879  default:
2880  break;
2881  }
2882 
2883  return r;
2884 }
2885 
2886 static int
2887 subexp_recursive_check(Node* node)
2888 {
2889  int r = 0;
2890 
2891  switch (NTYPE(node)) {
2892  case NT_LIST:
2893  case NT_ALT:
2894  do {
2895  r |= subexp_recursive_check(NCAR(node));
2896  } while (IS_NOT_NULL(node = NCDR(node)));
2897  break;
2898 
2899  case NT_QTFR:
2900  r = subexp_recursive_check(NQTFR(node)->target);
2901  break;
2902 
2903  case NT_ANCHOR:
2904  {
2905  AnchorNode* an = NANCHOR(node);
2906  switch (an->type) {
2907  case ANCHOR_PREC_READ:
2908  case ANCHOR_PREC_READ_NOT:
2909  case ANCHOR_LOOK_BEHIND:
2911  r = subexp_recursive_check(an->target);
2912  break;
2913  }
2914  }
2915  break;
2916 
2917  case NT_CALL:
2918  r = subexp_recursive_check(NCALL(node)->target);
2919  if (r != 0) SET_CALL_RECURSION(node);
2920  break;
2921 
2922  case NT_ENCLOSE:
2923  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2924  return 0;
2925  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2926  return 1; /* recursion */
2927  else {
2929  r = subexp_recursive_check(NENCLOSE(node)->target);
2931  }
2932  break;
2933 
2934  default:
2935  break;
2936  }
2937 
2938  return r;
2939 }
2940 
2941 
2942 static int
2943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2944 {
2945 #define FOUND_CALLED_NODE 1
2946 
2947  int type;
2948  int r = 0;
2949 
2950  type = NTYPE(node);
2951  switch (type) {
2952  case NT_LIST:
2953  case NT_ALT:
2954  {
2955  int ret;
2956  do {
2957  ret = subexp_recursive_check_trav(NCAR(node), env);
2958  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2959  else if (ret < 0) return ret;
2960  } while (IS_NOT_NULL(node = NCDR(node)));
2961  }
2962  break;
2963 
2964  case NT_QTFR:
2965  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2966  if (NQTFR(node)->upper == 0) {
2967  if (r == FOUND_CALLED_NODE)
2968  NQTFR(node)->is_refered = 1;
2969  }
2970  break;
2971 
2972  case NT_ANCHOR:
2973  {
2974  AnchorNode* an = NANCHOR(node);
2975  switch (an->type) {
2976  case ANCHOR_PREC_READ:
2977  case ANCHOR_PREC_READ_NOT:
2978  case ANCHOR_LOOK_BEHIND:
2980  r = subexp_recursive_check_trav(an->target, env);
2981  break;
2982  }
2983  }
2984  break;
2985 
2986  case NT_ENCLOSE:
2987  {
2988  EncloseNode* en = NENCLOSE(node);
2989 
2990  if (! IS_ENCLOSE_RECURSION(en)) {
2991  if (IS_ENCLOSE_CALLED(en)) {
2993  r = subexp_recursive_check(en->target);
2994  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2996  }
2997  }
2998  r = subexp_recursive_check_trav(en->target, env);
2999  if (IS_ENCLOSE_CALLED(en))
3000  r |= FOUND_CALLED_NODE;
3001  }
3002  break;
3003 
3004  default:
3005  break;
3006  }
3007 
3008  return r;
3009 }
3010 
3011 static int
3012 setup_subexp_call(Node* node, ScanEnv* env)
3013 {
3014  int type;
3015  int r = 0;
3016 
3017  type = NTYPE(node);
3018  switch (type) {
3019  case NT_LIST:
3020  do {
3021  r = setup_subexp_call(NCAR(node), env);
3022  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3023  break;
3024 
3025  case NT_ALT:
3026  do {
3027  r = setup_subexp_call(NCAR(node), env);
3028  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3029  break;
3030 
3031  case NT_QTFR:
3032  r = setup_subexp_call(NQTFR(node)->target, env);
3033  break;
3034  case NT_ENCLOSE:
3035  r = setup_subexp_call(NENCLOSE(node)->target, env);
3036  break;
3037 
3038  case NT_CALL:
3039  {
3040  CallNode* cn = NCALL(node);
3041  Node** nodes = SCANENV_MEM_NODES(env);
3042 
3043  if (cn->group_num != 0) {
3044  int gnum = cn->group_num;
3045 
3046 #ifdef USE_NAMED_GROUP
3047  if (env->num_named > 0 &&
3051  }
3052 #endif
3053  if (gnum > env->num_mem) {
3057  }
3058 
3059 #ifdef USE_NAMED_GROUP
3060  set_call_attr:
3061 #endif
3062  cn->target = nodes[cn->group_num];
3063  if (IS_NULL(cn->target)) {
3067  }
3070  cn->unset_addr_list = env->unset_addr_list;
3071  }
3072 #ifdef USE_NAMED_GROUP
3073  else {
3074  int *refs;
3075 
3076  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3077  &refs);
3078  if (n <= 0) {
3082  }
3083  else if (n > 1) {
3087  }
3088  else {
3089  cn->group_num = refs[0];
3090  goto set_call_attr;
3091  }
3092  }
3093 #endif
3094  }
3095  break;
3096 
3097  case NT_ANCHOR:
3098  {
3099  AnchorNode* an = NANCHOR(node);
3100 
3101  switch (an->type) {
3102  case ANCHOR_PREC_READ:
3103  case ANCHOR_PREC_READ_NOT:
3104  case ANCHOR_LOOK_BEHIND:
3106  r = setup_subexp_call(an->target, env);
3107  break;
3108  }
3109  }
3110  break;
3111 
3112  default:
3113  break;
3114  }
3115 
3116  return r;
3117 }
3118 #endif
3119 
3120 /* divide different length alternatives in look-behind.
3121  (?<=A|B) ==> (?<=A)|(?<=B)
3122  (?<!A|B) ==> (?<!A)(?<!B)
3123 */
3124 static int
3126 {
3127  Node *head, *np, *insert_node;
3128  AnchorNode* an = NANCHOR(node);
3129  int anc_type = an->type;
3130 
3131  head = an->target;
3132  np = NCAR(head);
3133  swap_node(node, head);
3134  NCAR(node) = head;
3135  NANCHOR(head)->target = np;
3136 
3137  np = node;
3138  while ((np = NCDR(np)) != NULL_NODE) {
3139  insert_node = onig_node_new_anchor(anc_type);
3140  CHECK_NULL_RETURN_MEMERR(insert_node);
3141  NANCHOR(insert_node)->target = NCAR(np);
3142  NCAR(np) = insert_node;
3143  }
3144 
3145  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3146  np = node;
3147  do {
3148  SET_NTYPE(np, NT_LIST); /* alt -> list */
3149  } while ((np = NCDR(np)) != NULL_NODE);
3150  }
3151  return 0;
3152 }
3153 
3154 static int
3156 {
3157  int r, len;
3158  AnchorNode* an = NANCHOR(node);
3159 
3160  r = get_char_length_tree(an->target, reg, &len);
3161  if (r == 0)
3162  an->char_len = len;
3163  else if (r == GET_CHAR_LEN_VARLEN)
3165  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3168  else
3170  }
3171 
3172  return r;
3173 }
3174 
3175 static int
3176 next_setup(Node* node, Node* next_node, regex_t* reg)
3177 {
3178  int type;
3179 
3180  retry:
3181  type = NTYPE(node);
3182  if (type == NT_QTFR) {
3183  QtfrNode* qn = NQTFR(node);
3184  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3185 #ifdef USE_QTFR_PEEK_NEXT
3186  Node* n = get_head_value_node(next_node, 1, reg);
3187  /* '\0': for UTF-16BE etc... */
3188  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3189  qn->next_head_exact = n;
3190  }
3191 #endif
3192  /* automatic posseivation a*b ==> (?>a*)b */
3193  if (qn->lower <= 1) {
3194  int ttype = NTYPE(qn->target);
3195  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3196  Node *x, *y;
3197  x = get_head_value_node(qn->target, 0, reg);
3198  if (IS_NOT_NULL(x)) {
3199  y = get_head_value_node(next_node, 0, reg);
3200  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3204  swap_node(node, en);
3205  NENCLOSE(node)->target = en;
3206  }
3207  }
3208  }
3209  }
3210  }
3211  }
3212  else if (type == NT_ENCLOSE) {
3213  EncloseNode* en = NENCLOSE(node);
3214  if (en->type == ENCLOSE_MEMORY) {
3215  node = en->target;
3216  goto retry;
3217  }
3218  }
3219  return 0;
3220 }
3221 
3222 
3223 static int
3225 {
3227  UChar *sbuf, *ebuf, *sp;
3228  int r, i, len;
3229  OnigDistance sbuf_size;
3230  StrNode* sn = NSTR(node);
3231 
3232  end = sn->end;
3233  sbuf_size = (end - sn->s) * 2;
3234  sbuf = (UChar* )xmalloc(sbuf_size);
3236  ebuf = sbuf + sbuf_size;
3237 
3238  sp = sbuf;
3239  p = sn->s;
3240  while (p < end) {
3241  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3242  q = buf;
3243  for (i = 0; i < len; i++) {
3244  if (sp >= ebuf) {
3245  sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3247  sp = sbuf + sbuf_size;
3248  sbuf_size *= 2;
3249  ebuf = sbuf + sbuf_size;
3250  }
3251 
3252  *sp++ = buf[i];
3253  }
3254  }
3255 
3256  r = onig_node_str_set(node, sbuf, sp);
3257  if (r != 0) {
3258  xfree(sbuf);
3259  return r;
3260  }
3261 
3262  xfree(sbuf);
3263  return 0;
3264 }
3265 
3266 static int
3268  regex_t* reg)
3269 {
3270  int r;
3271  Node *node;
3272 
3273  node = onig_node_new_str(s, end);
3274  if (IS_NULL(node)) return ONIGERR_MEMORY;
3275 
3276  r = update_string_node_case_fold(reg, node);
3277  if (r != 0) {
3278  onig_node_free(node);
3279  return r;
3280  }
3281 
3282  NSTRING_SET_AMBIG(node);
3284  *rnode = node;
3285  return 0;
3286 }
3287 
3288 static int
3290  UChar *p, int slen, UChar *end,
3291  regex_t* reg, Node **rnode)
3292 {
3293  int r, i, j, len, varlen;
3294  Node *anode, *var_anode, *snode, *xnode, *an;
3296 
3297  *rnode = var_anode = NULL_NODE;
3298 
3299  varlen = 0;
3300  for (i = 0; i < item_num; i++) {
3301  if (items[i].byte_len != slen) {
3302  varlen = 1;
3303  break;
3304  }
3305  }
3306 
3307  if (varlen != 0) {
3308  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3309  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3310 
3311  xnode = onig_node_new_list(NULL, NULL);
3312  if (IS_NULL(xnode)) goto mem_err;
3313  NCAR(var_anode) = xnode;
3314 
3316  if (IS_NULL(anode)) goto mem_err;
3317  NCAR(xnode) = anode;
3318  }
3319  else {
3320  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3321  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3322  }
3323 
3324  snode = onig_node_new_str(p, p + slen);
3325  if (IS_NULL(snode)) goto mem_err;
3326 
3327  NCAR(anode) = snode;
3328 
3329  for (i = 0; i < item_num; i++) {
3330  snode = onig_node_new_str(NULL, NULL);
3331  if (IS_NULL(snode)) goto mem_err;
3332 
3333  for (j = 0; j < items[i].code_len; j++) {
3334  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3335  if (len < 0) {
3336  r = len;
3337  goto mem_err2;
3338  }
3339 
3340  r = onig_node_str_cat(snode, buf, buf + len);
3341  if (r != 0) goto mem_err2;
3342  }
3343 
3345  if (IS_NULL(an)) {
3346  goto mem_err2;
3347  }
3348 
3349  if (items[i].byte_len != slen) {
3350  Node *rem;
3351  UChar *q = p + items[i].byte_len;
3352 
3353  if (q < end) {
3354  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3355  if (r != 0) {
3356  onig_node_free(an);
3357  goto mem_err2;
3358  }
3359 
3360  xnode = onig_node_list_add(NULL_NODE, snode);
3361  if (IS_NULL(xnode)) {
3362  onig_node_free(an);
3363  onig_node_free(rem);
3364  goto mem_err2;
3365  }
3366  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3367  onig_node_free(an);
3368  onig_node_free(xnode);
3369  onig_node_free(rem);
3370  goto mem_err;
3371  }
3372 
3373  NCAR(an) = xnode;
3374  }
3375  else {
3376  NCAR(an) = snode;
3377  }
3378 
3379  NCDR(var_anode) = an;
3380  var_anode = an;
3381  }
3382  else {
3383  NCAR(an) = snode;
3384  NCDR(anode) = an;
3385  anode = an;
3386  }
3387  }
3388 
3389  return varlen;
3390 
3391  mem_err2:
3392  onig_node_free(snode);
3393 
3394  mem_err:
3395  onig_node_free(*rnode);
3396 
3397  return ONIGERR_MEMORY;
3398 }
3399 
3400 static int
3402 {
3403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3404 
3405  int r, n, len, alt_num;
3406  UChar *start, *end, *p;
3407  Node *top_root, *root, *snode, *prev_node;
3409  StrNode* sn = NSTR(node);
3410 
3411  if (NSTRING_IS_AMBIG(node)) return 0;
3412 
3413  start = sn->s;
3414  end = sn->end;
3415  if (start >= end) return 0;
3416 
3417  r = 0;
3418  top_root = root = prev_node = snode = NULL_NODE;
3419  alt_num = 1;
3420  p = start;
3421  while (p < end) {
3423  p, end, items);
3424  if (n < 0) {
3425  r = n;
3426  goto err;
3427  }
3428 
3429  len = enclen(reg->enc, p, end);
3430 
3431  if (n == 0) {
3432  if (IS_NULL(snode)) {
3433  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3434  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3435  if (IS_NULL(root)) {
3436  onig_node_free(prev_node);
3437  goto mem_err;
3438  }
3439  }
3440 
3441  prev_node = snode = onig_node_new_str(NULL, NULL);
3442  if (IS_NULL(snode)) goto mem_err;
3443  if (IS_NOT_NULL(root)) {
3444  if (IS_NULL(onig_node_list_add(root, snode))) {
3445  onig_node_free(snode);
3446  goto mem_err;
3447  }
3448  }
3449  }
3450 
3451  r = onig_node_str_cat(snode, p, p + len);
3452  if (r != 0) goto err;
3453  }
3454  else {
3455  alt_num *= (n + 1);
3456  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3457 
3458  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3459  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3460  if (IS_NULL(root)) {
3461  onig_node_free(prev_node);
3462  goto mem_err;
3463  }
3464  }
3465 
3466  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3467  if (r < 0) goto mem_err;
3468  if (r == 1) {
3469  if (IS_NULL(root)) {
3470  top_root = prev_node;
3471  }
3472  else {
3473  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3474  onig_node_free(prev_node);
3475  goto mem_err;
3476  }
3477  }
3478 
3479  root = NCAR(prev_node);
3480  }
3481  else { /* r == 0 */
3482  if (IS_NOT_NULL(root)) {
3483  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3484  onig_node_free(prev_node);
3485  goto mem_err;
3486  }
3487  }
3488  }
3489 
3490  snode = NULL_NODE;
3491  }
3492 
3493  p += len;
3494  }
3495 
3496  if (p < end) {
3497  Node *srem;
3498 
3499  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3500  if (r != 0) goto mem_err;
3501 
3502  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3503  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3504  if (IS_NULL(root)) {
3505  onig_node_free(srem);
3506  onig_node_free(prev_node);
3507  goto mem_err;
3508  }
3509  }
3510 
3511  if (IS_NULL(root)) {
3512  prev_node = srem;
3513  }
3514  else {
3515  if (IS_NULL(onig_node_list_add(root, srem))) {
3516  onig_node_free(srem);
3517  goto mem_err;
3518  }
3519  }
3520  }
3521 
3522  /* ending */
3523  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3524  swap_node(node, top_root);
3525  onig_node_free(top_root);
3526  return 0;
3527 
3528  mem_err:
3529  r = ONIGERR_MEMORY;
3530 
3531  err:
3532  onig_node_free(top_root);
3533  return r;
3534 }
3535 
3536 
3537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3538 
3539 #define CEC_THRES_NUM_BIG_REPEAT 512
3540 #define CEC_INFINITE_NUM 0x7fffffff
3541 
3542 #define CEC_IN_INFINITE_REPEAT (1<<0)
3543 #define CEC_IN_FINITE_REPEAT (1<<1)
3544 #define CEC_CONT_BIG_REPEAT (1<<2)
3545 
3546 static int
3547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3548 {
3549  int type;
3550  int r = state;
3551 
3552  type = NTYPE(node);
3553  switch (type) {
3554  case NT_LIST:
3555  {
3556  Node* prev = NULL_NODE;
3557  do {
3558  r = setup_comb_exp_check(NCAR(node), r, env);
3559  prev = NCAR(node);
3560  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3561  }
3562  break;
3563 
3564  case NT_ALT:
3565  {
3566  int ret;
3567  do {
3568  ret = setup_comb_exp_check(NCAR(node), state, env);
3569  r |= ret;
3570  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3571  }
3572  break;
3573 
3574  case NT_QTFR:
3575  {
3576  int child_state = state;
3577  int add_state = 0;
3578  QtfrNode* qn = NQTFR(node);
3579  Node* target = qn->target;
3580  int var_num;
3581 
3582  if (! IS_REPEAT_INFINITE(qn->upper)) {
3583  if (qn->upper > 1) {
3584  /* {0,1}, {1,1} are allowed */
3585  child_state |= CEC_IN_FINITE_REPEAT;
3586 
3587  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3588  if (env->backrefed_mem == 0) {
3589  if (NTYPE(qn->target) == NT_ENCLOSE) {
3590  EncloseNode* en = NENCLOSE(qn->target);
3591  if (en->type == ENCLOSE_MEMORY) {
3592  if (NTYPE(en->target) == NT_QTFR) {
3593  QtfrNode* q = NQTFR(en->target);
3594  if (IS_REPEAT_INFINITE(q->upper)
3595  && q->greedy == qn->greedy) {
3596  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3597  if (qn->upper == 1)
3598  child_state = state;
3599  }
3600  }
3601  }
3602  }
3603  }
3604  }
3605  }
3606 
3607  if (state & CEC_IN_FINITE_REPEAT) {
3608  qn->comb_exp_check_num = -1;
3609  }
3610  else {
3611  if (IS_REPEAT_INFINITE(qn->upper)) {
3612  var_num = CEC_INFINITE_NUM;
3613  child_state |= CEC_IN_INFINITE_REPEAT;
3614  }
3615  else {
3616  var_num = qn->upper - qn->lower;
3617  }
3618 
3619  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3620  add_state |= CEC_CONT_BIG_REPEAT;
3621 
3622  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3623  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3624  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3625  if (qn->comb_exp_check_num == 0) {
3626  env->num_comb_exp_check++;
3627  qn->comb_exp_check_num = env->num_comb_exp_check;
3628  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3629  env->comb_exp_max_regnum = env->curr_max_regnum;
3630  }
3631  }
3632  }
3633 
3634  r = setup_comb_exp_check(target, child_state, env);
3635  r |= add_state;
3636  }
3637  break;
3638 
3639  case NT_ENCLOSE:
3640  {
3641  EncloseNode* en = NENCLOSE(node);
3642 
3643  switch (en->type) {
3644  case ENCLOSE_MEMORY:
3645  {
3646  if (env->curr_max_regnum < en->regnum)
3647  env->curr_max_regnum = en->regnum;
3648 
3649  r = setup_comb_exp_check(en->target, state, env);
3650  }
3651  break;
3652 
3653  default:
3654  r = setup_comb_exp_check(en->target, state, env);
3655  break;
3656  }
3657  }
3658  break;
3659 
3660 #ifdef USE_SUBEXP_CALL
3661  case NT_CALL:
3662  if (IS_CALL_RECURSION(NCALL(node)))
3663  env->has_recursion = 1;
3664  else
3665  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3666  break;
3667 #endif
3668 
3669  default:
3670  break;
3671  }
3672 
3673  return r;
3674 }
3675 #endif
3676 
3677 #define IN_ALT (1<<0)
3678 #define IN_NOT (1<<1)
3679 #define IN_REPEAT (1<<2)
3680 #define IN_VAR_REPEAT (1<<3)
3681 
3682 /* setup_tree does the following work.
3683  1. check empty loop. (set qn->target_empty_info)
3684  2. expand ignore-case in char class.
3685  3. set memory status bit flags. (reg->mem_stats)
3686  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3687  5. find invalid patterns in look-behind.
3688  6. expand repeated string.
3689  */
3690 static int
3691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3692 {
3693  int type;
3694  int r = 0;
3695 
3696 restart:
3697  type = NTYPE(node);
3698  switch (type) {
3699  case NT_LIST:
3700  {
3701  Node* prev = NULL_NODE;
3702  do {
3703  r = setup_tree(NCAR(node), reg, state, env);
3704  if (IS_NOT_NULL(prev) && r == 0) {
3705  r = next_setup(prev, NCAR(node), reg);
3706  }
3707  prev = NCAR(node);
3708  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3709  }
3710  break;
3711 
3712  case NT_ALT:
3713  do {
3714  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3715  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3716  break;
3717 
3718  case NT_CCLASS:
3719  break;
3720 
3721  case NT_STR:
3722  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3723  r = expand_case_fold_string(node, reg);
3724  }
3725  break;
3726 
3727  case NT_CTYPE:
3728  case NT_CANY:
3729  break;
3730 
3731 #ifdef USE_SUBEXP_CALL
3732  case NT_CALL:
3733  break;
3734 #endif
3735 
3736  case NT_BREF:
3737  {
3738  int i;
3739  int* p;
3740  Node** nodes = SCANENV_MEM_NODES(env);
3741  BRefNode* br = NBREF(node);
3742  p = BACKREFS_P(br);
3743  for (i = 0; i < br->back_num; i++) {
3744  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3745  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3746  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3747 #ifdef USE_BACKREF_WITH_LEVEL
3748  if (IS_BACKREF_NEST_LEVEL(br)) {
3749  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3750  }
3751 #endif
3752  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3753  }
3754  }
3755  break;
3756 
3757  case NT_QTFR:
3758  {
3759  OnigDistance d;
3760  QtfrNode* qn = NQTFR(node);
3761  Node* target = qn->target;
3762 
3763  if ((state & IN_REPEAT) != 0) {
3764  qn->state |= NST_IN_REPEAT;
3765  }
3766 
3767  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3768  r = get_min_match_length(target, &d, env);
3769  if (r) break;
3770  if (d == 0) {
3772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3773  r = quantifiers_memory_node_info(target);
3774  if (r < 0) break;
3775  if (r > 0) {
3776  qn->target_empty_info = r;
3777  }
3778 #endif
3779 #if 0
3780  r = get_max_match_length(target, &d, env);
3781  if (r == 0 && d == 0) {
3782  /* ()* ==> ()?, ()+ ==> () */
3783  qn->upper = 1;
3784  if (qn->lower > 1) qn->lower = 1;
3785  if (NTYPE(target) == NT_STR) {
3786  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3787  }
3788  }
3789 #endif
3790  }
3791  }
3792 
3793  state |= IN_REPEAT;
3794  if (qn->lower != qn->upper)
3795  state |= IN_VAR_REPEAT;
3796  r = setup_tree(target, reg, state, env);
3797  if (r) break;
3798 
3799  /* expand string */
3800 #define EXPAND_STRING_MAX_LENGTH 100
3801  if (NTYPE(target) == NT_STR) {
3802  if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3803  qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3804  OnigDistance len = NSTRING_LEN(target);
3805  StrNode* sn = NSTR(target);
3806 
3807  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3808  int i, n = qn->lower;
3809  onig_node_conv_to_str_node(node, NSTR(target)->flag);
3810  for (i = 0; i < n; i++) {
3811  r = onig_node_str_cat(node, sn->s, sn->end);
3812  if (r) break;
3813  }
3814  onig_node_free(target);
3815  break; /* break case NT_QTFR: */
3816  }
3817  }
3818  }
3819 
3820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3821  if (qn->greedy && (qn->target_empty_info != 0)) {
3822  if (NTYPE(target) == NT_QTFR) {
3823  QtfrNode* tqn = NQTFR(target);
3824  if (IS_NOT_NULL(tqn->head_exact)) {
3825  qn->head_exact = tqn->head_exact;
3826  tqn->head_exact = NULL;
3827  }
3828  }
3829  else {
3830  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3831  }
3832  }
3833 #endif
3834  }
3835  break;
3836 
3837  case NT_ENCLOSE:
3838  {
3839  EncloseNode* en = NENCLOSE(node);
3840 
3841  switch (en->type) {
3842  case ENCLOSE_OPTION:
3843  {
3845  reg->options = NENCLOSE(node)->option;
3846  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3847  reg->options = options;
3848  }
3849  break;
3850 
3851  case ENCLOSE_MEMORY:
3852  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3854  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3855  }
3856  r = setup_tree(en->target, reg, state, env);
3857  break;
3858 
3860  {
3861  Node* target = en->target;
3862  r = setup_tree(target, reg, state, env);
3863  if (NTYPE(target) == NT_QTFR) {
3864  QtfrNode* tqn = NQTFR(target);
3865  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3866  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
3867  int qtype = NTYPE(tqn->target);
3868  if (IS_NODE_TYPE_SIMPLE(qtype))
3870  }
3871  }
3872  }
3873  break;
3874  }
3875  }
3876  break;
3877 
3878  case NT_ANCHOR:
3879  {
3880  AnchorNode* an = NANCHOR(node);
3881 
3882  switch (an->type) {
3883  case ANCHOR_PREC_READ:
3884  r = setup_tree(an->target, reg, state, env);
3885  break;
3886  case ANCHOR_PREC_READ_NOT:
3887  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3888  break;
3889 
3890 /* allowed node types in look-behind */
3891 #define ALLOWED_TYPE_IN_LB \
3892  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3893  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3894 
3895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3897 
3898 #define ALLOWED_ANCHOR_IN_LB \
3899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3900 #define ALLOWED_ANCHOR_IN_LB_NOT \
3901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3902 
3903  case ANCHOR_LOOK_BEHIND:
3904  {
3907  if (r < 0) return r;
3908  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3909  r = setup_look_behind(node, reg, env);
3910  if (r != 0) return r;
3911  if (NTYPE(node) != NT_ANCHOR) goto restart;
3912  r = setup_tree(an->target, reg, state, env);
3913  }
3914  break;
3915 
3917  {
3920  if (r < 0) return r;
3921  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3922  r = setup_look_behind(node, reg, env);
3923  if (r != 0) return r;
3924  if (NTYPE(node) != NT_ANCHOR) goto restart;
3925  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3926  }
3927  break;
3928  }
3929  }
3930  break;
3931 
3932  default:
3933  break;
3934  }
3935 
3936  return r;
3937 }
3938 
3939 /* set skip map for Boyer-Moor search */
3940 static int
3941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3942  UChar skip[], int** int_skip)
3943 {
3944  OnigDistance i, len;
3945 
3946  len = end - s;
3947  if (len < ONIG_CHAR_TABLE_SIZE) {
3948  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
3949 
3950  for (i = 0; i < len - 1; i++)
3951  skip[s[i]] = len - 1 - i;
3952  }
3953  else {
3954  if (IS_NULL(*int_skip)) {
3955  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3956  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3957  }
3958  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
3959 
3960  for (i = 0; i < len - 1; i++)
3961  (*int_skip)[s[i]] = (int)(len - 1 - i);
3962  }
3963  return 0;
3964 }
3965 
3966 #define OPT_EXACT_MAXLEN 24
3967 
3968 typedef struct {
3969  OnigDistance min; /* min byte length */
3970  OnigDistance max; /* max byte length */
3971 } MinMaxLen;
3972 
3973 typedef struct {
3979 } OptEnv;
3980 
3981 typedef struct {
3984 } OptAncInfo;
3985 
3986 typedef struct {
3987  MinMaxLen mmd; /* info position */
3989 
3992  int len;
3994 } OptExactInfo;
3995 
3996 typedef struct {
3997  MinMaxLen mmd; /* info position */
3999 
4000  int value; /* weighted value */
4002 } OptMapInfo;
4003 
4004 typedef struct {
4006 
4008  OptExactInfo exb; /* boundary */
4009  OptExactInfo exm; /* middle */
4010  OptExactInfo expr; /* prec read (?=...) */
4011 
4012  OptMapInfo map; /* boundary */
4013 } NodeOptInfo;
4014 
4015 
4016 static int
4018 {
4019  static const short int ByteValTable[] = {
4020  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4021  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4022  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4023  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4024  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4025  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4026  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4027  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4028  };
4029 
4030  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4031  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4032  return 20;
4033  else
4034  return (int )ByteValTable[i];
4035  }
4036  else
4037  return 4; /* Take it easy. */
4038 }
4039 
4040 static int
4042 {
4043  /* 1000 / (min-max-dist + 1) */
4044  static const short int dist_vals[] = {
4045  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4046  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4047  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4048  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4049  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4050  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4051  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4052  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4053  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4054  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4055  };
4056 
4057  OnigDistance d;
4058 
4059  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4060 
4061  d = mm->max - mm->min;
4062  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
4063  /* return dist_vals[d] * 16 / (mm->min + 12); */
4064  return (int )dist_vals[d];
4065  else
4066  return 1;
4067 }
4068 
4069 static int
4070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4071 {
4072  if (v2 <= 0) return -1;
4073  if (v1 <= 0) return 1;
4074 
4075  v1 *= distance_value(d1);
4076  v2 *= distance_value(d2);
4077 
4078  if (v2 > v1) return 1;
4079  if (v2 < v1) return -1;
4080 
4081  if (d2->min < d1->min) return 1;
4082  if (d2->min > d1->min) return -1;
4083  return 0;
4084 }
4085 
4086 static int
4088 {
4089  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4090 }
4091 
4092 
4093 static void
4095 {
4096  mml->min = min;
4097  mml->max = max;
4098 }
4099 
4100 static void
4102 {
4103  mml->min = mml->max = 0;
4104 }
4105 
4106 static void
4108 {
4109  to->min = from->min;
4110  to->max = from->max;
4111 }
4112 
4113 static void
4115 {
4116  to->min = distance_add(to->min, from->min);
4117  to->max = distance_add(to->max, from->max);
4118 }
4119 
4120 #if 0
4121 static void
4122 add_len_mml(MinMaxLen* to, OnigDistance len)
4123 {
4124  to->min = distance_add(to->min, len);
4125  to->max = distance_add(to->max, len);
4126 }
4127 #endif
4128 
4129 static void
4131 {
4132  if (to->min > from->min) to->min = from->min;
4133  if (to->max < from->max) to->max = from->max;
4134 }
4135 
4136 static void
4138 {
4139  *to = *from;
4140 }
4141 
4142 static void
4144 {
4145  anc->left_anchor = 0;
4146  anc->right_anchor = 0;
4147 }
4148 
4149 static void
4151 {
4152  *to = *from;
4153 }
4154 
4155 static void
4157  OnigDistance left_len, OnigDistance right_len)
4158 {
4159  clear_opt_anc_info(to);
4160 
4161  to->left_anchor = left->left_anchor;
4162  if (left_len == 0) {
4163  to->left_anchor |= right->left_anchor;
4164  }
4165 
4166  to->right_anchor = right->right_anchor;
4167  if (right_len == 0) {
4168  to->right_anchor |= left->right_anchor;
4169  }
4170 }
4171 
4172 static int
4174 {
4175  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4176  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4177  anc == ANCHOR_PREC_READ_NOT)
4178  return 0;
4179 
4180  return 1;
4181 }
4182 
4183 static int
4185 {
4186  if ((to->left_anchor & anc) != 0) return 1;
4187 
4188  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4189 }
4190 
4191 static void
4193 {
4194  if (is_left_anchor(anc))
4195  to->left_anchor |= anc;
4196  else
4197  to->right_anchor |= anc;
4198 }
4199 
4200 static void
4202 {
4203  if (is_left_anchor(anc))
4204  to->left_anchor &= ~anc;
4205  else
4206  to->right_anchor &= ~anc;
4207 }
4208 
4209 static void
4211 {
4212  to->left_anchor &= add->left_anchor;
4213  to->right_anchor &= add->right_anchor;
4214 }
4215 
4216 static int
4218 {
4219  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4220 }
4221 
4222 static void
4224 {
4225  clear_mml(&ex->mmd);
4226  clear_opt_anc_info(&ex->anc);
4227  ex->reach_end = 0;
4228  ex->ignore_case = 0;
4229  ex->len = 0;
4230  ex->s[0] = '\0';
4231 }
4232 
4233 static void
4235 {
4236  *to = *from;
4237 }
4238 
4239 static void
4241 {
4242  int i, j, len;
4243  UChar *p, *end;
4244  OptAncInfo tanc;
4245 
4246  if (! to->ignore_case && add->ignore_case) {
4247  if (to->len >= add->len) return ; /* avoid */
4248 
4249  to->ignore_case = 1;
4250  }
4251 
4252  p = add->s;
4253  end = p + add->len;
4254  for (i = to->len; p < end; ) {
4255  len = enclen(enc, p, end);
4256  if (i + len > OPT_EXACT_MAXLEN) break;
4257  for (j = 0; j < len && p < end; j++)
4258  to->s[i++] = *p++;
4259  }
4260 
4261  to->len = i;
4262  to->reach_end = (p == end ? add->reach_end : 0);
4263 
4264  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4265  if (! to->reach_end) tanc.right_anchor = 0;
4266  copy_opt_anc_info(&to->anc, &tanc);
4267 }
4268 
4269 static void
4271  int raw ARG_UNUSED, OnigEncoding enc)
4272 {
4273  int i, j, len;
4274  UChar *p;
4275 
4276  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4277  len = enclen(enc, p, end);
4278  if (i + len > OPT_EXACT_MAXLEN) break;
4279  for (j = 0; j < len && p < end; j++)
4280  to->s[i++] = *p++;
4281  }
4282 
4283  to->len = i;
4284 }
4285 
4286 static void
4288 {
4289  int i, j, len;
4290 
4291  if (add->len == 0 || to->len == 0) {
4293  return ;
4294  }
4295 
4296  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4298  return ;
4299  }
4300 
4301  for (i = 0; i < to->len && i < add->len; ) {
4302  if (to->s[i] != add->s[i]) break;
4303  len = enclen(env->enc, to->s + i, to->s + to->len);
4304 
4305  for (j = 1; j < len; j++) {
4306  if (to->s[i+j] != add->s[i+j]) break;
4307  }
4308  if (j < len) break;
4309  i += len;
4310  }
4311 
4312  if (! add->reach_end || i < add->len || i < to->len) {
4313  to->reach_end = 0;
4314  }
4315  to->len = i;
4316  to->ignore_case |= add->ignore_case;
4317 
4318  alt_merge_opt_anc_info(&to->anc, &add->anc);
4319  if (! to->reach_end) to->anc.right_anchor = 0;
4320 }
4321 
4322 static void
4324 {
4325  int v1, v2;
4326 
4327  v1 = now->len;
4328  v2 = alt->len;
4329 
4330  if (v2 == 0) {
4331  return ;
4332  }
4333  else if (v1 == 0) {
4334  copy_opt_exact_info(now, alt);
4335  return ;
4336  }
4337  else if (v1 <= 2 && v2 <= 2) {
4338  /* ByteValTable[x] is big value --> low price */
4339  v2 = map_position_value(enc, now->s[0]);
4340  v1 = map_position_value(enc, alt->s[0]);
4341 
4342  if (now->len > 1) v1 += 5;
4343  if (alt->len > 1) v2 += 5;
4344  }
4345 
4346  if (now->ignore_case == 0) v1 *= 2;
4347  if (alt->ignore_case == 0) v2 *= 2;
4348 
4349  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4350  copy_opt_exact_info(now, alt);
4351 }
4352 
4353 static void
4355 {
4356  static const OptMapInfo clean_info = {
4357  {0, 0}, {0, 0}, 0,
4358  {
4359  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4360  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4361  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4362  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4363  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4364  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4365  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4366  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4367  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4368  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4369  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4370  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4371  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4372  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4373  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4374  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4375  }
4376  };
4377 
4378  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4379 }
4380 
4381 static void
4383 {
4384  *to = *from;
4385 }
4386 
4387 static void
4389 {
4390  if (map->map[c] == 0) {
4391  map->map[c] = 1;
4392  map->value += map_position_value(enc, c);
4393  }
4394 }
4395 
4396 static int
4398  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4399 {
4402  int i, n;
4403 
4404  add_char_opt_map_info(map, p[0], enc);
4405 
4406  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4407  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4408  if (n < 0) return n;
4409 
4410  for (i = 0; i < n; i++) {
4411  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4412  add_char_opt_map_info(map, buf[0], enc);
4413  }
4414 
4415  return 0;
4416 }
4417 
4418 static void
4420 {
4421  const int z = 1<<15; /* 32768: something big value */
4422 
4423  int v1, v2;
4424 
4425  if (alt->value == 0) return ;
4426  if (now->value == 0) {
4427  copy_opt_map_info(now, alt);
4428  return ;
4429  }
4430 
4431  v1 = z / now->value;
4432  v2 = z / alt->value;
4433  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4434  copy_opt_map_info(now, alt);
4435 }
4436 
4437 static int
4439 {
4440 #define COMP_EM_BASE 20
4441  int ve, vm;
4442 
4443  if (m->value <= 0) return -1;
4444 
4445  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4446  vm = COMP_EM_BASE * 5 * 2 / m->value;
4447  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4448 }
4449 
4450 static void
4452 {
4453  int i, val;
4454 
4455  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4456  if (to->value == 0) return ;
4457  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4458  clear_opt_map_info(to);
4459  return ;
4460  }
4461 
4462  alt_merge_mml(&to->mmd, &add->mmd);
4463 
4464  val = 0;
4465  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4466  if (add->map[i])
4467  to->map[i] = 1;
4468 
4469  if (to->map[i])
4470  val += map_position_value(enc, i);
4471  }
4472  to->value = val;
4473 
4474  alt_merge_opt_anc_info(&to->anc, &add->anc);
4475 }
4476 
4477 static void
4479 {
4480  copy_mml(&(opt->exb.mmd), mmd);
4481  copy_mml(&(opt->expr.mmd), mmd);
4482  copy_mml(&(opt->map.mmd), mmd);
4483 }
4484 
4485 static void
4487 {
4488  clear_mml(&opt->len);
4489  clear_opt_anc_info(&opt->anc);
4490  clear_opt_exact_info(&opt->exb);
4491  clear_opt_exact_info(&opt->exm);
4492  clear_opt_exact_info(&opt->expr);
4493  clear_opt_map_info(&opt->map);
4494 }
4495 
4496 static void
4498 {
4499  *to = *from;
4500 }
4501 
4502 static void
4504 {
4505  int exb_reach, exm_reach;
4506  OptAncInfo tanc;
4507 
4508  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4509  copy_opt_anc_info(&to->anc, &tanc);
4510 
4511  if (add->exb.len > 0 && to->len.max == 0) {
4512  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4513  to->len.max, add->len.max);
4514  copy_opt_anc_info(&add->exb.anc, &tanc);
4515  }
4516 
4517  if (add->map.value > 0 && to->len.max == 0) {
4518  if (add->map.mmd.max == 0)
4519  add->map.anc.left_anchor |= to->anc.left_anchor;
4520  }
4521 
4522  exb_reach = to->exb.reach_end;
4523  exm_reach = to->exm.reach_end;
4524 
4525  if (add->len.max != 0)
4526  to->exb.reach_end = to->exm.reach_end = 0;
4527 
4528  if (add->exb.len > 0) {
4529  if (exb_reach) {
4530  concat_opt_exact_info(&to->exb, &add->exb, enc);
4531  clear_opt_exact_info(&add->exb);
4532  }
4533  else if (exm_reach) {
4534  concat_opt_exact_info(&to->exm, &add->exb, enc);
4535  clear_opt_exact_info(&add->exb);
4536  }
4537  }
4538  select_opt_exact_info(enc, &to->exm, &add->exb);
4539  select_opt_exact_info(enc, &to->exm, &add->exm);
4540 
4541  if (to->expr.len > 0) {
4542  if (add->len.max > 0) {
4543  if (to->expr.len > (int )add->len.max)
4544  to->expr.len = (int)add->len.max;
4545 
4546  if (to->expr.mmd.max == 0)
4547  select_opt_exact_info(enc, &to->exb, &to->expr);
4548  else
4549  select_opt_exact_info(enc, &to->exm, &to->expr);
4550  }
4551  }
4552  else if (add->expr.len > 0) {
4553  copy_opt_exact_info(&to->expr, &add->expr);
4554  }
4555 
4556  select_opt_map_info(&to->map, &add->map);
4557 
4558  add_mml(&to->len, &add->len);
4559 }
4560 
4561 static void
4563 {
4564  alt_merge_opt_anc_info (&to->anc, &add->anc);
4565  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4566  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4567  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4568  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4569 
4570  alt_merge_mml(&to->len, &add->len);
4571 }
4572 
4573 
4574 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4575 
4576 static int
4578 {
4579  int type;
4580  int r = 0;
4581 
4582  clear_node_opt_info(opt);
4583  set_bound_node_opt_info(opt, &env->mmd);
4584 
4585  type = NTYPE(node);
4586  switch (type) {
4587  case NT_LIST:
4588  {
4589  OptEnv nenv;
4590  NodeOptInfo nopt;
4591  Node* nd = node;
4592 
4593  copy_opt_env(&nenv, env);
4594  do {
4595  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4596  if (r == 0) {
4597  add_mml(&nenv.mmd, &nopt.len);
4598  concat_left_node_opt_info(env->enc, opt, &nopt);
4599  }
4600  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4601  }
4602  break;
4603 
4604  case NT_ALT:
4605  {
4606  NodeOptInfo nopt;
4607  Node* nd = node;
4608 
4609  do {
4610  r = optimize_node_left(NCAR(nd), &nopt, env);
4611  if (r == 0) {
4612  if (nd == node) copy_node_opt_info(opt, &nopt);
4613  else alt_merge_node_opt_info(opt, &nopt, env);
4614  }
4615  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4616  }
4617  break;
4618 
4619  case NT_STR:
4620  {
4621  StrNode* sn = NSTR(node);
4622  OnigDistance slen = sn->end - sn->s;
4623  int is_raw = NSTRING_IS_RAW(node);
4624 
4625  if (! NSTRING_IS_AMBIG(node)) {
4626  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4627  NSTRING_IS_RAW(node), env->enc);
4628  if (slen > 0) {
4629  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4630  }
4631  set_mml(&opt->len, slen, slen);
4632  }
4633  else {
4634  OnigDistance max;
4635 
4636  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4637  int n = onigenc_strlen(env->enc, sn->s, sn->end);
4638  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4639  }
4640  else {
4641  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4642  is_raw, env->enc);
4643  opt->exb.ignore_case = 1;
4644 
4645  if (slen > 0) {
4646  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4647  env->enc, env->case_fold_flag);
4648  if (r != 0) break;
4649  }
4650 
4651  max = slen;
4652  }
4653 
4654  set_mml(&opt->len, slen, max);
4655  }
4656 
4657  if ((OnigDistance)opt->exb.len == slen)
4658  opt->exb.reach_end = 1;
4659  }
4660  break;
4661 
4662  case NT_CCLASS:
4663  {
4664  int i, z;
4665  CClassNode* cc = NCCLASS(node);
4666 
4667  /* no need to check ignore case. (setted in setup_tree()) */
4668 
4669  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4670  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4672 
4673  set_mml(&opt->len, min, max);
4674  }
4675  else {
4676  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4677  z = BITSET_AT(cc->bs, i);
4678  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4679  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4680  }
4681  }
4682  set_mml(&opt->len, 1, 1);
4683  }
4684  }
4685  break;
4686 
4687  case NT_CTYPE:
4688  {
4689  int i, min, max;
4690 
4691  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4692 
4693  if (max == 1) {
4694  min = 1;
4695 
4696  switch (NCTYPE(node)->ctype) {
4697  case ONIGENC_CTYPE_WORD:
4698  if (NCTYPE(node)->not != 0) {
4699  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4700  if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4701  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4702  }
4703  }
4704  }
4705  else {
4706  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4707  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4708  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4709  }
4710  }
4711  }
4712  break;
4713  }
4714  }
4715  else {
4716  min = ONIGENC_MBC_MINLEN(env->enc);
4717  }
4718  set_mml(&opt->len, min, max);
4719  }
4720  break;
4721 
4722  case NT_CANY:
4723  {
4724  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4726  set_mml(&opt->len, min, max);
4727  }
4728  break;
4729 
4730  case NT_ANCHOR:
4731  switch (NANCHOR(node)->type) {
4732  case ANCHOR_BEGIN_BUF:
4733  case ANCHOR_BEGIN_POSITION:
4734  case ANCHOR_BEGIN_LINE:
4735  case ANCHOR_END_BUF:
4736  case ANCHOR_SEMI_END_BUF:
4737  case ANCHOR_END_LINE:
4738  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4739  break;
4740 
4741  case ANCHOR_PREC_READ:
4742  {
4743  NodeOptInfo nopt;
4744 
4745  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4746  if (r == 0) {
4747  if (nopt.exb.len > 0)
4748  copy_opt_exact_info(&opt->expr, &nopt.exb);
4749  else if (nopt.exm.len > 0)
4750  copy_opt_exact_info(&opt->expr, &nopt.exm);
4751 
4752  opt->expr.reach_end = 0;
4753 
4754  if (nopt.map.value > 0)
4755  copy_opt_map_info(&opt->map, &nopt.map);
4756  }
4757  }
4758  break;
4759 
4760  case ANCHOR_PREC_READ_NOT:
4761  case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4763  break;
4764  }
4765  break;
4766 
4767  case NT_BREF:
4768  {
4769  int i;
4770  int* backs;
4771  OnigDistance min, max, tmin, tmax;
4772  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4773  BRefNode* br = NBREF(node);
4774 
4775  if (br->state & NST_RECURSION) {
4776  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4777  break;
4778  }
4779  backs = BACKREFS_P(br);
4780  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4781  if (r != 0) break;
4782  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4783  if (r != 0) break;
4784  for (i = 1; i < br->back_num; i++) {
4785  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4786  if (r != 0) break;
4787  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4788  if (r != 0) break;
4789  if (min > tmin) min = tmin;
4790  if (max < tmax) max = tmax;
4791  }
4792  if (r == 0) set_mml(&opt->len, min, max);
4793  }
4794  break;
4795 
4796 #ifdef USE_SUBEXP_CALL
4797  case NT_CALL:
4798  if (IS_CALL_RECURSION(NCALL(node)))
4799  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4800  else {
4801  OnigOptionType save = env->options;
4802  env->options = NENCLOSE(NCALL(node)->target)->option;
4803  r = optimize_node_left(NCALL(node)->target, opt, env);
4804  env->options = save;
4805  }
4806  break;
4807 #endif
4808 
4809  case NT_QTFR:
4810  {
4811  int i;
4812  OnigDistance min, max;
4813  NodeOptInfo nopt;
4814  QtfrNode* qn = NQTFR(node);
4815 
4816  r = optimize_node_left(qn->target, &nopt, env);
4817  if (r) break;
4818 
4819  if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4820  if (env->mmd.max == 0 &&
4821  NTYPE(qn->target) == NT_CANY && qn->greedy) {
4822  if (IS_MULTILINE(env->options))
4824  else
4826  }
4827  }
4828  else {
4829  if (qn->lower > 0) {
4830  copy_node_opt_info(opt, &nopt);
4831  if (nopt.exb.len > 0) {
4832  if (nopt.exb.reach_end) {
4833  for (i = 2; i <= qn->lower &&
4834  ! is_full_opt_exact_info(&opt->exb); i++) {
4835  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4836  }
4837  if (i < qn->lower) {
4838  opt->exb.reach_end = 0;
4839  }
4840  }
4841  }
4842 
4843  if (qn->lower != qn->upper) {
4844  opt->exb.reach_end = 0;
4845  opt->exm.reach_end = 0;
4846  }
4847  if (qn->lower > 1)
4848  opt->exm.reach_end = 0;
4849  }
4850  }
4851 
4852  min = distance_multiply(nopt.len.min, qn->lower);
4853  if (IS_REPEAT_INFINITE(qn->upper))
4854  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4855  else
4856  max = distance_multiply(nopt.len.max, qn->upper);
4857 
4858  set_mml(&opt->len, min, max);
4859  }
4860  break;
4861 
4862  case NT_ENCLOSE:
4863  {
4864  EncloseNode* en = NENCLOSE(node);
4865 
4866  switch (en->type) {
4867  case ENCLOSE_OPTION:
4868  {
4869  OnigOptionType save = env->options;
4870 
4871  env->options = en->option;
4872  r = optimize_node_left(en->target, opt, env);
4873  env->options = save;
4874  }
4875  break;
4876 
4877  case ENCLOSE_MEMORY:
4878 #ifdef USE_SUBEXP_CALL
4879  en->opt_count++;
4881  OnigDistance min, max;
4882 
4883  min = 0;
4884  max = ONIG_INFINITE_DISTANCE;
4885  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4886  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4887  set_mml(&opt->len, min, max);
4888  }
4889  else
4890 #endif
4891  {
4892  r = optimize_node_left(en->target, opt, env);
4893 
4895  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4897  }
4898  }
4899  break;
4900 
4902  r = optimize_node_left(en->target, opt, env);
4903  break;
4904  }
4905  }
4906  break;
4907 
4908  default:
4909 #ifdef ONIG_DEBUG
4910  if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4911  NTYPE(node));
4912 #endif
4913  r = ONIGERR_TYPE_BUG;
4914  break;
4915  }
4916 
4917  return r;
4918 }
4919 
4920 static int
4922 {
4923  int r;
4924 
4925  if (e->len == 0) return 0;
4926 
4927  if (e->ignore_case) {
4928  reg->exact = (UChar* )xmalloc(e->len);
4930  xmemcpy(reg->exact, e->s, e->len);
4931  reg->exact_end = reg->exact + e->len;
4933  }
4934  else {
4935  int allow_reverse;
4936 
4937  reg->exact = str_dup(e->s, e->s + e->len);
4939  reg->exact_end = reg->exact + e->len;
4940 
4941  allow_reverse =
4943 
4944  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4945  r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4946  reg->map, &(reg->int_map));
4947  if (r) return r;
4948 
4949  reg->optimize = (allow_reverse != 0
4951  }
4952  else {
4954  }
4955  }
4956 
4957  reg->dmin = e->mmd.min;
4958  reg->dmax = e->mmd.max;
4959 
4960  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4961  reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
4962  }
4963 
4964  return 0;
4965 }
4966 
4967 static void
4969 {
4970  int i;
4971 
4972  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4973  reg->map[i] = m->map[i];
4974 
4975  reg->optimize = ONIG_OPTIMIZE_MAP;
4976  reg->dmin = m->mmd.min;
4977  reg->dmax = m->mmd.max;
4978 
4979  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4980  reg->threshold_len = (int)(reg->dmin + 1);
4981  }
4982 }
4983 
4984 static void
4986 {
4987  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
4988  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4989 }
4990 
4991 #ifdef ONIG_DEBUG
4992 static void print_optimize_info(FILE* f, regex_t* reg);
4993 #endif
4994 
4995 static int
4997 {
4998 
4999  int r;
5000  NodeOptInfo opt;
5001  OptEnv env;
5002 
5003  env.enc = reg->enc;
5004  env.options = reg->options;
5005  env.case_fold_flag = reg->case_fold_flag;
5006  env.scan_env = scan_env;
5007  clear_mml(&env.mmd);
5008 
5009  r = optimize_node_left(node, &opt, &env);
5010  if (r) return r;
5011 
5012  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5014 
5016 
5017  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5018  reg->anchor_dmin = opt.len.min;
5019  reg->anchor_dmax = opt.len.max;
5020  }
5021 
5022  if (opt.exb.len > 0 || opt.exm.len > 0) {
5023  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5024  if (opt.map.value > 0 &&
5025  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5026  goto set_map;
5027  }
5028  else {
5029  r = set_optimize_exact_info(reg, &opt.exb);
5030  set_sub_anchor(reg, &opt.exb.anc);
5031  }
5032  }
5033  else if (opt.map.value > 0) {
5034  set_map:
5035  set_optimize_map_info(reg, &opt.map);
5036  set_sub_anchor(reg, &opt.map.anc);
5037  }
5038  else {
5040  if (opt.len.max == 0)
5042  }
5043 
5044 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5045  if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5046 #endif
5047  return r;
5048 }
5049 
5050 static void
5052 {
5054  reg->anchor = 0;
5055  reg->anchor_dmin = 0;
5056  reg->anchor_dmax = 0;
5057  reg->sub_anchor = 0;
5058  reg->exact_end = (UChar* )NULL;
5059  reg->threshold_len = 0;
5060  if (IS_NOT_NULL(reg->exact)) {
5061  xfree(reg->exact);
5062  reg->exact = (UChar* )NULL;
5063  }
5064 }
5065 
5066 #ifdef ONIG_DEBUG
5067 
5068 static void print_enc_string(FILE* fp, OnigEncoding enc,
5069  const UChar *s, const UChar *end)
5070 {
5071  fprintf(fp, "\nPATTERN: /");
5072 
5073  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5074  const UChar *p;
5075  OnigCodePoint code;
5076 
5077  p = s;
5078  while (p < end) {
5079  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5080  if (code >= 0x80) {
5081  fprintf(fp, " 0x%04x ", (int )code);
5082  }
5083  else {
5084  fputc((int )code, fp);
5085  }
5086 
5087  p += enclen(enc, p, end);
5088  }
5089  }
5090  else {
5091  while (s < end) {
5092  fputc((int )*s, fp);
5093  s++;
5094  }
5095  }
5096 
5097  fprintf(fp, "/ (%s)\n", enc->name);
5098 }
5099 
5100 static void
5101 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5102 {
5103  if (a == ONIG_INFINITE_DISTANCE)
5104  fputs("inf", f);
5105  else
5106  fprintf(f, "(%"PRIuSIZE")", a);
5107 
5108  fputs("-", f);
5109 
5110  if (b == ONIG_INFINITE_DISTANCE)
5111  fputs("inf", f);
5112  else
5113  fprintf(f, "(%"PRIuSIZE")", b);
5114 }
5115 
5116 static void
5117 print_anchor(FILE* f, int anchor)
5118 {
5119  int q = 0;
5120 
5121  fprintf(f, "[");
5122 
5123  if (anchor & ANCHOR_BEGIN_BUF) {
5124  fprintf(f, "begin-buf");
5125  q = 1;
5126  }
5127  if (anchor & ANCHOR_BEGIN_LINE) {
5128  if (q) fprintf(f, ", ");
5129  q = 1;
5130  fprintf(f, "begin-line");
5131  }
5132  if (anchor & ANCHOR_BEGIN_POSITION) {
5133  if (q) fprintf(f, ", ");
5134  q = 1;
5135  fprintf(f, "begin-pos");
5136  }
5137  if (anchor & ANCHOR_END_BUF) {
5138  if (q) fprintf(f, ", ");
5139  q = 1;
5140  fprintf(f, "end-buf");
5141  }
5142  if (anchor & ANCHOR_SEMI_END_BUF) {
5143  if (q) fprintf(f, ", ");
5144  q = 1;
5145  fprintf(f, "semi-end-buf");
5146  }
5147  if (anchor & ANCHOR_END_LINE) {
5148  if (q) fprintf(f, ", ");
5149  q = 1;
5150  fprintf(f, "end-line");
5151  }
5152  if (anchor & ANCHOR_ANYCHAR_STAR) {
5153  if (q) fprintf(f, ", ");
5154  q = 1;
5155  fprintf(f, "anychar-star");
5156  }
5157  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5158  if (q) fprintf(f, ", ");
5159  fprintf(f, "anychar-star-pl");
5160  }
5161 
5162  fprintf(f, "]");
5163 }
5164 
5165 static void
5166 print_optimize_info(FILE* f, regex_t* reg)
5167 {
5168  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5169  "EXACT_IC", "MAP" };
5170 
5171  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5172  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5173  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5174  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5175  fprintf(f, "\n");
5176 
5177  if (reg->optimize) {
5178  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5179  fprintf(f, "\n");
5180  }
5181  fprintf(f, "\n");
5182 
5183  if (reg->exact) {
5184  UChar *p;
5185  fprintf(f, "exact: [");
5186  for (p = reg->exact; p < reg->exact_end; p++) {
5187  fputc(*p, f);
5188  }
5189  fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5190  }
5191  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5192  int c, i, n = 0;
5193 
5194  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5195  if (reg->map[i]) n++;
5196 
5197  fprintf(f, "map: n=%d\n", n);
5198  if (n > 0) {
5199  c = 0;
5200  fputc('[', f);
5201  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5202  if (reg->map[i] != 0) {
5203  if (c > 0) fputs(", ", f);
5204  c++;
5205  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5207  fputc(i, f);
5208  else
5209  fprintf(f, "%d", i);
5210  }
5211  }
5212  fprintf(f, "]\n");
5213  }
5214  }
5215 }
5216 #endif /* ONIG_DEBUG */
5217 
5218 
5219 extern void
5221 {
5222  if (IS_NOT_NULL(reg)) {
5223  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5224  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5225  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5227  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5228  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5229 
5230 #ifdef USE_NAMED_GROUP
5231  onig_names_free(reg);
5232 #endif
5233  }
5234 }
5235 
5236 extern void
5238 {
5239  if (IS_NOT_NULL(reg)) {
5240  onig_free_body(reg);
5241  xfree(reg);
5242  }
5243 }
5244 
5245 size_t
5247 {
5248  size_t size = sizeof(regex_t);
5249  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5250  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5251  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5252  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5253  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5254  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5255 
5256  return size;
5257 }
5258 
5259 #define REGEX_TRANSFER(to,from) do {\
5260  (to)->state = ONIG_STATE_MODIFY;\
5261  onig_free_body(to);\
5262  xmemcpy(to, from, sizeof(regex_t));\
5263  xfree(from);\
5264 } while (0)
5265 
5266 extern void
5268 {
5270  REGEX_TRANSFER(to, from);
5272 }
5273 
5274 #define REGEX_CHAIN_HEAD(reg) do {\
5275  while (IS_NOT_NULL((reg)->chain)) {\
5276  (reg) = (reg)->chain;\
5277  }\
5278 } while (0)
5279 
5280 extern void
5282 {
5284  REGEX_CHAIN_HEAD(to);
5285  to->chain = add;
5287 }
5288 
5289 extern void
5291 {
5292  regex_t *head, *prev;
5293 
5294  prev = reg;
5295  head = prev->chain;
5296  if (IS_NOT_NULL(head)) {
5297  reg->state = ONIG_STATE_MODIFY;
5298  while (IS_NOT_NULL(head->chain)) {
5299  prev = head;
5300  head = head->chain;
5301  }
5302  prev->chain = (regex_t* )NULL;
5303  REGEX_TRANSFER(reg, head);
5304  }
5305 }
5306 
5307 #ifdef ONIG_DEBUG
5308 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5309 #endif
5310 #ifdef ONIG_DEBUG_PARSE_TREE
5311 static void print_tree P_((FILE* f, Node* node));
5312 #endif
5313 
5314 extern int
5315 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5316  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5317 {
5318 #define COMPILE_INIT_SIZE 20
5319 
5320  int r;
5321  OnigDistance init_size;
5322  Node* root;
5323  ScanEnv scan_env = {0};
5324 #ifdef USE_SUBEXP_CALL
5325  UnsetAddrList uslist;
5326 #endif
5327 
5328  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5329 
5330  scan_env.sourcefile = sourcefile;
5331  scan_env.sourceline = sourceline;
5332  reg->state = ONIG_STATE_COMPILING;
5333 
5334 #ifdef ONIG_DEBUG
5335  if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
5336 #endif
5337 
5338  if (reg->alloc == 0) {
5339  init_size = (pattern_end - pattern) * 2;
5340  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5341  r = BBUF_INIT(reg, init_size);
5342  if (r != 0) goto end;
5343  }
5344  else
5345  reg->used = 0;
5346 
5347  reg->num_mem = 0;
5348  reg->num_repeat = 0;
5349  reg->num_null_check = 0;
5350  reg->repeat_range_alloc = 0;
5351  reg->repeat_range = (OnigRepeatRange* )NULL;
5352 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5353  reg->num_comb_exp_check = 0;
5354 #endif
5355 
5356  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5357  if (r != 0) goto err;
5358 
5359 #ifdef ONIG_DEBUG_PARSE_TREE
5360 # if 0
5361  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5362  if (!onig_is_prelude()) {
5363  print_tree(stderr, root);
5364  }
5365 # endif
5366 #endif
5367 
5368 #ifdef USE_NAMED_GROUP
5369  /* mixed use named group and no-named group */
5370  if (scan_env.num_named > 0 &&
5373  if (scan_env.num_named != scan_env.num_mem)
5374  r = disable_noname_group_capture(&root, reg, &scan_env);
5375  else
5376  r = numbered_ref_check(root);
5377 
5378  if (r != 0) goto err;
5379  }
5380 #endif
5381 
5382 #ifdef USE_SUBEXP_CALL
5383  if (scan_env.num_call > 0) {
5384  r = unset_addr_list_init(&uslist, scan_env.num_call);
5385  if (r != 0) goto err;
5386  scan_env.unset_addr_list = &uslist;
5387  r = setup_subexp_call(root, &scan_env);
5388  if (r != 0) goto err_unset;
5389  r = subexp_recursive_check_trav(root, &scan_env);
5390  if (r < 0) goto err_unset;
5391  r = subexp_inf_recursive_check_trav(root, &scan_env);
5392  if (r != 0) goto err_unset;
5393 
5394  reg->num_call = scan_env.num_call;
5395  }
5396  else
5397  reg->num_call = 0;
5398 #endif
5399 
5400  r = setup_tree(root, reg, 0, &scan_env);
5401  if (r != 0) goto err_unset;
5402 
5403 #ifdef ONIG_DEBUG_PARSE_TREE
5404  if (!onig_is_prelude()) print_tree(stderr, root);
5405 #endif
5406 
5407  reg->capture_history = scan_env.capture_history;
5408  reg->bt_mem_start = scan_env.bt_mem_start;
5409  reg->bt_mem_start |= reg->capture_history;
5410  if (IS_FIND_CONDITION(reg->options))
5412  else {
5413  reg->bt_mem_end = scan_env.bt_mem_end;
5414  reg->bt_mem_end |= reg->capture_history;
5415  }
5416 
5417 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5418  if (scan_env.backrefed_mem == 0
5419 #ifdef USE_SUBEXP_CALL
5420  || scan_env.num_call == 0
5421 #endif
5422  ) {
5423  setup_comb_exp_check(root, 0, &scan_env);
5424 #ifdef USE_SUBEXP_CALL
5425  if (scan_env.has_recursion != 0) {
5426  scan_env.num_comb_exp_check = 0;
5427  }
5428  else
5429 #endif
5430  if (scan_env.comb_exp_max_regnum > 0) {
5431  int i;
5432  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5433  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5434  scan_env.num_comb_exp_check = 0;
5435  break;
5436  }
5437  }
5438  }
5439  }
5440 
5441  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5442 #endif
5443 
5444  clear_optimize_info(reg);
5445 #ifndef ONIG_DONT_OPTIMIZE
5446  r = set_optimize_info_from_tree(root, reg, &scan_env);
5447  if (r != 0) goto err_unset;
5448 #endif
5449 
5450  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5451  xfree(scan_env.mem_nodes_dynamic);
5452  scan_env.mem_nodes_dynamic = (Node** )NULL;
5453  }
5454 
5455  r = compile_tree(root, reg);
5456  if (r == 0) {
5457  r = add_opcode(reg, OP_END);
5458 #ifdef USE_SUBEXP_CALL
5459  if (scan_env.num_call > 0) {
5460  r = unset_addr_list_fix(&uslist, reg);
5461  unset_addr_list_end(&uslist);
5462  if (r) goto err;
5463  }
5464 #endif
5465 
5466  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5468  else {
5469  if (reg->bt_mem_start != 0)
5471  else
5473  }
5474  }
5475 #ifdef USE_SUBEXP_CALL
5476  else if (scan_env.num_call > 0) {
5477  unset_addr_list_end(&uslist);
5478  }
5479 #endif
5480  onig_node_free(root);
5481 
5482 #ifdef ONIG_DEBUG_COMPILE
5483 #ifdef USE_NAMED_GROUP
5484  if (!onig_is_prelude()) onig_print_names(stderr, reg);
5485 #endif
5486  if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5487 #endif
5488 
5489  end:
5490  reg->state = ONIG_STATE_NORMAL;
5491  return r;
5492 
5493  err_unset:
5494 #ifdef USE_SUBEXP_CALL
5495  if (scan_env.num_call > 0) {
5496  unset_addr_list_end(&uslist);
5497  }
5498 #endif
5499  err:
5500  if (IS_NOT_NULL(scan_env.error)) {
5501  if (IS_NOT_NULL(einfo)) {
5502  einfo->enc = scan_env.enc;
5503  einfo->par = scan_env.error;
5504  einfo->par_end = scan_env.error_end;
5505  }
5506  }
5507 
5508  onig_node_free(root);
5509  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5510  xfree(scan_env.mem_nodes_dynamic);
5511  return r;
5512 }
5513 
5514 #ifdef USE_RECOMPILE_API
5515 extern int
5516 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5517  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5518  OnigErrorInfo* einfo)
5519 {
5520  int r;
5521  regex_t *new_reg;
5522 
5523  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5524  if (r) return r;
5525  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5526  onig_transfer(reg, new_reg);
5527  }
5528  else {
5529  onig_chain_link_add(reg, new_reg);
5530  }
5531  return 0;
5532 }
5533 #endif
5534 
5535 static int onig_inited = 0;
5536 
5537 extern int
5539  OnigCaseFoldType case_fold_flag,
5540  OnigEncoding enc, const OnigSyntaxType* syntax)
5541 {
5542  if (! onig_inited)
5543  onig_init();
5544 
5545  if (IS_NULL(reg))
5546  return ONIGERR_INVALID_ARGUMENT;
5547 
5548  if (ONIGENC_IS_UNDEF(enc))
5550 
5554  }
5555 
5556  (reg)->state = ONIG_STATE_MODIFY;
5557 
5558  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5559  option |= syntax->options;
5560  option &= ~ONIG_OPTION_SINGLELINE;
5561  }
5562  else
5563  option |= syntax->options;
5564 
5565  (reg)->enc = enc;
5566  (reg)->options = option;
5567  (reg)->syntax = syntax;
5568  (reg)->optimize = 0;
5569  (reg)->exact = (UChar* )NULL;
5570  (reg)->int_map = (int* )NULL;
5571  (reg)->int_map_backward = (int* )NULL;
5572  (reg)->chain = (regex_t* )NULL;
5573 
5574  (reg)->p = (UChar* )NULL;
5575  (reg)->alloc = 0;
5576  (reg)->used = 0;
5577  (reg)->name_table = (void* )NULL;
5578 
5579  (reg)->case_fold_flag = case_fold_flag;
5580  return 0;
5581 }
5582 
5583 extern int
5584 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5585  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5586  OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5587 {
5588  int r;
5589 
5590  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5591  if (r) return r;
5592 
5593  r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
5594  return r;
5595 }
5596 
5597 extern int
5598 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5599  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5600  OnigErrorInfo* einfo)
5601 {
5602  int r;
5603 
5604  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5605  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5606 
5607  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5608  if (r) goto err;
5609 
5610  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
5611  if (r) {
5612  err:
5613  onig_free(*reg);
5614  *reg = NULL;
5615  }
5616  return r;
5617 }
5618 
5619 
5620 extern int
5622 {
5623  if (onig_inited != 0)
5624  return 0;
5625 
5628 
5629  onig_inited = 1;
5630 
5631  onigenc_init();
5632  /* onigenc_set_default_caseconv_table((UChar* )0); */
5633 
5634 #ifdef ONIG_DEBUG_STATISTICS
5635  onig_statistics_init();
5636 #endif
5637 
5639  return 0;
5640 }
5641 
5642 
5643 extern int
5645 {
5647 
5648 #ifdef ONIG_DEBUG_STATISTICS
5649  if (!onig_is_prelude()) onig_print_statistics(stderr);
5650 #endif
5651 
5652 #ifdef USE_SHARED_CCLASS_TABLE
5654 #endif
5655 
5656 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5658 #endif
5659 
5660  onig_inited = 0;
5661 
5664  return 0;
5665 }
5666 
5667 extern int
5669 {
5670  OnigCodePoint n, *data;
5671  OnigCodePoint low, high, x;
5672 
5673  GET_CODE_POINT(n, p);
5674  data = (OnigCodePoint* )p;
5675  data++;
5676 
5677  for (low = 0, high = n; low < high; ) {
5678  x = (low + high) >> 1;
5679  if (code > data[x * 2 + 1])
5680  low = x + 1;
5681  else
5682  high = x;
5683  }
5684 
5685  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5686 }
5687 
5688 extern int
5690 {
5691  int found;
5692 
5693  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5694  if (IS_NULL(cc->mbuf)) {
5695  found = 0;
5696  }
5697  else {
5698  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5699  }
5700  }
5701  else {
5702  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5703  }
5704 
5705  if (IS_NCCLASS_NOT(cc))
5706  return !found;
5707  else
5708  return found;
5709 }
5710 
5711 extern int
5713 {
5714  int len;
5715 
5716  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5717  len = 2;
5718  }
5719  else {
5720  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5721  }
5722  return onig_is_code_in_cc_len(len, code, cc);
5723 }
5724 
5725 
5726 #ifdef ONIG_DEBUG
5727 
5728 /* arguments type */
5729 #define ARG_SPECIAL -1
5730 #define ARG_NON 0
5731 #define ARG_RELADDR 1
5732 #define ARG_ABSADDR 2
5733 #define ARG_LENGTH 3
5734 #define ARG_MEMNUM 4
5735 #define ARG_OPTION 5
5736 #define ARG_STATE_CHECK 6
5737 
5738 OnigOpInfoType OnigOpInfo[] = {
5739  { OP_FINISH, "finish", ARG_NON },
5740  { OP_END, "end", ARG_NON },
5741  { OP_EXACT1, "exact1", ARG_SPECIAL },
5742  { OP_EXACT2, "exact2", ARG_SPECIAL },
5743  { OP_EXACT3, "exact3", ARG_SPECIAL },
5744  { OP_EXACT4, "exact4", ARG_SPECIAL },
5745  { OP_EXACT5, "exact5", ARG_SPECIAL },
5746  { OP_EXACTN, "exactn", ARG_SPECIAL },
5747  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
5748  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
5749  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
5750  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
5751  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
5752  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
5753  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
5754  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
5755  { OP_CCLASS, "cclass", ARG_SPECIAL },
5756  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
5757  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
5758  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
5759  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
5760  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
5761  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
5762  { OP_ANYCHAR, "anychar", ARG_NON },
5763  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
5764  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
5765  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
5766  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5767  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5768  { OP_WORD, "word", ARG_NON },
5769  { OP_NOT_WORD, "not-word", ARG_NON },
5770  { OP_WORD_BOUND, "word-bound", ARG_NON },
5771  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
5772  { OP_WORD_BEGIN, "word-begin", ARG_NON },
5773  { OP_WORD_END, "word-end", ARG_NON },
5774  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
5775  { OP_END_BUF, "end-buf", ARG_NON },
5776  { OP_BEGIN_LINE, "begin-line", ARG_NON },
5777  { OP_END_LINE, "end-line", ARG_NON },
5778  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
5779  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
5780  { OP_BACKREF1, "backref1", ARG_NON },
5781  { OP_BACKREF2, "backref2", ARG_NON },
5782  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
5783  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
5784  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
5785  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
5786  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
5787  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
5788  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
5789  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
5790  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
5791  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
5792  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
5793  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
5794  { OP_SET_OPTION, "set-option", ARG_OPTION },
5795  { OP_FAIL, "fail", ARG_NON },
5796  { OP_JUMP, "jump", ARG_RELADDR },
5797  { OP_PUSH, "push", ARG_RELADDR },
5798  { OP_POP, "pop", ARG_NON },
5799  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
5800  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
5801  { OP_REPEAT, "repeat", ARG_SPECIAL },
5802  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
5803  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
5804  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
5805  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
5806  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
5807  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
5808  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
5809  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
5810  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
5811  { OP_PUSH_POS, "push-pos", ARG_NON },
5812  { OP_POP_POS, "pop-pos", ARG_NON },
5813  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
5814  { OP_FAIL_POS, "fail-pos", ARG_NON },
5815  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
5816  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
5817  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
5818  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5819  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5820  { OP_CALL, "call", ARG_ABSADDR },
5821  { OP_RETURN, "return", ARG_NON },
5822  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
5823  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5824  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
5825  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
5827  "state-check-anychar-ml*", ARG_STATE_CHECK },
5828  { -1, "", ARG_NON }
5829 };
5830 
5831 static const char*
5832 op2name(int opcode)
5833 {
5834  int i;
5835 
5836  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5837  if (opcode == OnigOpInfo[i].opcode)
5838  return OnigOpInfo[i].name;
5839  }
5840  return "";
5841 }
5842 
5843 static int
5844 op2arg_type(int opcode)
5845 {
5846  int i;
5847 
5848  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5849  if (opcode == OnigOpInfo[i].opcode)
5850  return OnigOpInfo[i].arg_type;
5851  }
5852  return ARG_SPECIAL;
5853 }
5854 
5855 static void
5856 Indent(FILE* f, int indent)
5857 {
5858  int i;
5859  for (i = 0; i < indent; i++) putc(' ', f);
5860 }
5861 
5862 static void
5863 p_string(FILE* f, int len, UChar* s)
5864 {
5865  fputs(":", f);
5866  while (len-- > 0) { fputc(*s++, f); }
5867 }
5868 
5869 static void
5870 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5871 {
5872  int x = len * mb_len;
5873 
5874  fprintf(f, ":%d:", len);
5875  while (x-- > 0) { fputc(*s++, f); }
5876 }
5877 
5878 extern void
5879 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
5880  OnigEncoding enc)
5881 {
5882  int i, n, arg_type;
5883  RelAddrType addr;
5884  LengthType len;
5885  MemNumType mem;
5886  StateCheckNumType scn;
5887  OnigCodePoint code;
5888  UChar *q;
5889 
5890  fprintf(f, "[%s", op2name(*bp));
5891  arg_type = op2arg_type(*bp);
5892  if (arg_type != ARG_SPECIAL) {
5893  bp++;
5894  switch (arg_type) {
5895  case ARG_NON:
5896  break;
5897  case ARG_RELADDR:
5898  GET_RELADDR_INC(addr, bp);
5899  fprintf(f, ":(%d)", addr);
5900  break;
5901  case ARG_ABSADDR:
5902  GET_ABSADDR_INC(addr, bp);
5903  fprintf(f, ":(%d)", addr);
5904  break;
5905  case ARG_LENGTH:
5906  GET_LENGTH_INC(len, bp);
5907  fprintf(f, ":%d", len);
5908  break;
5909  case ARG_MEMNUM:
5910  mem = *((MemNumType* )bp);
5911  bp += SIZE_MEMNUM;
5912  fprintf(f, ":%d", mem);
5913  break;
5914  case ARG_OPTION:
5915  {
5916  OnigOptionType option = *((OnigOptionType* )bp);
5917  bp += SIZE_OPTION;
5918  fprintf(f, ":%d", option);
5919  }
5920  break;
5921 
5922  case ARG_STATE_CHECK:
5923  scn = *((StateCheckNumType* )bp);
5924  bp += SIZE_STATE_CHECK_NUM;
5925  fprintf(f, ":%d", scn);
5926  break;
5927  }
5928  }
5929  else {
5930  switch (*bp++) {
5931  case OP_EXACT1:
5934  p_string(f, 1, bp++); break;
5935  case OP_EXACT2:
5936  p_string(f, 2, bp); bp += 2; break;
5937  case OP_EXACT3:
5938  p_string(f, 3, bp); bp += 3; break;
5939  case OP_EXACT4:
5940  p_string(f, 4, bp); bp += 4; break;
5941  case OP_EXACT5:
5942  p_string(f, 5, bp); bp += 5; break;
5943  case OP_EXACTN:
5944  GET_LENGTH_INC(len, bp);
5945  p_len_string(f, len, 1, bp);
5946  bp += len;
5947  break;
5948 
5949  case OP_EXACTMB2N1:
5950  p_string(f, 2, bp); bp += 2; break;
5951  case OP_EXACTMB2N2:
5952  p_string(f, 4, bp); bp += 4; break;
5953  case OP_EXACTMB2N3:
5954  p_string(f, 6, bp); bp += 6; break;
5955  case OP_EXACTMB2N:
5956  GET_LENGTH_INC(len, bp);
5957  p_len_string(f, len, 2, bp);
5958  bp += len * 2;
5959  break;
5960  case OP_EXACTMB3N:
5961  GET_LENGTH_INC(len, bp);
5962  p_len_string(f, len, 3, bp);
5963  bp += len * 3;
5964  break;
5965  case OP_EXACTMBN:
5966  {
5967  int mb_len;
5968 
5969  GET_LENGTH_INC(mb_len, bp);
5970  GET_LENGTH_INC(len, bp);
5971  fprintf(f, ":%d:%d:", mb_len, len);
5972  n = len * mb_len;
5973  while (n-- > 0) { fputc(*bp++, f); }
5974  }
5975  break;
5976 
5977  case OP_EXACT1_IC:
5978  len = enclen(enc, bp, bpend);
5979  p_string(f, len, bp);
5980  bp += len;
5981  break;
5982  case OP_EXACTN_IC:
5983  GET_LENGTH_INC(len, bp);
5984  p_len_string(f, len, 1, bp);
5985  bp += len;
5986  break;
5987 
5988  case OP_CCLASS:
5989  n = bitset_on_num((BitSetRef )bp);
5990  bp += SIZE_BITSET;
5991  fprintf(f, ":%d", n);
5992  break;
5993 
5994  case OP_CCLASS_NOT:
5995  n = bitset_on_num((BitSetRef )bp);
5996  bp += SIZE_BITSET;
5997  fprintf(f, ":%d", n);
5998  break;
5999 
6000  case OP_CCLASS_MB:
6001  case OP_CCLASS_MB_NOT:
6002  GET_LENGTH_INC(len, bp);
6003  q = bp;
6004 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6005  ALIGNMENT_RIGHT(q);
6006 #endif
6007  GET_CODE_POINT(code, q);
6008  bp += len;
6009  fprintf(f, ":%d:%d", (int )code, len);
6010  break;
6011 
6012  case OP_CCLASS_MIX:
6013  case OP_CCLASS_MIX_NOT:
6014  n = bitset_on_num((BitSetRef )bp);
6015  bp += SIZE_BITSET;
6016  GET_LENGTH_INC(len, bp);
6017  q = bp;
6018 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6019  ALIGNMENT_RIGHT(q);
6020 #endif
6021  GET_CODE_POINT(code, q);
6022  bp += len;
6023  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6024  break;
6025 
6026  case OP_CCLASS_NODE:
6027  {
6028  CClassNode *cc;
6029 
6030  GET_POINTER_INC(cc, bp);
6031  n = bitset_on_num(cc->bs);
6032  fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
6033  }
6034  break;
6035 
6036  case OP_BACKREFN_IC:
6037  mem = *((MemNumType* )bp);
6038  bp += SIZE_MEMNUM;
6039  fprintf(f, ":%d", mem);
6040  break;
6041 
6042  case OP_BACKREF_MULTI_IC:
6043  case OP_BACKREF_MULTI:
6044  fputs(" ", f);
6045  GET_LENGTH_INC(len, bp);
6046  for (i = 0; i < len; i++) {
6047  GET_MEMNUM_INC(mem, bp);
6048  if (i > 0) fputs(", ", f);
6049  fprintf(f, "%d", mem);
6050  }
6051  break;
6052 
6053  case OP_BACKREF_WITH_LEVEL:
6054  {
6055  OnigOptionType option;
6056  LengthType level;
6057 
6058  GET_OPTION_INC(option, bp);
6059  fprintf(f, ":%d", option);
6060  GET_LENGTH_INC(level, bp);
6061  fprintf(f, ":%d", level);
6062 
6063  fputs(" ", f);
6064  GET_LENGTH_INC(len, bp);
6065  for (i = 0; i < len; i++) {
6066  GET_MEMNUM_INC(mem, bp);
6067  if (i > 0) fputs(", ", f);
6068  fprintf(f, "%d", mem);
6069  }
6070  }
6071  break;
6072 
6073  case OP_REPEAT:
6074  case OP_REPEAT_NG:
6075  {
6076  mem = *((MemNumType* )bp);
6077  bp += SIZE_MEMNUM;
6078  addr = *((RelAddrType* )bp);
6079  bp += SIZE_RELADDR;
6080  fprintf(f, ":%d:%d", mem, addr);
6081  }
6082  break;
6083 
6085  case OP_PUSH_IF_PEEK_NEXT:
6086  addr = *((RelAddrType* )bp);
6087  bp += SIZE_RELADDR;
6088  fprintf(f, ":(%d)", addr);
6089  p_string(f, 1, bp);
6090  bp += 1;
6091  break;
6092 
6093  case OP_LOOK_BEHIND:
6094  GET_LENGTH_INC(len, bp);
6095  fprintf(f, ":%d", len);
6096  break;
6097 
6099  GET_RELADDR_INC(addr, bp);
6100  GET_LENGTH_INC(len, bp);
6101  fprintf(f, ":%d:(%d)", len, addr);
6102  break;
6103 
6104  case OP_STATE_CHECK_PUSH:
6106  scn = *((StateCheckNumType* )bp);
6107  bp += SIZE_STATE_CHECK_NUM;
6108  addr = *((RelAddrType* )bp);
6109  bp += SIZE_RELADDR;
6110  fprintf(f, ":%d:(%d)", scn, addr);
6111  break;
6112 
6113  default:
6114  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6115  *--bp);
6116  }
6117  }
6118  fputs("]", f);
6119  if (nextp) *nextp = bp;
6120 }
6121 
6122 static void
6123 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6124 {
6125  int ncode;
6126  UChar* bp = reg->p;
6127  UChar* end = reg->p + reg->used;
6128 
6129  fprintf(f, "code length: %d\n", reg->used);
6130 
6131  ncode = -1;
6132  while (bp < end) {
6133  ncode++;
6134  if (bp > reg->p) {
6135  if (ncode % 5 == 0)
6136  fprintf(f, "\n%ld:", bp-reg->p);
6137  else
6138  fprintf(f, " %ld:", bp-reg->p);
6139  }
6140  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6141  }
6142 
6143  fprintf(f, "\n");
6144 }
6145 
6146 static void
6147 print_indent_tree(FILE* f, Node* node, int indent)
6148 {
6149  int i, type, container_p = 0;
6150  int add = 3;
6151  UChar* p;
6152 
6153  Indent(f, indent);
6154  if (IS_NULL(node)) {
6155  fprintf(f, "ERROR: null node!!!\n");
6156  exit (0);
6157  }
6158 
6159  type = NTYPE(node);
6160  switch (type) {
6161  case NT_LIST:
6162  case NT_ALT:
6163  if (NTYPE(node) == NT_LIST)
6164  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
6165  else
6166  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
6167 
6168  print_indent_tree(f, NCAR(node), indent + add);
6169  while (IS_NOT_NULL(node = NCDR(node))) {
6170  if (NTYPE(node) != type) {
6171  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6172  exit(0);
6173  }
6174  print_indent_tree(f, NCAR(node), indent + add);
6175  }
6176  break;
6177 
6178  case NT_STR:
6179  fprintf(f, "<string%s:%"PRIxPTR">",
6180  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
6181  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6182  if (*p >= 0x20 && *p < 0x7f)
6183  fputc(*p, f);
6184  else {
6185  fprintf(f, " 0x%02x", *p);
6186  }
6187  }
6188  break;
6189 
6190  case NT_CCLASS:
6191  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
6192  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6193  if (NCCLASS(node)->mbuf) {
6194  BBuf* bbuf = NCCLASS(node)->mbuf;
6195  for (i = 0; i < (int)bbuf->used; i++) {
6196  if (i > 0) fprintf(f, ",");
6197  fprintf(f, "%0x", bbuf->p[i]);
6198  }
6199  }
6200  break;
6201 
6202  case NT_CTYPE:
6203  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
6204  switch (NCTYPE(node)->ctype) {
6205  case ONIGENC_CTYPE_WORD:
6206  if (NCTYPE(node)->not != 0)
6207  fputs("not word", f);
6208  else
6209  fputs("word", f);
6210  break;
6211 
6212  default:
6213  fprintf(f, "ERROR: undefined ctype.\n");
6214  exit(0);
6215  }
6216  break;
6217 
6218  case NT_CANY:
6219  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
6220  break;
6221 
6222  case NT_ANCHOR:
6223  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
6224  switch (NANCHOR(node)->type) {
6225  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6226  case ANCHOR_END_BUF: fputs("end buf", f); break;
6227  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6228  case ANCHOR_END_LINE: fputs("end line", f); break;
6229  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6230  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6231 
6232  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6233  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6234 #ifdef USE_WORD_BEGIN_END
6235  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6236  case ANCHOR_WORD_END: fputs("word end", f); break;
6237 #endif
6238  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6239  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6240  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6241  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6242 
6243  default:
6244  fprintf(f, "ERROR: undefined anchor type.\n");
6245  break;
6246  }
6247  break;
6248 
6249  case NT_BREF:
6250  {
6251  int* p;
6252  BRefNode* br = NBREF(node);
6253  p = BACKREFS_P(br);
6254  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
6255  for (i = 0; i < br->back_num; i++) {
6256  if (i > 0) fputs(", ", f);
6257  fprintf(f, "%d", p[i]);
6258  }
6259  }
6260  break;
6261 
6262 #ifdef USE_SUBEXP_CALL
6263  case NT_CALL:
6264  {
6265  CallNode* cn = NCALL(node);
6266  fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
6267  p_string(f, cn->name_end - cn->name, cn->name);
6268  }
6269  break;
6270 #endif
6271 
6272  case NT_QTFR:
6273  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
6274  NQTFR(node)->lower, NQTFR(node)->upper,
6275  (NQTFR(node)->greedy ? "" : "?"));
6276  print_indent_tree(f, NQTFR(node)->target, indent + add);
6277  break;
6278 
6279  case NT_ENCLOSE:
6280  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
6281  switch (NENCLOSE(node)->type) {
6282  case ENCLOSE_OPTION:
6283  fprintf(f, "option:%d\n", NENCLOSE(node)->option);
6284  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6285  break;
6286  case ENCLOSE_MEMORY:
6287  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6288  break;
6290  fprintf(f, "stop-bt");
6291  break;
6292 
6293  default:
6294  break;
6295  }
6296  fprintf(f, "\n");
6297  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6298  break;
6299 
6300  default:
6301  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6302  break;
6303  }
6304 
6305  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6306  type != NT_ENCLOSE)
6307  fprintf(f, "\n");
6308 
6309  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6310 
6311  fflush(f);
6312 }
6313 #endif /* ONIG_DEBUG */
6314 
6315 #ifdef ONIG_DEBUG_PARSE_TREE
6316 static void
6317 print_tree(FILE* f, Node* node)
6318 {
6319  print_indent_tree(f, node, 0);
6320 }
6321 #endif
6322