Ruby 4.0.5p0 (2026-05-20 revision 64336ffd0ee9e1f4c05891695a3d7b49cb709721)
regcomp.c
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 xfree(uslist->us);
199}
200
201static int
202unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
203{
204 UnsetAddr* p;
205 int size;
206
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
209 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
212 uslist->us = p;
213 }
214
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
217 uslist->num++;
218 return 0;
219}
220#endif /* USE_SUBEXP_CALL */
221
222
223static int
224add_opcode(regex_t* reg, int opcode)
225{
226 BBUF_ADD1(reg, opcode);
227 return 0;
228}
229
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
231static int
232add_state_check_num(regex_t* reg, int num)
233{
234 StateCheckNumType n = (StateCheckNumType )num;
235
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
237 return 0;
238}
239#endif
240
241static int
242add_rel_addr(regex_t* reg, int addr)
243{
244 RelAddrType ra = (RelAddrType )addr;
245
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
247 return 0;
248}
249
250static int
251add_abs_addr(regex_t* reg, int addr)
252{
253 AbsAddrType ra = (AbsAddrType )addr;
254
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
256 return 0;
257}
258
259static int
260add_length(regex_t* reg, OnigDistance len)
261{
262 LengthType l = (LengthType )len;
263
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
265 return 0;
266}
267
268static int
269add_mem_num(regex_t* reg, int num)
270{
271 MemNumType n = (MemNumType )num;
272
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
274 return 0;
275}
276
277#if 0
278static int
279add_pointer(regex_t* reg, void* addr)
280{
281 PointerType ptr = (PointerType )addr;
282
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
284 return 0;
285}
286#endif
287
288static int
289add_option(regex_t* reg, OnigOptionType option)
290{
291 BBUF_ADD(reg, &option, SIZE_OPTION);
292 return 0;
293}
294
295static int
296add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
297{
298 int r;
299
300 r = add_opcode(reg, opcode);
301 if (r) return r;
302 r = add_rel_addr(reg, addr);
303 return r;
304}
305
306static int
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
308{
309 BBUF_ADD(reg, bytes, len);
310 return 0;
311}
312
313static int
314add_bitset(regex_t* reg, BitSetRef bs)
315{
316 BBUF_ADD(reg, bs, SIZE_BITSET);
317 return 0;
318}
319
320static int
321add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
322{
323 int r;
324
325 r = add_opcode(reg, opcode);
326 if (r) return r;
327 r = add_option(reg, option);
328 return r;
329}
330
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
333
334
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
338
339static int
340select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
341{
342 int op;
343 OnigDistance str_len = roomof(byte_len, mb_len);
344
345 if (ignore_case) {
346 switch (str_len) {
347 case 1: op = OP_EXACT1_IC; break;
348 default: op = OP_EXACTN_IC; break;
349 }
350 }
351 else {
352 switch (mb_len) {
353 case 1:
354 switch (str_len) {
355 case 1: op = OP_EXACT1; break;
356 case 2: op = OP_EXACT2; break;
357 case 3: op = OP_EXACT3; break;
358 case 4: op = OP_EXACT4; break;
359 case 5: op = OP_EXACT5; break;
360 default: op = OP_EXACTN; break;
361 }
362 break;
363
364 case 2:
365 switch (str_len) {
366 case 1: op = OP_EXACTMB2N1; break;
367 case 2: op = OP_EXACTMB2N2; break;
368 case 3: op = OP_EXACTMB2N3; break;
369 default: op = OP_EXACTMB2N; break;
370 }
371 break;
372
373 case 3:
374 op = OP_EXACTMB3N;
375 break;
376
377 default:
378 op = OP_EXACTMBN;
379 break;
380 }
381 }
382 return op;
383}
384
385static int
386compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
387{
388 int r;
389 int saved_num_null_check = reg->num_null_check;
390
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
393 if (r) return r;
394 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
395 if (r) return r;
396 reg->num_null_check++;
397 }
398
399 r = compile_tree(node, reg);
400 if (r) return r;
401
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
409
410 if (r) return r;
411 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
412 }
413 return r;
414}
415
416#ifdef USE_SUBEXP_CALL
417static int
418compile_call(CallNode* node, regex_t* reg)
419{
420 int r;
421
422 r = add_opcode(reg, OP_CALL);
423 if (r) return r;
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
425 node->target);
426 if (r) return r;
427 r = add_abs_addr(reg, 0 /*dummy addr.*/);
428 return r;
429}
430#endif
431
432static int
433compile_tree_n_times(Node* node, int n, regex_t* reg)
434{
435 int i, r;
436
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
439 if (r) return r;
440 }
441 return 0;
442}
443
444static int
445add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED, int ignore_case)
447{
448 int len;
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
450
451 len = SIZE_OPCODE;
452
453 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
455 len += SIZE_LENGTH;
456
457 len += (int )byte_len;
458 return len;
459}
460
461static int
462add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
463 regex_t* reg, int ignore_case)
464{
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
466 add_opcode(reg, op);
467
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
470
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
474 else
475 add_length(reg, byte_len / mb_len);
476 }
477
478 add_bytes(reg, s, byte_len);
479 return 0;
480}
481
482
483static int
484compile_length_string_node(Node* node, regex_t* reg)
485{
486 int rlen, r, len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
488 UChar *p, *prev;
489 StrNode* sn;
490
491 sn = NSTR(node);
492 if (sn->end <= sn->s)
493 return 0;
494
495 ambig = NSTRING_IS_AMBIG(node);
496
497 p = prev = sn->s;
498 prev_len = enclen(enc, p, sn->end);
499 p += prev_len;
500 blen = prev_len;
501 rlen = 0;
502
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (len == prev_len || ambig) {
506 blen += len;
507 }
508 else {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
510 rlen += r;
511 prev = p;
512 blen = len;
513 prev_len = len;
514 }
515 p += len;
516 }
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 rlen += r;
519 return rlen;
520}
521
522static int
523compile_length_string_raw_node(StrNode* sn, regex_t* reg)
524{
525 if (sn->end <= sn->s)
526 return 0;
527
528 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529}
530
531static int
532compile_string_node(Node* node, regex_t* reg)
533{
534 int r, len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
537 StrNode* sn;
538
539 sn = NSTR(node);
540 if (sn->end <= sn->s)
541 return 0;
542
543 end = sn->end;
544 ambig = NSTRING_IS_AMBIG(node);
545
546 p = prev = sn->s;
547 prev_len = enclen(enc, p, end);
548 p += prev_len;
549 blen = prev_len;
550
551 for (; p < end; ) {
552 len = enclen(enc, p, end);
553 if (len == prev_len || ambig) {
554 blen += len;
555 }
556 else {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
558 if (r) return r;
559
560 prev = p;
561 blen = len;
562 prev_len = len;
563 }
564
565 p += len;
566 }
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
568}
569
570static int
571compile_string_raw_node(StrNode* sn, regex_t* reg)
572{
573 if (sn->end <= sn->s)
574 return 0;
575
576 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
577}
578
579static int
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
581{
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
585#else
586 int r, pad_size;
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
588
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
592
593 r = add_bytes(reg, mbuf->p, mbuf->used);
594
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
598 return r;
599#endif
600}
601
602static int
603compile_length_cclass_node(CClassNode* cc, regex_t* reg)
604{
605 int len;
606
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
609 }
610 else {
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
612 len = SIZE_OPCODE;
613 }
614 else {
615 len = SIZE_OPCODE + SIZE_BITSET;
616 }
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
619#else
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
621#endif
622 }
623
624 return len;
625}
626
627static int
628compile_cclass_node(CClassNode* cc, regex_t* reg)
629{
630 int r;
631
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
635 else
636 add_opcode(reg, OP_CCLASS);
637
638 r = add_bitset(reg, cc->bs);
639 }
640 else {
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
644 else
645 add_opcode(reg, OP_CCLASS_MB);
646
647 r = add_multi_byte_cclass(cc->mbuf, reg);
648 }
649 else {
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
652 else
653 add_opcode(reg, OP_CCLASS_MIX);
654
655 r = add_bitset(reg, cc->bs);
656 if (r) return r;
657 r = add_multi_byte_cclass(cc->mbuf, reg);
658 }
659 }
660
661 return r;
662}
663
664static int
665entry_repeat_range(regex_t* reg, int id, int lower, int upper)
666{
667#define REPEAT_RANGE_ALLOC 4
668
670
671 if (reg->repeat_range_alloc == 0) {
672 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
676 }
677 else if (reg->repeat_range_alloc <= id) {
678 int n;
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
680 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
681 sizeof(OnigRepeatRange) * n);
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
685 }
686 else {
687 p = reg->repeat_range;
688 }
689
690 p[id].lower = lower;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
692 return 0;
693}
694
695static int
696compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
697 regex_t* reg)
698{
699 int r;
700 int num_repeat = reg->num_repeat;
701
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
703 if (r) return r;
704 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
705 reg->num_repeat++;
706 if (r) return r;
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
708 if (r) return r;
709
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
711 if (r) return r;
712
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
714 if (r) return r;
715
716 if (
717#ifdef USE_SUBEXP_CALL
718 reg->num_call > 0 ||
719#endif
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
722 }
723 else {
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
725 }
726 if (r) return r;
727 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
728 return r;
729}
730
731static int
732is_anychar_star_quantifier(QtfrNode* qn)
733{
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
736 return 1;
737 else
738 return 0;
739}
740
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
743
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
745
746static int
747compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
748{
749 int len, mod_tlen, cklen;
750 int ckn;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
754
755 if (tlen < 0) return tlen;
756
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
758
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
760
761 /* anychar repeat */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
766 else
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
768 }
769 }
770
771 if (empty_info != 0)
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
773 else
774 mod_tlen = tlen;
775
776 if (infinite && qn->lower <= 1) {
777 if (qn->greedy) {
778 if (qn->lower == 1)
779 len = SIZE_OP_JUMP;
780 else
781 len = 0;
782
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
784 }
785 else {
786 if (qn->lower == 0)
787 len = SIZE_OP_JUMP;
788 else
789 len = 0;
790
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
792 }
793 }
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
796 len = SIZE_OP_JUMP + tlen;
797 else
798 len = 0;
799 }
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
802 if (CKN_ON) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
804 }
805 else {
806 len = SIZE_OP_PUSH + tlen;
807 }
808 }
809 else {
810 len = tlen;
811 }
812 }
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
815 }
816 else {
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
819 if (CKN_ON)
820 len += SIZE_OP_STATE_CHECK;
821 }
822
823 return len;
824}
825
826static int
827compile_quantifier_node(QtfrNode* qn, regex_t* reg)
828{
829 int r, mod_tlen;
830 int ckn;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
834
835 if (tlen < 0) return tlen;
836
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
838
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
841 if (r) return r;
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
845 else
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
847 if (r) return r;
848 if (CKN_ON) {
849 r = add_state_check_num(reg, ckn);
850 if (r) return r;
851 }
852
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
854 }
855 else {
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
860 }
861 else {
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
864 : OP_ANYCHAR_STAR));
865 }
866 if (r) return r;
867 if (CKN_ON)
868 r = add_state_check_num(reg, ckn);
869
870 return r;
871 }
872 }
873
874 if (empty_info != 0)
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
876 else
877 mod_tlen = tlen;
878
879 if (infinite && qn->lower <= 1) {
880 if (qn->greedy) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
884 if (r) return r;
885 }
886
887 if (CKN_ON) {
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
889 if (r) return r;
890 r = add_state_check_num(reg, ckn);
891 if (r) return r;
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
893 }
894 else {
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
896 }
897 if (r) return r;
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
899 if (r) return r;
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (int )SIZE_OP_JUMP
902 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
903 }
904 else {
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
907 if (r) return r;
908 }
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
910 if (r) return r;
911 if (CKN_ON) {
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
913 if (r) return r;
914 r = add_state_check_num(reg, ckn);
915 if (r) return r;
916 r = add_rel_addr(reg,
917 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
918 }
919 else
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
921 }
922 }
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
926 if (r) return r;
927 r = compile_tree(qn->target, reg);
928 }
929 else
930 r = 0;
931 }
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
934 if (CKN_ON) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
936 if (r) return r;
937 r = add_state_check_num(reg, ckn);
938 if (r) return r;
939 r = add_rel_addr(reg, tlen);
940 }
941 else {
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
943 }
944 if (r) return r;
945 }
946
947 r = compile_tree(qn->target, reg);
948 }
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
950 if (CKN_ON) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
952 if (r) return r;
953 r = add_state_check_num(reg, ckn);
954 if (r) return r;
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
956 }
957 else {
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
959 }
960
961 if (r) return r;
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
963 if (r) return r;
964 r = compile_tree(qn->target, reg);
965 }
966 else {
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
968 if (CKN_ON) {
969 if (r) return r;
970 r = add_opcode(reg, OP_STATE_CHECK);
971 if (r) return r;
972 r = add_state_check_num(reg, ckn);
973 }
974 }
975 return r;
976}
977
978#else /* USE_COMBINATION_EXPLOSION_CHECK */
979
980static int
981compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
982{
983 int len, mod_tlen;
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
987
988 if (tlen < 0) return tlen;
989
990 /* anychar repeat */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
995 else
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
997 }
998 }
999
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1002 else
1003 mod_tlen = tlen;
1004
1005 if (infinite &&
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1008 len = SIZE_OP_JUMP;
1009 }
1010 else {
1011 len = tlen * qn->lower;
1012 }
1013
1014 if (qn->greedy) {
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1018 else
1019#endif
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1022 else
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1024 }
1025 else
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1027 }
1028 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1029 len = SIZE_OP_JUMP + tlen;
1030 }
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1036 }
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1039 }
1040 else {
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1043 }
1044
1045 return len;
1046}
1047
1048static int
1049compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1050{
1051 int i, r, mod_tlen;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1055
1056 if (tlen < 0) return tlen;
1057
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1060 if (r) return r;
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1064 else
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1066 if (r) return r;
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1068 }
1069 else {
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1072 else
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1074 }
1075 }
1076
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1079 else
1080 mod_tlen = tlen;
1081
1082 if (infinite &&
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1085 if (qn->greedy) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1089 else
1090#endif
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1093 else
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1095 }
1096 else {
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1098 }
1099 if (r) return r;
1100 }
1101 else {
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1103 if (r) return r;
1104 }
1105
1106 if (qn->greedy) {
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1117 }
1118 else
1119#endif
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1123 if (r) return r;
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1126 if (r) return r;
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1129 }
1130 else {
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1132 if (r) return r;
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1134 if (r) return r;
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1137 }
1138 }
1139 else {
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1141 if (r) return r;
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1143 if (r) return r;
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1145 }
1146 }
1147 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1149 if (r) return r;
1150 r = compile_tree(qn->target, reg);
1151 }
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1156
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1158 if (r) return r;
1159
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1163 if (r) return r;
1164 r = compile_tree(qn->target, reg);
1165 if (r) return r;
1166 }
1167 }
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1170 if (r) return r;
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1172 if (r) return r;
1173 r = compile_tree(qn->target, reg);
1174 }
1175 else {
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1177 }
1178 return r;
1179}
1180#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1181
1182static int
1183compile_length_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int tlen;
1186 OnigOptionType prev = reg->options;
1187
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1191
1192 if (tlen < 0) return tlen;
1193
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1197 }
1198 else
1199 return tlen;
1200}
1201
1202static int
1203compile_option_node(EncloseNode* node, regex_t* reg)
1204{
1205 int r;
1206 OnigOptionType prev = reg->options;
1207
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1210 if (r) return r;
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1212 if (r) return r;
1213 r = add_opcode(reg, OP_FAIL);
1214 if (r) return r;
1215 }
1216
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1220
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1222 if (r) return r;
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1224 }
1225 return r;
1226}
1227
1228static int
1229compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1230{
1231 int len;
1232 int tlen;
1233
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1236
1237 if (node->target) {
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0) return tlen;
1240 }
1241 else
1242 tlen = 0;
1243
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1253 else
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1256 }
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1261 }
1262 else
1263#endif
1264 {
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1267 else
1268 len = SIZE_OP_MEMORY_START;
1269
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1272 }
1273 break;
1274
1275 case ENCLOSE_STOP_BACKTRACK:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0) return tlen;
1284
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1287 }
1288 else {
1289#endif
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1292 }
1293#endif
1294 break;
1295
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1300
1301 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1302 if (tlen < 0) return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1305 x = NCDR(x);
1306 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1307 if (tlen < 0) return tlen;
1308 len += tlen;
1309 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1310 }
1311 else {
1312 return ONIGERR_PARSER_BUG;
1313 }
1314 break;
1315
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1318 break;
1319
1320 default:
1321 return ONIGERR_TYPE_BUG;
1322 break;
1323 }
1324
1325 return len;
1326}
1327
1328static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1329
1330static int
1331compile_enclose_node(EncloseNode* node, regex_t* reg)
1332{
1333 int r, len;
1334
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1337
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1343 if (r) return r;
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (int )node->call_addr);
1347 if (r) return r;
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1353 else
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1356
1357 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1358 if (r) return r;
1359 }
1360#endif
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1363 else
1364 r = add_opcode(reg, OP_MEMORY_START);
1365 if (r) return r;
1366 r = add_mem_num(reg, node->regnum);
1367 if (r) return r;
1368 r = compile_tree(node->target, reg);
1369 if (r) return r;
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1375 else
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1378
1379 if (r) return r;
1380 r = add_mem_num(reg, node->regnum);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_RETURN);
1383 }
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1387 else
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1389 if (r) return r;
1390 r = add_mem_num(reg, node->regnum);
1391 }
1392 else
1393#endif
1394 {
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1397 else
1398 r = add_opcode(reg, OP_MEMORY_END);
1399 if (r) return r;
1400 r = add_mem_num(reg, node->regnum);
1401 }
1402 break;
1403
1404 case ENCLOSE_STOP_BACKTRACK:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1412 if (r) return r;
1413
1414 len = compile_length_tree(qn->target, reg);
1415 if (len < 0) return len;
1416
1417 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1418 if (r) return r;
1419 r = compile_tree(qn->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP);
1422 if (r) return r;
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1425 }
1426 else {
1427#endif
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1434 }
1435#endif
1436 break;
1437
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1440 if (r) return r;
1441 r = add_mem_num(reg, node->regnum);
1442 if (r) return r;
1443
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1446 int len2;
1447
1448 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1449 if (len < 0) return len;
1450 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1451 x = NCDR(x);
1452 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1453 if (len2 < 0) return len2;
1454 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1455
1456 x = node->target;
1457 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1458 if (r) return r;
1459 r = compile_tree(NCAR(x), reg); /* yes-node */
1460 if (r) return r;
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1462 if (r) return r;
1463 x = NCDR(x);
1464 r = compile_tree(NCAR(x), reg); /* no-node */
1465 }
1466 else {
1467 return ONIGERR_PARSER_BUG;
1468 }
1469 break;
1470
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1473 if (len < 0) return len;
1474
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1476 if (r) return r;
1477 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1478 if (r) return r;
1479 r = compile_tree(node->target, reg);
1480 if (r) return r;
1481 r = add_opcode(reg, OP_ABSENT_END);
1482 break;
1483
1484 default:
1485 return ONIGERR_TYPE_BUG;
1486 break;
1487 }
1488
1489 return r;
1490}
1491
1492static int
1493compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1494{
1495 int len;
1496 int tlen = 0;
1497
1498 if (node->target) {
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0) return tlen;
1501 }
1502
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1506 break;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1509 break;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1512 break;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1515 break;
1516
1517 default:
1518 len = SIZE_OPCODE;
1519 break;
1520 }
1521
1522 return len;
1523}
1524
1525static int
1526compile_anchor_node(AnchorNode* node, regex_t* reg)
1527{
1528 int r, len;
1529
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1537
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1541 break;
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1545 break;
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1550 break;
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1554 break;
1555#endif
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1557
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1560 if (r) return r;
1561 r = compile_tree(node->target, reg);
1562 if (r) return r;
1563 r = add_opcode(reg, OP_POP_POS);
1564 break;
1565
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1568 if (len < 0) return len;
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1570 if (r) return r;
1571 r = compile_tree(node->target, reg);
1572 if (r) return r;
1573 r = add_opcode(reg, OP_FAIL_POS);
1574 break;
1575
1576 case ANCHOR_LOOK_BEHIND:
1577 {
1578 int n;
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1580 if (r) return r;
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1584 }
1585 else
1586 n = node->char_len;
1587 r = add_length(reg, n);
1588 if (r) return r;
1589 r = compile_tree(node->target, reg);
1590 }
1591 break;
1592
1593 case ANCHOR_LOOK_BEHIND_NOT:
1594 {
1595 int n;
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1599 if (r) return r;
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1603 }
1604 else
1605 n = node->char_len;
1606 r = add_length(reg, n);
1607 if (r) return r;
1608 r = compile_tree(node->target, reg);
1609 if (r) return r;
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1611 }
1612 break;
1613
1614 default:
1615 return ONIGERR_TYPE_BUG;
1616 break;
1617 }
1618
1619 return r;
1620}
1621
1622static int
1623compile_length_tree(Node* node, regex_t* reg)
1624{
1625 int len, type, r;
1626
1627 type = NTYPE(node);
1628 switch (type) {
1629 case NT_LIST:
1630 len = 0;
1631 do {
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0) return r;
1634 len += r;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 break;
1638
1639 case NT_ALT:
1640 {
1641 int n = 0;
1642 len = 0;
1643 do {
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0) return r;
1646 len += r;
1647 n++;
1648 } while (IS_NOT_NULL(node = NCDR(node)));
1649 r = len;
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1651 }
1652 break;
1653
1654 case NT_STR:
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1657 else
1658 r = compile_length_string_node(node, reg);
1659 break;
1660
1661 case NT_CCLASS:
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1663 break;
1664
1665 case NT_CTYPE:
1666 case NT_CANY:
1667 r = SIZE_OPCODE;
1668 break;
1669
1670 case NT_BREF:
1671 {
1672 BRefNode* br = NBREF(node);
1673
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678 }
1679 else
1680#endif
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1684 }
1685 else {
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1687 }
1688 }
1689 break;
1690
1691#ifdef USE_SUBEXP_CALL
1692 case NT_CALL:
1693 r = SIZE_OP_CALL;
1694 break;
1695#endif
1696
1697 case NT_QTFR:
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1699 break;
1700
1701 case NT_ENCLOSE:
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1703 break;
1704
1705 case NT_ANCHOR:
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1707 break;
1708
1709 default:
1710 return ONIGERR_TYPE_BUG;
1711 break;
1712 }
1713
1714 return r;
1715}
1716
1717static int
1718compile_tree(Node* node, regex_t* reg)
1719{
1720 int n, type, len, pos, r = 0;
1721
1722 type = NTYPE(node);
1723 switch (type) {
1724 case NT_LIST:
1725 do {
1726 r = compile_tree(NCAR(node), reg);
1727 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1728 break;
1729
1730 case NT_ALT:
1731 {
1732 Node* x = node;
1733 len = 0;
1734 do {
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1738 }
1739 } while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used + len; /* goal position */
1741
1742 do {
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1746 if (r) break;
1747 }
1748 r = compile_tree(NCAR(node), reg);
1749 if (r) break;
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1753 if (r) break;
1754 }
1755 } while (IS_NOT_NULL(node = NCDR(node)));
1756 }
1757 break;
1758
1759 case NT_STR:
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1762 else
1763 r = compile_string_node(node, reg);
1764 break;
1765
1766 case NT_CCLASS:
1767 r = compile_cclass_node(NCCLASS(node), reg);
1768 break;
1769
1770 case NT_CTYPE:
1771 {
1772 int op;
1773
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1779 }
1780 else {
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1782 else op = OP_WORD;
1783 }
1784 break;
1785 default:
1786 return ONIGERR_TYPE_BUG;
1787 break;
1788 }
1789 r = add_opcode(reg, op);
1790 }
1791 break;
1792
1793 case NT_CANY:
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1796 else
1797 r = add_opcode(reg, OP_ANYCHAR);
1798 break;
1799
1800 case NT_BREF:
1801 {
1802 BRefNode* br = NBREF(node);
1803
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1807 if (r) return r;
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1809 if (r) return r;
1810 r = add_length(reg, br->nest_level);
1811 if (r) return r;
1812
1813 goto add_bacref_mems;
1814 }
1815 else
1816#endif
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1821 if (r) return r;
1822 r = add_mem_num(reg, n);
1823 }
1824 else {
1825 switch (n) {
1826 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1828 default:
1829 r = add_opcode(reg, OP_BACKREFN);
1830 if (r) return r;
1831 r = add_mem_num(reg, n);
1832 break;
1833 }
1834 }
1835 }
1836 else {
1837 int i;
1838 int* p;
1839
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1842 }
1843 else {
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1845 }
1846 if (r) return r;
1847
1848#ifdef USE_BACKREF_WITH_LEVEL
1849 add_bacref_mems:
1850#endif
1851 r = add_length(reg, br->back_num);
1852 if (r) return r;
1853 p = BACKREFS_P(br);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1856 if (r) return r;
1857 }
1858 }
1859 }
1860 break;
1861
1862#ifdef USE_SUBEXP_CALL
1863 case NT_CALL:
1864 r = compile_call(NCALL(node), reg);
1865 break;
1866#endif
1867
1868 case NT_QTFR:
1869 r = compile_quantifier_node(NQTFR(node), reg);
1870 break;
1871
1872 case NT_ENCLOSE:
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1874 break;
1875
1876 case NT_ANCHOR:
1877 r = compile_anchor_node(NANCHOR(node), reg);
1878 break;
1879
1880 default:
1881#ifdef ONIG_DEBUG
1882 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1883#endif
1884 break;
1885 }
1886
1887 return r;
1888}
1889
1890#ifdef USE_NAMED_GROUP
1891
1892static int
1893noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1894{
1895 int r = 0;
1896 Node* node = *plink;
1897
1898 switch (NTYPE(node)) {
1899 case NT_LIST:
1900 case NT_ALT:
1901 do {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1904 break;
1905
1906 case NT_QTFR:
1907 {
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1913 }
1914 }
1915 break;
1916
1917 case NT_ENCLOSE:
1918 {
1919 EncloseNode* en = NENCLOSE(node);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1922 (*counter)++;
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1925 }
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1931 break;
1932 }
1933 }
1934 r = noname_disable_map(&(en->target), map, counter);
1935 }
1936 break;
1937
1938 case NT_ANCHOR:
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1941 break;
1942
1943 default:
1944 break;
1945 }
1946
1947 return r;
1948}
1949
1950static int
1951renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1952{
1953 int i, pos, n, old_num;
1954 int *backs;
1955 BRefNode* bn = NBREF(node);
1956
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1959
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1963 else
1964 backs = bn->back_dynamic;
1965
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1969 if (n > 0) {
1970 backs[pos] = n;
1971 pos++;
1972 }
1973 }
1974
1975 bn->back_num = pos;
1976 return 0;
1977}
1978
1979static int
1980renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1981{
1982 int r = 0;
1983
1984 switch (NTYPE(node)) {
1985 case NT_LIST:
1986 case NT_ALT:
1987 do {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990 break;
1991 case NT_QTFR:
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1993 break;
1994 case NT_ENCLOSE:
1995 {
1996 EncloseNode* en = NENCLOSE(node);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2000 }
2001 r = renumber_by_map(en->target, map, num_mem);
2002 }
2003 break;
2004
2005 case NT_BREF:
2006 r = renumber_node_backref(node, map, num_mem);
2007 break;
2008
2009 case NT_ANCHOR:
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2012 break;
2013
2014 default:
2015 break;
2016 }
2017
2018 return r;
2019}
2020
2021static int
2022numbered_ref_check(Node* node)
2023{
2024 int r = 0;
2025
2026 switch (NTYPE(node)) {
2027 case NT_LIST:
2028 case NT_ALT:
2029 do {
2030 r = numbered_ref_check(NCAR(node));
2031 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2032 break;
2033 case NT_QTFR:
2034 r = numbered_ref_check(NQTFR(node)->target);
2035 break;
2036 case NT_ENCLOSE:
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2038 break;
2039
2040 case NT_BREF:
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2043 break;
2044
2045 case NT_ANCHOR:
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2048 break;
2049
2050 default:
2051 break;
2052 }
2053
2054 return r;
2055}
2056
2057static int
2058disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2059{
2060 int r, i, pos, counter;
2061 BitStatusType loc;
2062 GroupNumRemap* map;
2063
2064 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2067 map[i].new_val = 0;
2068 }
2069 counter = 0;
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0) return r;
2072
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0) return r;
2075
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2079 pos++;
2080 }
2081 }
2082
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2088 }
2089 }
2090
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2093
2094 return onig_renumber_name_table(reg, map);
2095}
2096#endif /* USE_NAMED_GROUP */
2097
2098#ifdef USE_SUBEXP_CALL
2099static int
2100unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2101{
2102 int i, offset;
2103 EncloseNode* en;
2104 AbsAddrType addr;
2105
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2111
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2113 }
2114 return 0;
2115}
2116#endif
2117
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2119static int
2120quantifiers_memory_node_info(Node* node)
2121{
2122 int r = 0;
2123
2124 switch (NTYPE(node)) {
2125 case NT_LIST:
2126 case NT_ALT:
2127 {
2128 int v;
2129 do {
2130 v = quantifiers_memory_node_info(NCAR(node));
2131 if (v > r) r = v;
2132 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2133 }
2134 break;
2135
2136# ifdef USE_SUBEXP_CALL
2137 case NT_CALL:
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2140 }
2141 else
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2143 break;
2144# endif
2145
2146 case NT_QTFR:
2147 {
2148 QtfrNode* qn = NQTFR(node);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2151 }
2152 }
2153 break;
2154
2155 case NT_ENCLOSE:
2156 {
2157 EncloseNode* en = NENCLOSE(node);
2158 switch (en->type) {
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2161 break;
2162
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2168 break;
2169 default:
2170 break;
2171 }
2172 }
2173 break;
2174
2175 case NT_BREF:
2176 case NT_STR:
2177 case NT_CTYPE:
2178 case NT_CCLASS:
2179 case NT_CANY:
2180 case NT_ANCHOR:
2181 default:
2182 break;
2183 }
2184
2185 return r;
2186}
2187#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2188
2189static int
2190get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2191{
2192 OnigDistance tmin;
2193 int r = 0;
2194
2195 *min = 0;
2196 switch (NTYPE(node)) {
2197 case NT_BREF:
2198 {
2199 int i;
2200 int* backs;
2201 Node** nodes = SCANENV_MEM_NODES(env);
2202 BRefNode* br = NBREF(node);
2203 if (br->state & NST_RECURSION) break;
2204
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2208 if (r != 0) break;
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2212 if (r != 0) break;
2213 if (*min > tmin) *min = tmin;
2214 }
2215 }
2216 break;
2217
2218#ifdef USE_SUBEXP_CALL
2219 case NT_CALL:
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2221 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2223 *min = en->min_len;
2224 }
2225 else
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2227 break;
2228#endif
2229
2230 case NT_LIST:
2231 do {
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2235 break;
2236
2237 case NT_ALT:
2238 {
2239 Node *x, *y;
2240 y = node;
2241 do {
2242 x = NCAR(y);
2243 r = get_min_match_length(x, &tmin, env);
2244 if (r != 0) break;
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2248 }
2249 break;
2250
2251 case NT_STR:
2252 {
2253 StrNode* sn = NSTR(node);
2254 *min = sn->end - sn->s;
2255 }
2256 break;
2257
2258 case NT_CTYPE:
2259 *min = 1;
2260 break;
2261
2262 case NT_CCLASS:
2263 case NT_CANY:
2264 *min = 1;
2265 break;
2266
2267 case NT_QTFR:
2268 {
2269 QtfrNode* qn = NQTFR(node);
2270
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2273 if (r == 0)
2274 *min = distance_multiply(*min, qn->lower);
2275 }
2276 }
2277 break;
2278
2279 case NT_ENCLOSE:
2280 {
2281 EncloseNode* en = NENCLOSE(node);
2282 switch (en->type) {
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2285 *min = en->min_len;
2286 else {
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2288 *min = 0; /* recursive */
2289 else {
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2293 if (r == 0) {
2294 en->min_len = *min;
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2296 }
2297 }
2298 }
2299 break;
2300
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2305 break;
2306
2307 case ENCLOSE_ABSENT:
2308 break;
2309 }
2310 }
2311 break;
2312
2313 case NT_ANCHOR:
2314 default:
2315 break;
2316 }
2317
2318 return r;
2319}
2320
2321static int
2322get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2323{
2324 OnigDistance tmax;
2325 int r = 0;
2326
2327 *max = 0;
2328 switch (NTYPE(node)) {
2329 case NT_LIST:
2330 do {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2332 if (r == 0)
2333 *max = distance_add(*max, tmax);
2334 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2335 break;
2336
2337 case NT_ALT:
2338 do {
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2342 break;
2343
2344 case NT_STR:
2345 {
2346 StrNode* sn = NSTR(node);
2347 *max = sn->end - sn->s;
2348 }
2349 break;
2350
2351 case NT_CTYPE:
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2353 break;
2354
2355 case NT_CCLASS:
2356 case NT_CANY:
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2358 break;
2359
2360 case NT_BREF:
2361 {
2362 int i;
2363 int* backs;
2364 Node** nodes = SCANENV_MEM_NODES(env);
2365 BRefNode* br = NBREF(node);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2368 break;
2369 }
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2374 if (r != 0) break;
2375 if (*max < tmax) *max = tmax;
2376 }
2377 }
2378 break;
2379
2380#ifdef USE_SUBEXP_CALL
2381 case NT_CALL:
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2384 else
2385 *max = ONIG_INFINITE_DISTANCE;
2386 break;
2387#endif
2388
2389 case NT_QTFR:
2390 {
2391 QtfrNode* qn = NQTFR(node);
2392
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2398 else
2399 *max = ONIG_INFINITE_DISTANCE;
2400 }
2401 }
2402 }
2403 break;
2404
2405 case NT_ENCLOSE:
2406 {
2407 EncloseNode* en = NENCLOSE(node);
2408 switch (en->type) {
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2411 *max = en->max_len;
2412 else {
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2415 else {
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2419 if (r == 0) {
2420 en->max_len = *max;
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2422 }
2423 }
2424 }
2425 break;
2426
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2431 break;
2432
2433 case ENCLOSE_ABSENT:
2434 break;
2435 }
2436 }
2437 break;
2438
2439 case NT_ANCHOR:
2440 default:
2441 break;
2442 }
2443
2444 return r;
2445}
2446
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2449
2450/* fixed size pattern node only */
2451static int
2452get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2453{
2454 int tlen;
2455 int r = 0;
2456
2457 level++;
2458 *len = 0;
2459 switch (NTYPE(node)) {
2460 case NT_LIST:
2461 do {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2463 if (r == 0)
2464 *len = (int )distance_add(*len, tlen);
2465 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2466 break;
2467
2468 case NT_ALT:
2469 {
2470 int tlen2;
2471 int varlen = 0;
2472
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2476 if (r == 0) {
2477 if (tlen != tlen2)
2478 varlen = 1;
2479 }
2480 }
2481 if (r == 0) {
2482 if (varlen != 0) {
2483 if (level == 1)
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 }
2488 else
2489 *len = tlen;
2490 }
2491 }
2492 break;
2493
2494 case NT_STR:
2495 {
2496 StrNode* sn = NSTR(node);
2497 UChar *s = sn->s;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2500 (*len)++;
2501 }
2502 }
2503 break;
2504
2505 case NT_QTFR:
2506 {
2507 QtfrNode* qn = NQTFR(node);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2510 if (r == 0)
2511 *len = (int )distance_multiply(tlen, qn->lower);
2512 }
2513 else
2514 r = GET_CHAR_LEN_VARLEN;
2515 }
2516 break;
2517
2518#ifdef USE_SUBEXP_CALL
2519 case NT_CALL:
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2522 else
2523 r = GET_CHAR_LEN_VARLEN;
2524 break;
2525#endif
2526
2527 case NT_CTYPE:
2528 *len = 1;
2529 break;
2530
2531 case NT_CCLASS:
2532 case NT_CANY:
2533 *len = 1;
2534 break;
2535
2536 case NT_ENCLOSE:
2537 {
2538 EncloseNode* en = NENCLOSE(node);
2539 switch (en->type) {
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *len = en->char_len;
2544 else {
2545 r = get_char_length_tree1(en->target, reg, len, level);
2546 if (r == 0) {
2547 en->char_len = *len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2549 }
2550 }
2551 break;
2552#endif
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg, len, level);
2557 break;
2558 case ENCLOSE_ABSENT:
2559 default:
2560 break;
2561 }
2562 }
2563 break;
2564
2565 case NT_ANCHOR:
2566 break;
2567
2568 default:
2569 r = GET_CHAR_LEN_VARLEN;
2570 break;
2571 }
2572
2573 return r;
2574}
2575
2576static int
2577get_char_length_tree(Node* node, regex_t* reg, int* len)
2578{
2579 return get_char_length_tree1(node, reg, len, 0);
2580}
2581
2582/* x is not included y ==> 1 : 0 */
2583static int
2584is_not_included(Node* x, Node* y, regex_t* reg)
2585{
2586 int i;
2587 OnigDistance len;
2588 OnigCodePoint code;
2589 UChar *p;
2590 int ytype;
2591
2592 retry:
2593 ytype = NTYPE(y);
2594 switch (NTYPE(x)) {
2595 case NT_CTYPE:
2596 {
2597 switch (ytype) {
2598 case NT_CTYPE:
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2602 return 1;
2603 else
2604 return 0;
2605 break;
2606
2607 case NT_CCLASS:
2608 swap:
2609 {
2610 Node* tmp;
2611 tmp = x; x = y; y = tmp;
2612 goto retry;
2613 }
2614 break;
2615
2616 case NT_STR:
2617 goto swap;
2618 break;
2619
2620 default:
2621 break;
2622 }
2623 }
2624 break;
2625
2626 case NT_CCLASS:
2627 {
2628 CClassNode* xc = NCCLASS(x);
2629 switch (ytype) {
2630 case NT_CTYPE:
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2639 }
2640 else {
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2642 }
2643 }
2644 }
2645 return 1;
2646 }
2647 return 0;
2648 }
2649 else {
2650 if (IS_NOT_NULL(xc->mbuf)) return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2652 int is_word;
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2655 else
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2657 if (! is_word) {
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2660 return 0;
2661 }
2662 else {
2663 if (! BITSET_AT(xc->bs, i))
2664 return 0;
2665 }
2666 }
2667 }
2668 return 1;
2669 }
2670 break;
2671
2672 default:
2673 break;
2674 }
2675 break;
2676
2677 case NT_CCLASS:
2678 {
2679 int v;
2680 CClassNode* yc = NCCLASS(y);
2681
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2689 return 0;
2690 }
2691 }
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2694 return 1;
2695 return 0;
2696 }
2697 break;
2698
2699 case NT_STR:
2700 goto swap;
2701 break;
2702
2703 default:
2704 break;
2705 }
2706 }
2707 break;
2708
2709 case NT_STR:
2710 {
2711 StrNode* xs = NSTR(x);
2712 if (NSTRING_LEN(x) == 0)
2713 break;
2714
2715 switch (ytype) {
2716 case NT_CTYPE:
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2722 else
2723 return !(NCTYPE(y)->not);
2724 }
2725 else {
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2728 else
2729 return !(NCTYPE(y)->not);
2730 }
2731 break;
2732 default:
2733 break;
2734 }
2735 break;
2736
2737 case NT_CCLASS:
2738 {
2739 CClassNode* cc = NCCLASS(y);
2740
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2744 }
2745 break;
2746
2747 case NT_STR:
2748 {
2749 UChar *q;
2750 StrNode* ys = NSTR(y);
2751 len = NSTRING_LEN(x);
2752 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2754 /* tiny version */
2755 return 0;
2756 }
2757 else {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2759 if (*p != *q) return 1;
2760 }
2761 }
2762 }
2763 break;
2764
2765 default:
2766 break;
2767 }
2768 }
2769 break;
2770
2771 default:
2772 break;
2773 }
2774
2775 return 0;
2776}
2777
2778static Node*
2779get_head_value_node(Node* node, int exact, regex_t* reg)
2780{
2781 Node* n = NULL_NODE;
2782
2783 switch (NTYPE(node)) {
2784 case NT_BREF:
2785 case NT_ALT:
2786 case NT_CANY:
2787#ifdef USE_SUBEXP_CALL
2788 case NT_CALL:
2789#endif
2790 break;
2791
2792 case NT_CTYPE:
2793 case NT_CCLASS:
2794 if (exact == 0) {
2795 n = node;
2796 }
2797 break;
2798
2799 case NT_LIST:
2800 n = get_head_value_node(NCAR(node), exact, reg);
2801 break;
2802
2803 case NT_STR:
2804 {
2805 StrNode* sn = NSTR(node);
2806 if (sn->end <= sn->s)
2807 break;
2808
2809 if (exact == 0 ||
2810 NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2811 n = node;
2812 }
2813 }
2814 break;
2815
2816 case NT_QTFR:
2817 {
2818 QtfrNode* qn = NQTFR(node);
2819 if (qn->lower > 0) {
2820#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2821 if (IS_NOT_NULL(qn->head_exact))
2822 n = qn->head_exact;
2823 else
2824#endif
2825 n = get_head_value_node(qn->target, exact, reg);
2826 }
2827 }
2828 break;
2829
2830 case NT_ENCLOSE:
2831 {
2832 EncloseNode* en = NENCLOSE(node);
2833 switch (en->type) {
2834 case ENCLOSE_OPTION:
2835 {
2836 OnigOptionType options = reg->options;
2837
2838 reg->options = NENCLOSE(node)->option;
2839 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2840 reg->options = options;
2841 }
2842 break;
2843
2844 case ENCLOSE_MEMORY:
2845 case ENCLOSE_STOP_BACKTRACK:
2846 case ENCLOSE_CONDITION:
2847 n = get_head_value_node(en->target, exact, reg);
2848 break;
2849
2850 case ENCLOSE_ABSENT:
2851 break;
2852 }
2853 }
2854 break;
2855
2856 case NT_ANCHOR:
2857 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2858 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2859 break;
2860
2861 default:
2862 break;
2863 }
2864
2865 return n;
2866}
2867
2868static int
2869check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2870{
2871 int type, r = 0;
2872
2873 type = NTYPE(node);
2874 if ((NTYPE2BIT(type) & type_mask) == 0)
2875 return 1;
2876
2877 switch (type) {
2878 case NT_LIST:
2879 case NT_ALT:
2880 do {
2881 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2882 anchor_mask);
2883 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2884 break;
2885
2886 case NT_QTFR:
2887 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2888 anchor_mask);
2889 break;
2890
2891 case NT_ENCLOSE:
2892 {
2893 EncloseNode* en = NENCLOSE(node);
2894 if ((en->type & enclose_mask) == 0)
2895 return 1;
2896
2897 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2898 }
2899 break;
2900
2901 case NT_ANCHOR:
2902 type = NANCHOR(node)->type;
2903 if ((type & anchor_mask) == 0)
2904 return 1;
2905
2906 if (NANCHOR(node)->target)
2907 r = check_type_tree(NANCHOR(node)->target,
2908 type_mask, enclose_mask, anchor_mask);
2909 break;
2910
2911 default:
2912 break;
2913 }
2914 return r;
2915}
2916
2917#ifdef USE_SUBEXP_CALL
2918
2919# define RECURSION_EXIST 1
2920# define RECURSION_INFINITE 2
2921
2922static int
2923subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2924{
2925 int type;
2926 int r = 0;
2927
2928 type = NTYPE(node);
2929 switch (type) {
2930 case NT_LIST:
2931 {
2932 Node *x;
2933 OnigDistance min;
2934 int ret;
2935
2936 x = node;
2937 do {
2938 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2939 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2940 r |= ret;
2941 if (head) {
2942 ret = get_min_match_length(NCAR(x), &min, env);
2943 if (ret != 0) return ret;
2944 if (min != 0) head = 0;
2945 }
2946 } while (IS_NOT_NULL(x = NCDR(x)));
2947 }
2948 break;
2949
2950 case NT_ALT:
2951 {
2952 int ret;
2953 r = RECURSION_EXIST;
2954 do {
2955 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2956 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2957 r &= ret;
2958 } while (IS_NOT_NULL(node = NCDR(node)));
2959 }
2960 break;
2961
2962 case NT_QTFR:
2963 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2964 if (r == RECURSION_EXIST) {
2965 if (NQTFR(node)->lower == 0) r = 0;
2966 }
2967 break;
2968
2969 case NT_ANCHOR:
2970 {
2971 AnchorNode* an = NANCHOR(node);
2972 switch (an->type) {
2973 case ANCHOR_PREC_READ:
2974 case ANCHOR_PREC_READ_NOT:
2975 case ANCHOR_LOOK_BEHIND:
2976 case ANCHOR_LOOK_BEHIND_NOT:
2977 r = subexp_inf_recursive_check(an->target, env, head);
2978 break;
2979 }
2980 }
2981 break;
2982
2983 case NT_CALL:
2984 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2985 break;
2986
2987 case NT_ENCLOSE:
2988 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2989 return 0;
2990 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2991 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2992 else {
2993 SET_ENCLOSE_STATUS(node, NST_MARK2);
2994 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2995 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2996 }
2997 break;
2998
2999 default:
3000 break;
3001 }
3002
3003 return r;
3004}
3005
3006static int
3007subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
3008{
3009 int type;
3010 int r = 0;
3011
3012 type = NTYPE(node);
3013 switch (type) {
3014 case NT_LIST:
3015 case NT_ALT:
3016 do {
3017 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3018 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3019 break;
3020
3021 case NT_QTFR:
3022 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3023 break;
3024
3025 case NT_ANCHOR:
3026 {
3027 AnchorNode* an = NANCHOR(node);
3028 switch (an->type) {
3029 case ANCHOR_PREC_READ:
3030 case ANCHOR_PREC_READ_NOT:
3031 case ANCHOR_LOOK_BEHIND:
3032 case ANCHOR_LOOK_BEHIND_NOT:
3033 r = subexp_inf_recursive_check_trav(an->target, env);
3034 break;
3035 }
3036 }
3037 break;
3038
3039 case NT_ENCLOSE:
3040 {
3041 EncloseNode* en = NENCLOSE(node);
3042
3043 if (IS_ENCLOSE_RECURSION(en)) {
3044 SET_ENCLOSE_STATUS(node, NST_MARK1);
3045 r = subexp_inf_recursive_check(en->target, env, 1);
3046 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3047 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3048 }
3049 r = subexp_inf_recursive_check_trav(en->target, env);
3050 }
3051
3052 break;
3053
3054 default:
3055 break;
3056 }
3057
3058 return r;
3059}
3060
3061static int
3062subexp_recursive_check(Node* node)
3063{
3064 int r = 0;
3065
3066 switch (NTYPE(node)) {
3067 case NT_LIST:
3068 case NT_ALT:
3069 do {
3070 r |= subexp_recursive_check(NCAR(node));
3071 } while (IS_NOT_NULL(node = NCDR(node)));
3072 break;
3073
3074 case NT_QTFR:
3075 r = subexp_recursive_check(NQTFR(node)->target);
3076 break;
3077
3078 case NT_ANCHOR:
3079 {
3080 AnchorNode* an = NANCHOR(node);
3081 switch (an->type) {
3082 case ANCHOR_PREC_READ:
3083 case ANCHOR_PREC_READ_NOT:
3084 case ANCHOR_LOOK_BEHIND:
3085 case ANCHOR_LOOK_BEHIND_NOT:
3086 r = subexp_recursive_check(an->target);
3087 break;
3088 }
3089 }
3090 break;
3091
3092 case NT_CALL:
3093 r = subexp_recursive_check(NCALL(node)->target);
3094 if (r != 0) SET_CALL_RECURSION(node);
3095 break;
3096
3097 case NT_ENCLOSE:
3098 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3099 return 0;
3100 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3101 return 1; /* recursion */
3102 else {
3103 SET_ENCLOSE_STATUS(node, NST_MARK2);
3104 r = subexp_recursive_check(NENCLOSE(node)->target);
3105 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3106 }
3107 break;
3108
3109 default:
3110 break;
3111 }
3112
3113 return r;
3114}
3115
3116
3117static int
3118subexp_recursive_check_trav(Node* node, ScanEnv* env)
3119{
3120# define FOUND_CALLED_NODE 1
3121
3122 int type;
3123 int r = 0;
3124
3125 type = NTYPE(node);
3126 switch (type) {
3127 case NT_LIST:
3128 case NT_ALT:
3129 {
3130 int ret;
3131 do {
3132 ret = subexp_recursive_check_trav(NCAR(node), env);
3133 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3134 else if (ret < 0) return ret;
3135 } while (IS_NOT_NULL(node = NCDR(node)));
3136 }
3137 break;
3138
3139 case NT_QTFR:
3140 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3141 if (NQTFR(node)->upper == 0) {
3142 if (r == FOUND_CALLED_NODE)
3143 NQTFR(node)->is_referred = 1;
3144 }
3145 break;
3146
3147 case NT_ANCHOR:
3148 {
3149 AnchorNode* an = NANCHOR(node);
3150 switch (an->type) {
3151 case ANCHOR_PREC_READ:
3152 case ANCHOR_PREC_READ_NOT:
3153 case ANCHOR_LOOK_BEHIND:
3154 case ANCHOR_LOOK_BEHIND_NOT:
3155 r = subexp_recursive_check_trav(an->target, env);
3156 break;
3157 }
3158 }
3159 break;
3160
3161 case NT_ENCLOSE:
3162 {
3163 EncloseNode* en = NENCLOSE(node);
3164
3165 if (! IS_ENCLOSE_RECURSION(en)) {
3166 if (IS_ENCLOSE_CALLED(en)) {
3167 SET_ENCLOSE_STATUS(node, NST_MARK1);
3168 r = subexp_recursive_check(en->target);
3169 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3170 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3171 }
3172 }
3173 r = subexp_recursive_check_trav(en->target, env);
3174 if (IS_ENCLOSE_CALLED(en))
3175 r |= FOUND_CALLED_NODE;
3176 }
3177 break;
3178
3179 default:
3180 break;
3181 }
3182
3183 return r;
3184}
3185
3186static int
3187setup_subexp_call(Node* node, ScanEnv* env)
3188{
3189 int type;
3190 int r = 0;
3191
3192 type = NTYPE(node);
3193 switch (type) {
3194 case NT_LIST:
3195 do {
3196 r = setup_subexp_call(NCAR(node), env);
3197 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3198 break;
3199
3200 case NT_ALT:
3201 do {
3202 r = setup_subexp_call(NCAR(node), env);
3203 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3204 break;
3205
3206 case NT_QTFR:
3207 r = setup_subexp_call(NQTFR(node)->target, env);
3208 break;
3209 case NT_ENCLOSE:
3210 r = setup_subexp_call(NENCLOSE(node)->target, env);
3211 break;
3212
3213 case NT_CALL:
3214 {
3215 CallNode* cn = NCALL(node);
3216 Node** nodes = SCANENV_MEM_NODES(env);
3217
3218 if (cn->group_num != 0) {
3219 int gnum = cn->group_num;
3220
3221# ifdef USE_NAMED_GROUP
3222 if (env->num_named > 0 &&
3223 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3224 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3225 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3226 }
3227# endif
3228 if (gnum > env->num_mem) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3232 }
3233
3234# ifdef USE_NAMED_GROUP
3235 set_call_attr:
3236# endif
3237 cn->target = nodes[cn->group_num];
3238 if (IS_NULL(cn->target)) {
3239 onig_scan_env_set_error_string(env,
3240 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3241 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3242 }
3243 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3244 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3245 cn->unset_addr_list = env->unset_addr_list;
3246 }
3247# ifdef USE_NAMED_GROUP
3248# ifdef USE_PERL_SUBEXP_CALL
3249 else if (cn->name == cn->name_end) {
3250 goto set_call_attr;
3251 }
3252# endif
3253 else {
3254 int *refs;
3255
3256 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3257 &refs);
3258 if (n <= 0) {
3259 onig_scan_env_set_error_string(env,
3260 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3261 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3262 }
3263 else if (n > 1 &&
3264 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3265 onig_scan_env_set_error_string(env,
3266 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3267 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3268 }
3269 else {
3270 cn->group_num = refs[0];
3271 goto set_call_attr;
3272 }
3273 }
3274# endif
3275 }
3276 break;
3277
3278 case NT_ANCHOR:
3279 {
3280 AnchorNode* an = NANCHOR(node);
3281
3282 switch (an->type) {
3283 case ANCHOR_PREC_READ:
3284 case ANCHOR_PREC_READ_NOT:
3285 case ANCHOR_LOOK_BEHIND:
3286 case ANCHOR_LOOK_BEHIND_NOT:
3287 r = setup_subexp_call(an->target, env);
3288 break;
3289 }
3290 }
3291 break;
3292
3293 default:
3294 break;
3295 }
3296
3297 return r;
3298}
3299#endif
3300
3301#define IN_ALT (1<<0)
3302#define IN_NOT (1<<1)
3303#define IN_REPEAT (1<<2)
3304#define IN_VAR_REPEAT (1<<3)
3305#define IN_CALL (1<<4)
3306#define IN_RECCALL (1<<5)
3307#define IN_LOOK_BEHIND (1<<6)
3308
3309/* divide different length alternatives in look-behind.
3310 (?<=A|B) ==> (?<=A)|(?<=B)
3311 (?<!A|B) ==> (?<!A)(?<!B)
3312*/
3313static int
3314divide_look_behind_alternatives(Node* node)
3315{
3316 Node *head, *np, *insert_node;
3317 AnchorNode* an = NANCHOR(node);
3318 int anc_type = an->type;
3319
3320 head = an->target;
3321 np = NCAR(head);
3322 swap_node(node, head);
3323 NCAR(node) = head;
3324 NANCHOR(head)->target = np;
3325
3326 np = node;
3327 while ((np = NCDR(np)) != NULL_NODE) {
3328 insert_node = onig_node_new_anchor(anc_type);
3329 CHECK_NULL_RETURN_MEMERR(insert_node);
3330 NANCHOR(insert_node)->target = NCAR(np);
3331 NCAR(np) = insert_node;
3332 }
3333
3334 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3335 np = node;
3336 do {
3337 SET_NTYPE(np, NT_LIST); /* alt -> list */
3338 } while ((np = NCDR(np)) != NULL_NODE);
3339 }
3340 return 0;
3341}
3342
3343static int
3344setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3345{
3346 int r, len;
3347 AnchorNode* an = NANCHOR(node);
3348
3349 r = get_char_length_tree(an->target, reg, &len);
3350 if (r == 0)
3351 an->char_len = len;
3352 else if (r == GET_CHAR_LEN_VARLEN)
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3355 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3356 r = divide_look_behind_alternatives(node);
3357 else
3358 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3359 }
3360
3361 return r;
3362}
3363
3364static int
3365next_setup(Node* node, Node* next_node, regex_t* reg)
3366{
3367 int type;
3368
3369 retry:
3370 type = NTYPE(node);
3371 if (type == NT_QTFR) {
3372 QtfrNode* qn = NQTFR(node);
3373 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3374#ifdef USE_QTFR_PEEK_NEXT
3375 Node* n = get_head_value_node(next_node, 1, reg);
3376 /* '\0': for UTF-16BE etc... */
3377 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3378 qn->next_head_exact = n;
3379 }
3380#endif
3381 /* automatic possessification a*b ==> (?>a*)b */
3382 if (qn->lower <= 1) {
3383 int ttype = NTYPE(qn->target);
3384 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3385 Node *x, *y;
3386 x = get_head_value_node(qn->target, 0, reg);
3387 if (IS_NOT_NULL(x)) {
3388 y = get_head_value_node(next_node, 0, reg);
3389 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3390 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3391 CHECK_NULL_RETURN_MEMERR(en);
3392 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3393 swap_node(node, en);
3394 NENCLOSE(node)->target = en;
3395 }
3396 }
3397 }
3398 }
3399 }
3400 }
3401 else if (type == NT_ENCLOSE) {
3402 EncloseNode* en = NENCLOSE(node);
3403 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3404 node = en->target;
3405 goto retry;
3406 }
3407 }
3408 return 0;
3409}
3410
3411
3412static int
3413update_string_node_case_fold(regex_t* reg, Node *node)
3414{
3415 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3416 UChar *sbuf, *ebuf, *sp;
3417 int r, i, len;
3418 OnigDistance sbuf_size;
3419 StrNode* sn = NSTR(node);
3420
3421 end = sn->end;
3422 sbuf_size = (end - sn->s) * 2;
3423 sbuf = (UChar* )xmalloc(sbuf_size);
3424 CHECK_NULL_RETURN_MEMERR(sbuf);
3425 ebuf = sbuf + sbuf_size;
3426
3427 sp = sbuf;
3428 p = sn->s;
3429 while (p < end) {
3430 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3431 for (i = 0; i < len; i++) {
3432 if (sp >= ebuf) {
3433 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3434 if (IS_NULL(p)) {
3435 xfree(sbuf);
3436 return ONIGERR_MEMORY;
3437 }
3438 sbuf = p;
3439 sp = sbuf + sbuf_size;
3440 sbuf_size *= 2;
3441 ebuf = sbuf + sbuf_size;
3442 }
3443
3444 *sp++ = buf[i];
3445 }
3446 }
3447
3448 r = onig_node_str_set(node, sbuf, sp);
3449
3450 xfree(sbuf);
3451 return r;
3452}
3453
3454static int
3455expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3456 regex_t* reg)
3457{
3458 int r;
3459 Node *node;
3460
3461 node = onig_node_new_str(s, end);
3462 if (IS_NULL(node)) return ONIGERR_MEMORY;
3463
3464 r = update_string_node_case_fold(reg, node);
3465 if (r != 0) {
3466 onig_node_free(node);
3467 return r;
3468 }
3469
3470 NSTRING_SET_AMBIG(node);
3471 NSTRING_SET_DONT_GET_OPT_INFO(node);
3472 *rnode = node;
3473 return 0;
3474}
3475
3476static int
3477is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3478 int slen)
3479{
3480 int i;
3481
3482 for (i = 0; i < item_num; i++) {
3483 if (items[i].byte_len != slen) {
3484 return 1;
3485 }
3486 if (items[i].code_len != 1) {
3487 return 1;
3488 }
3489 }
3490 return 0;
3491}
3492
3493static int
3494expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3495 UChar *p, int slen, UChar *end,
3496 regex_t* reg, Node **rnode)
3497{
3498 int r, i, j, len, varlen;
3499 Node *anode, *var_anode, *snode, *xnode, *an;
3500 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3501
3502 *rnode = var_anode = NULL_NODE;
3503
3504 varlen = 0;
3505 for (i = 0; i < item_num; i++) {
3506 if (items[i].byte_len != slen) {
3507 varlen = 1;
3508 break;
3509 }
3510 }
3511
3512 if (varlen != 0) {
3513 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3514 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3515
3516 xnode = onig_node_new_list(NULL, NULL);
3517 if (IS_NULL(xnode)) goto mem_err;
3518 NCAR(var_anode) = xnode;
3519
3520 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode)) goto mem_err;
3522 NCAR(xnode) = anode;
3523 }
3524 else {
3525 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3526 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3527 }
3528
3529 snode = onig_node_new_str(p, p + slen);
3530 if (IS_NULL(snode)) goto mem_err;
3531
3532 NCAR(anode) = snode;
3533
3534 for (i = 0; i < item_num; i++) {
3535 snode = onig_node_new_str(NULL, NULL);
3536 if (IS_NULL(snode)) goto mem_err;
3537
3538 for (j = 0; j < items[i].code_len; j++) {
3539 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3540 if (len < 0) {
3541 r = len;
3542 goto mem_err2;
3543 }
3544
3545 r = onig_node_str_cat(snode, buf, buf + len);
3546 if (r != 0) goto mem_err2;
3547 }
3548
3549 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3550 if (IS_NULL(an)) {
3551 goto mem_err2;
3552 }
3553
3554 if (items[i].byte_len != slen) {
3555 Node *rem;
3556 UChar *q = p + items[i].byte_len;
3557
3558 if (q < end) {
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3560 if (r != 0) {
3561 onig_node_free(an);
3562 goto mem_err2;
3563 }
3564
3565 xnode = onig_node_list_add(NULL_NODE, snode);
3566 if (IS_NULL(xnode)) {
3567 onig_node_free(an);
3568 onig_node_free(rem);
3569 goto mem_err2;
3570 }
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3572 onig_node_free(an);
3573 onig_node_free(xnode);
3574 onig_node_free(rem);
3575 goto mem_err;
3576 }
3577
3578 NCAR(an) = xnode;
3579 }
3580 else {
3581 NCAR(an) = snode;
3582 }
3583
3584 NCDR(var_anode) = an;
3585 var_anode = an;
3586 }
3587 else {
3588 NCAR(an) = snode;
3589 NCDR(anode) = an;
3590 anode = an;
3591 }
3592 }
3593
3594 return varlen;
3595
3596 mem_err2:
3597 onig_node_free(snode);
3598
3599 mem_err:
3600 onig_node_free(*rnode);
3601
3602 return ONIGERR_MEMORY;
3603}
3604
3605#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3606
3607static int
3608expand_case_fold_string(Node* node, regex_t* reg, int state)
3609{
3610 int r, n, len, alt_num;
3611 int varlen = 0;
3612 int is_in_look_behind;
3613 UChar *start, *end, *p;
3614 Node *top_root, *root, *snode, *prev_node;
3615 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3616 StrNode* sn;
3617
3618 if (NSTRING_IS_AMBIG(node)) return 0;
3619
3620 sn = NSTR(node);
3621
3622 start = sn->s;
3623 end = sn->end;
3624 if (start >= end) return 0;
3625
3626 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3627
3628 r = 0;
3629 top_root = root = prev_node = snode = NULL_NODE;
3630 alt_num = 1;
3631 p = start;
3632 while (p < end) {
3633 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3634 p, end, items);
3635 if (n < 0) {
3636 r = n;
3637 goto err;
3638 }
3639
3640 len = enclen(reg->enc, p, end);
3641
3642 varlen = is_case_fold_variable_len(n, items, len);
3643 if (n == 0 || varlen == 0 || is_in_look_behind) {
3644 if (IS_NULL(snode)) {
3645 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3646 onig_node_free(top_root);
3647 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3648 if (IS_NULL(root)) {
3649 onig_node_free(prev_node);
3650 goto mem_err;
3651 }
3652 }
3653
3654 prev_node = snode = onig_node_new_str(NULL, NULL);
3655 if (IS_NULL(snode)) goto mem_err;
3656 if (IS_NOT_NULL(root)) {
3657 if (IS_NULL(onig_node_list_add(root, snode))) {
3658 onig_node_free(snode);
3659 goto mem_err;
3660 }
3661 }
3662 }
3663
3664 r = onig_node_str_cat(snode, p, p + len);
3665 if (r != 0) goto err;
3666 }
3667 else {
3668 alt_num *= (n + 1);
3669 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3670
3671 if (IS_NOT_NULL(snode)) {
3672 r = update_string_node_case_fold(reg, snode);
3673 if (r == 0) {
3674 NSTRING_SET_AMBIG(snode);
3675 }
3676 }
3677 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3678 onig_node_free(top_root);
3679 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3680 if (IS_NULL(root)) {
3681 onig_node_free(prev_node);
3682 goto mem_err;
3683 }
3684 }
3685
3686 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3687 if (r < 0) goto mem_err;
3688 if (r == 1) {
3689 if (IS_NULL(root)) {
3690 top_root = prev_node;
3691 }
3692 else {
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3695 goto mem_err;
3696 }
3697 }
3698
3699 root = NCAR(prev_node);
3700 }
3701 else { /* r == 0 */
3702 if (IS_NOT_NULL(root)) {
3703 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3704 onig_node_free(prev_node);
3705 goto mem_err;
3706 }
3707 }
3708 }
3709
3710 snode = NULL_NODE;
3711 }
3712
3713 p += len;
3714 }
3715 if (IS_NOT_NULL(snode)) {
3716 r = update_string_node_case_fold(reg, snode);
3717 if (r == 0) {
3718 NSTRING_SET_AMBIG(snode);
3719 }
3720 }
3721
3722 if (p < end) {
3723 Node *srem;
3724
3725 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3726 if (r != 0) goto mem_err;
3727
3728 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3729 onig_node_free(top_root);
3730 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3731 if (IS_NULL(root)) {
3732 onig_node_free(srem);
3733 onig_node_free(prev_node);
3734 goto mem_err;
3735 }
3736 }
3737
3738 if (IS_NULL(root)) {
3739 prev_node = srem;
3740 }
3741 else {
3742 if (IS_NULL(onig_node_list_add(root, srem))) {
3743 onig_node_free(srem);
3744 goto mem_err;
3745 }
3746 }
3747 }
3748
3749 /* ending */
3750 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3751 swap_node(node, top_root);
3752 onig_node_free(top_root);
3753 return 0;
3754
3755 mem_err:
3756 r = ONIGERR_MEMORY;
3757
3758 err:
3759 onig_node_free(top_root);
3760 return r;
3761}
3762
3763
3764#ifdef USE_COMBINATION_EXPLOSION_CHECK
3765
3766# define CEC_THRES_NUM_BIG_REPEAT 512
3767# define CEC_INFINITE_NUM 0x7fffffff
3768
3769# define CEC_IN_INFINITE_REPEAT (1<<0)
3770# define CEC_IN_FINITE_REPEAT (1<<1)
3771# define CEC_CONT_BIG_REPEAT (1<<2)
3772
3773static int
3774setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3775{
3776 int type;
3777 int r = state;
3778
3779 type = NTYPE(node);
3780 switch (type) {
3781 case NT_LIST:
3782 {
3783 do {
3784 r = setup_comb_exp_check(NCAR(node), r, env);
3785 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3786 }
3787 break;
3788
3789 case NT_ALT:
3790 {
3791 int ret;
3792 do {
3793 ret = setup_comb_exp_check(NCAR(node), state, env);
3794 r |= ret;
3795 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3796 }
3797 break;
3798
3799 case NT_QTFR:
3800 {
3801 int child_state = state;
3802 int add_state = 0;
3803 QtfrNode* qn = NQTFR(node);
3804 Node* target = qn->target;
3805 int var_num;
3806
3807 if (! IS_REPEAT_INFINITE(qn->upper)) {
3808 if (qn->upper > 1) {
3809 /* {0,1}, {1,1} are allowed */
3810 child_state |= CEC_IN_FINITE_REPEAT;
3811
3812 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3813 if (env->backrefed_mem == 0) {
3814 if (NTYPE(qn->target) == NT_ENCLOSE) {
3815 EncloseNode* en = NENCLOSE(qn->target);
3816 if (en->type == ENCLOSE_MEMORY) {
3817 if (NTYPE(en->target) == NT_QTFR) {
3818 QtfrNode* q = NQTFR(en->target);
3819 if (IS_REPEAT_INFINITE(q->upper)
3820 && q->greedy == qn->greedy) {
3821 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3822 if (qn->upper == 1)
3823 child_state = state;
3824 }
3825 }
3826 }
3827 }
3828 }
3829 }
3830 }
3831
3832 if (state & CEC_IN_FINITE_REPEAT) {
3833 qn->comb_exp_check_num = -1;
3834 }
3835 else {
3836 if (IS_REPEAT_INFINITE(qn->upper)) {
3837 var_num = CEC_INFINITE_NUM;
3838 child_state |= CEC_IN_INFINITE_REPEAT;
3839 }
3840 else {
3841 var_num = qn->upper - qn->lower;
3842 }
3843
3844 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3845 add_state |= CEC_CONT_BIG_REPEAT;
3846
3847 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3848 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3849 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3850 if (qn->comb_exp_check_num == 0) {
3851 env->num_comb_exp_check++;
3852 qn->comb_exp_check_num = env->num_comb_exp_check;
3853 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3854 env->comb_exp_max_regnum = env->curr_max_regnum;
3855 }
3856 }
3857 }
3858
3859 r = setup_comb_exp_check(target, child_state, env);
3860 r |= add_state;
3861 }
3862 break;
3863
3864 case NT_ENCLOSE:
3865 {
3866 EncloseNode* en = NENCLOSE(node);
3867
3868 switch (en->type) {
3869 case ENCLOSE_MEMORY:
3870 {
3871 if (env->curr_max_regnum < en->regnum)
3872 env->curr_max_regnum = en->regnum;
3873
3874 r = setup_comb_exp_check(en->target, state, env);
3875 }
3876 break;
3877
3878 default:
3879 r = setup_comb_exp_check(en->target, state, env);
3880 break;
3881 }
3882 }
3883 break;
3884
3885# ifdef USE_SUBEXP_CALL
3886 case NT_CALL:
3887 if (IS_CALL_RECURSION(NCALL(node)))
3888 env->has_recursion = 1;
3889 else
3890 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3891 break;
3892# endif
3893
3894 default:
3895 break;
3896 }
3897
3898 return r;
3899}
3900#endif
3901
3902/* setup_tree does the following work.
3903 1. check empty loop. (set qn->target_empty_info)
3904 2. expand ignore-case in char class.
3905 3. set memory status bit flags. (reg->mem_stats)
3906 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3907 5. find invalid patterns in look-behind.
3908 6. expand repeated string.
3909 */
3910static int
3911setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3912{
3913 int type;
3914 int r = 0;
3915
3916restart:
3917 type = NTYPE(node);
3918 switch (type) {
3919 case NT_LIST:
3920 {
3921 Node* prev = NULL_NODE;
3922 do {
3923 r = setup_tree(NCAR(node), reg, state, env);
3924 if (IS_NOT_NULL(prev) && r == 0) {
3925 r = next_setup(prev, NCAR(node), reg);
3926 }
3927 prev = NCAR(node);
3928 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3929 }
3930 break;
3931
3932 case NT_ALT:
3933 do {
3934 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3935 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3936 break;
3937
3938 case NT_CCLASS:
3939 break;
3940
3941 case NT_STR:
3942 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3943 r = expand_case_fold_string(node, reg, state);
3944 }
3945 break;
3946
3947 case NT_CTYPE:
3948 case NT_CANY:
3949 break;
3950
3951#ifdef USE_SUBEXP_CALL
3952 case NT_CALL:
3953 break;
3954#endif
3955
3956 case NT_BREF:
3957 {
3958 int i;
3959 int* p;
3960 Node** nodes = SCANENV_MEM_NODES(env);
3961 BRefNode* br = NBREF(node);
3962 p = BACKREFS_P(br);
3963 for (i = 0; i < br->back_num; i++) {
3964 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3965 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3966 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3967#ifdef USE_BACKREF_WITH_LEVEL
3968 if (IS_BACKREF_NEST_LEVEL(br)) {
3969 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3970 }
3971#endif
3972 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3973 }
3974 }
3975 break;
3976
3977 case NT_QTFR:
3978 {
3979 OnigDistance d;
3980 QtfrNode* qn = NQTFR(node);
3981 Node* target = qn->target;
3982
3983 if ((state & IN_REPEAT) != 0) {
3984 qn->state |= NST_IN_REPEAT;
3985 }
3986
3987 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3988 r = get_min_match_length(target, &d, env);
3989 if (r) break;
3990 if (d == 0) {
3991 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3992#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3993 r = quantifiers_memory_node_info(target);
3994 if (r < 0) break;
3995 if (r > 0) {
3996 qn->target_empty_info = r;
3997 }
3998#endif
3999#if 0
4000 r = get_max_match_length(target, &d, env);
4001 if (r == 0 && d == 0) {
4002 /* ()* ==> ()?, ()+ ==> () */
4003 qn->upper = 1;
4004 if (qn->lower > 1) qn->lower = 1;
4005 if (NTYPE(target) == NT_STR) {
4006 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
4007 }
4008 }
4009#endif
4010 }
4011 }
4012
4013 state |= IN_REPEAT;
4014 if (qn->lower != qn->upper)
4015 state |= IN_VAR_REPEAT;
4016 r = setup_tree(target, reg, state, env);
4017 if (r) break;
4018
4019 /* expand string */
4020#define EXPAND_STRING_MAX_LENGTH 100
4021 if (NTYPE(target) == NT_STR) {
4022 if (qn->lower > 1) {
4023 int i, n = qn->lower;
4024 OnigDistance len = NSTRING_LEN(target);
4025 StrNode* sn = NSTR(target);
4026 Node* np;
4027
4028 np = onig_node_new_str(sn->s, sn->end);
4029 if (IS_NULL(np)) return ONIGERR_MEMORY;
4030 NSTR(np)->flag = sn->flag;
4031
4032 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4033 r = onig_node_str_cat(np, sn->s, sn->end);
4034 if (r) {
4035 onig_node_free(np);
4036 return r;
4037 }
4038 }
4039 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4040 Node *np1, *np2;
4041
4042 qn->lower -= i;
4043 if (! IS_REPEAT_INFINITE(qn->upper))
4044 qn->upper -= i;
4045
4046 np1 = onig_node_new_list(np, NULL);
4047 if (IS_NULL(np1)) {
4048 onig_node_free(np);
4049 return ONIGERR_MEMORY;
4050 }
4051 swap_node(np1, node);
4052 np2 = onig_node_list_add(node, np1);
4053 if (IS_NULL(np2)) {
4054 onig_node_free(np1);
4055 return ONIGERR_MEMORY;
4056 }
4057 }
4058 else {
4059 swap_node(np, node);
4060 onig_node_free(np);
4061 }
4062 break; /* break case NT_QTFR: */
4063 }
4064 }
4065
4066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4067 if (qn->greedy && (qn->target_empty_info != 0)) {
4068 if (NTYPE(target) == NT_QTFR) {
4069 QtfrNode* tqn = NQTFR(target);
4070 if (IS_NOT_NULL(tqn->head_exact)) {
4071 qn->head_exact = tqn->head_exact;
4072 tqn->head_exact = NULL;
4073 }
4074 }
4075 else {
4076 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4077 }
4078 }
4079#endif
4080 }
4081 break;
4082
4083 case NT_ENCLOSE:
4084 {
4085 EncloseNode* en = NENCLOSE(node);
4086
4087 switch (en->type) {
4088 case ENCLOSE_OPTION:
4089 {
4090 OnigOptionType options = reg->options;
4091 reg->options = NENCLOSE(node)->option;
4092 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4093 reg->options = options;
4094 }
4095 break;
4096
4097 case ENCLOSE_MEMORY:
4098 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4099 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4100 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4101 }
4102 if (IS_ENCLOSE_CALLED(en))
4103 state |= IN_CALL;
4104 if (IS_ENCLOSE_RECURSION(en))
4105 state |= IN_RECCALL;
4106 else if ((state & IN_RECCALL) != 0)
4107 SET_CALL_RECURSION(node);
4108 r = setup_tree(en->target, reg, state, env);
4109 break;
4110
4111 case ENCLOSE_STOP_BACKTRACK:
4112 {
4113 Node* target = en->target;
4114 r = setup_tree(target, reg, state, env);
4115 if (NTYPE(target) == NT_QTFR) {
4116 QtfrNode* tqn = NQTFR(target);
4117 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4118 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4119 int qtype = NTYPE(tqn->target);
4120 if (IS_NODE_TYPE_SIMPLE(qtype))
4121 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4122 }
4123 }
4124 }
4125 break;
4126
4127 case ENCLOSE_CONDITION:
4128#ifdef USE_NAMED_GROUP
4129 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4130 env->num_named > 0 &&
4131 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4132 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4133 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4134 }
4135#endif
4136 if (NENCLOSE(node)->regnum > env->num_mem)
4137 return ONIGERR_INVALID_BACKREF;
4138 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4139 break;
4140
4141 case ENCLOSE_ABSENT:
4142 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4143 break;
4144 }
4145 }
4146 break;
4147
4148 case NT_ANCHOR:
4149 {
4150 AnchorNode* an = NANCHOR(node);
4151
4152 switch (an->type) {
4153 case ANCHOR_PREC_READ:
4154 r = setup_tree(an->target, reg, state, env);
4155 break;
4156 case ANCHOR_PREC_READ_NOT:
4157 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4158 break;
4159
4160/* allowed node types in look-behind */
4161#define ALLOWED_TYPE_IN_LB \
4162 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4163 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4164
4165#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4166#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4167
4168#define ALLOWED_ANCHOR_IN_LB \
4169( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4170 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4171 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4172 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4173#define ALLOWED_ANCHOR_IN_LB_NOT \
4174( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4175 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4176 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4177 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4178
4179 case ANCHOR_LOOK_BEHIND:
4180 {
4181 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4182 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4183 if (r < 0) return r;
4184 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4185 if (NTYPE(node) != NT_ANCHOR) goto restart;
4186 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4187 if (r != 0) return r;
4188 r = setup_look_behind(node, reg, env);
4189 }
4190 break;
4191
4192 case ANCHOR_LOOK_BEHIND_NOT:
4193 {
4194 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4195 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4196 if (r < 0) return r;
4197 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4198 if (NTYPE(node) != NT_ANCHOR) goto restart;
4199 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4200 env);
4201 if (r != 0) return r;
4202 r = setup_look_behind(node, reg, env);
4203 }
4204 break;
4205 }
4206 }
4207 break;
4208
4209 default:
4210 break;
4211 }
4212
4213 return r;
4214}
4215
4216/* set skip map for Sunday's quick search */
4217static int
4218set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4219 UChar skip[], int** int_skip, int ignore_case)
4220{
4221 OnigDistance i, len;
4222 int clen, flen, n, j, k;
4223 UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4224 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4225 OnigEncoding enc = reg->enc;
4226
4227 len = end - s;
4228 if (len < ONIG_CHAR_TABLE_SIZE) {
4229 if (ignore_case) {
4230 for (i = 0; i < len; i += clen) {
4231 p = s + i;
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4233 p, end, items);
4234 clen = enclen(enc, p, end);
4235 if (p + clen > end)
4236 clen = (int )(end - p);
4237
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4240 /* Different length isn't supported. Stop optimization at here. */
4241 end = p;
4242 goto endcheck;
4243 }
4244 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4245 if (flen != clen) {
4246 /* Different length isn't supported. Stop optimization at here. */
4247 end = p;
4248 goto endcheck;
4249 }
4250 }
4251 }
4252endcheck:
4253 ;
4254 }
4255
4256 len = end - s;
4257 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4258 skip[i] = (UChar )(len + 1);
4259 n = 0;
4260 for (i = 0; i < len; i += clen) {
4261 p = s + i;
4262 if (ignore_case)
4263 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4264 p, end, items);
4265 clen = enclen(enc, p, end);
4266 if (p + clen > end)
4267 clen = (int )(end - p);
4268
4269 for (j = 0; j < clen; j++) {
4270 skip[s[i + j]] = (UChar )(len - i - j);
4271 for (k = 0; k < n; k++) {
4272 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4273 skip[buf[j]] = (UChar )(len - i - j);
4274 }
4275 }
4276 }
4277 }
4278 else {
4279# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4280 /* This should not happen. */
4281 return ONIGERR_TYPE_BUG;
4282# else
4283 if (IS_NULL(*int_skip)) {
4284 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4285 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4286 }
4287 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4288
4289 n = 0;
4290 for (i = 0; i < len; i += clen) {
4291 p = s + i;
4292 if (ignore_case)
4293 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4294 p, end, items);
4295 clen = enclen(enc, p, end);
4296 if (p + clen > end)
4297 clen = (int )(end - p);
4298
4299 for (j = 0; j < n; j++) {
4300 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4301 return 1; /* different length isn't supported. */
4302 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4303 if (flen != clen)
4304 return 1; /* different length isn't supported. */
4305 }
4306 for (j = 0; j < clen; j++) {
4307 (*int_skip)[s[i + j]] = (int )(len - i - j);
4308 for (k = 0; k < n; k++) {
4309 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4310 }
4311 }
4312 }
4313# endif
4314 }
4315 return (int)len;
4316}
4317
4318typedef struct {
4319 OnigDistance min; /* min byte length */
4320 OnigDistance max; /* max byte length */
4321} MinMaxLen;
4322
4323typedef struct {
4324 MinMaxLen mmd;
4325 OnigEncoding enc;
4326 OnigOptionType options;
4327 OnigCaseFoldType case_fold_flag;
4328 ScanEnv* scan_env;
4329} OptEnv;
4330
4331typedef struct {
4332 int left_anchor;
4333 int right_anchor;
4334} OptAncInfo;
4335
4336typedef struct {
4337 MinMaxLen mmd; /* info position */
4338 OptAncInfo anc;
4339
4340 int reach_end;
4341 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4342 int len;
4343 UChar s[OPT_EXACT_MAXLEN];
4344} OptExactInfo;
4345
4346typedef struct {
4347 MinMaxLen mmd; /* info position */
4348 OptAncInfo anc;
4349
4350 int value; /* weighted value */
4351 UChar map[ONIG_CHAR_TABLE_SIZE];
4352} OptMapInfo;
4353
4354typedef struct {
4355 MinMaxLen len;
4356
4357 OptAncInfo anc;
4358 OptExactInfo exb; /* boundary */
4359 OptExactInfo exm; /* middle */
4360 OptExactInfo expr; /* prec read (?=...) */
4361
4362 OptMapInfo map; /* boundary */
4363} NodeOptInfo;
4364
4365
4366static int
4367map_position_value(OnigEncoding enc, int i)
4368{
4369 static const short int ByteValTable[] = {
4370 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4371 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4372 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4373 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4374 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4375 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4376 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4377 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4378 };
4379
4380 if (i < numberof(ByteValTable)) {
4381 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4382 return 20;
4383 else
4384 return (int )ByteValTable[i];
4385 }
4386 else
4387 return 4; /* Take it easy. */
4388}
4389
4390static int
4391distance_value(MinMaxLen* mm)
4392{
4393 /* 1000 / (min-max-dist + 1) */
4394 static const short int dist_vals[] = {
4395 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4396 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4397 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4398 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4399 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4400 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4401 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4402 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4403 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4404 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4405 };
4406
4407 OnigDistance d;
4408
4409 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4410
4411 d = mm->max - mm->min;
4412 if (d < numberof(dist_vals))
4413 /* return dist_vals[d] * 16 / (mm->min + 12); */
4414 return (int )dist_vals[d];
4415 else
4416 return 1;
4417}
4418
4419static int
4420comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4421{
4422 if (v2 <= 0) return -1;
4423 if (v1 <= 0) return 1;
4424
4425 v1 *= distance_value(d1);
4426 v2 *= distance_value(d2);
4427
4428 if (v2 > v1) return 1;
4429 if (v2 < v1) return -1;
4430
4431 if (d2->min < d1->min) return 1;
4432 if (d2->min > d1->min) return -1;
4433 return 0;
4434}
4435
4436static int
4437is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4438{
4439 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4440}
4441
4442
4443static void
4444set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4445{
4446 mml->min = min;
4447 mml->max = max;
4448}
4449
4450static void
4451clear_mml(MinMaxLen* mml)
4452{
4453 mml->min = mml->max = 0;
4454}
4455
4456static void
4457copy_mml(MinMaxLen* to, MinMaxLen* from)
4458{
4459 to->min = from->min;
4460 to->max = from->max;
4461}
4462
4463static void
4464add_mml(MinMaxLen* to, MinMaxLen* from)
4465{
4466 to->min = distance_add(to->min, from->min);
4467 to->max = distance_add(to->max, from->max);
4468}
4469
4470#if 0
4471static void
4472add_len_mml(MinMaxLen* to, OnigDistance len)
4473{
4474 to->min = distance_add(to->min, len);
4475 to->max = distance_add(to->max, len);
4476}
4477#endif
4478
4479static void
4480alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4481{
4482 if (to->min > from->min) to->min = from->min;
4483 if (to->max < from->max) to->max = from->max;
4484}
4485
4486static void
4487copy_opt_env(OptEnv* to, OptEnv* from)
4488{
4489 *to = *from;
4490}
4491
4492static void
4493clear_opt_anc_info(OptAncInfo* anc)
4494{
4495 anc->left_anchor = 0;
4496 anc->right_anchor = 0;
4497}
4498
4499static void
4500copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4501{
4502 *to = *from;
4503}
4504
4505static void
4506concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4507 OnigDistance left_len, OnigDistance right_len)
4508{
4509 clear_opt_anc_info(to);
4510
4511 to->left_anchor = left->left_anchor;
4512 if (left_len == 0) {
4513 to->left_anchor |= right->left_anchor;
4514 }
4515
4516 to->right_anchor = right->right_anchor;
4517 if (right_len == 0) {
4518 to->right_anchor |= left->right_anchor;
4519 }
4520 else {
4521 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4522 }
4523}
4524
4525static int
4526is_left_anchor(int anc)
4527{
4528 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4529 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4530 anc == ANCHOR_PREC_READ_NOT)
4531 return 0;
4532
4533 return 1;
4534}
4535
4536static int
4537is_set_opt_anc_info(OptAncInfo* to, int anc)
4538{
4539 if ((to->left_anchor & anc) != 0) return 1;
4540
4541 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4542}
4543
4544static void
4545add_opt_anc_info(OptAncInfo* to, int anc)
4546{
4547 if (is_left_anchor(anc))
4548 to->left_anchor |= anc;
4549 else
4550 to->right_anchor |= anc;
4551}
4552
4553static void
4554remove_opt_anc_info(OptAncInfo* to, int anc)
4555{
4556 if (is_left_anchor(anc))
4557 to->left_anchor &= ~anc;
4558 else
4559 to->right_anchor &= ~anc;
4560}
4561
4562static void
4563alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4564{
4565 to->left_anchor &= add->left_anchor;
4566 to->right_anchor &= add->right_anchor;
4567}
4568
4569static int
4570is_full_opt_exact_info(OptExactInfo* ex)
4571{
4572 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4573}
4574
4575static void
4576clear_opt_exact_info(OptExactInfo* ex)
4577{
4578 clear_mml(&ex->mmd);
4579 clear_opt_anc_info(&ex->anc);
4580 ex->reach_end = 0;
4581 ex->ignore_case = -1; /* unset */
4582 ex->len = 0;
4583 ex->s[0] = '\0';
4584}
4585
4586static void
4587copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4588{
4589 *to = *from;
4590}
4591
4592static void
4593concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4594{
4595 int i, j, len;
4596 UChar *p, *end;
4597 OptAncInfo tanc;
4598
4599 if (to->ignore_case < 0)
4600 to->ignore_case = add->ignore_case;
4601 else if (to->ignore_case != add->ignore_case)
4602 return ; /* avoid */
4603
4604 p = add->s;
4605 end = p + add->len;
4606 for (i = to->len; p < end; ) {
4607 len = enclen(enc, p, end);
4608 if (i + len > OPT_EXACT_MAXLEN) break;
4609 for (j = 0; j < len && p < end; j++)
4610 to->s[i++] = *p++;
4611 }
4612
4613 to->len = i;
4614 to->reach_end = (p == end ? add->reach_end : 0);
4615
4616 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4617 if (! to->reach_end) tanc.right_anchor = 0;
4618 copy_opt_anc_info(&to->anc, &tanc);
4619}
4620
4621static void
4622concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4623 int raw ARG_UNUSED, OnigEncoding enc)
4624{
4625 int i, j, len;
4626 UChar *p;
4627
4628 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4629 len = enclen(enc, p, end);
4630 if (i + len > OPT_EXACT_MAXLEN) break;
4631 for (j = 0; j < len && p < end; j++)
4632 to->s[i++] = *p++;
4633 }
4634
4635 to->len = i;
4636}
4637
4638static void
4639alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4640{
4641 int i, j, len;
4642
4643 if (add->len == 0 || to->len == 0) {
4644 clear_opt_exact_info(to);
4645 return ;
4646 }
4647
4648 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4649 clear_opt_exact_info(to);
4650 return ;
4651 }
4652
4653 for (i = 0; i < to->len && i < add->len; ) {
4654 if (to->s[i] != add->s[i]) break;
4655 len = enclen(env->enc, to->s + i, to->s + to->len);
4656
4657 for (j = 1; j < len; j++) {
4658 if (to->s[i+j] != add->s[i+j]) break;
4659 }
4660 if (j < len) break;
4661 i += len;
4662 }
4663
4664 if (! add->reach_end || i < add->len || i < to->len) {
4665 to->reach_end = 0;
4666 }
4667 to->len = i;
4668 if (to->ignore_case < 0)
4669 to->ignore_case = add->ignore_case;
4670 else if (add->ignore_case >= 0)
4671 to->ignore_case |= add->ignore_case;
4672
4673 alt_merge_opt_anc_info(&to->anc, &add->anc);
4674 if (! to->reach_end) to->anc.right_anchor = 0;
4675}
4676
4677static void
4678select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4679{
4680 int v1, v2;
4681
4682 v1 = now->len;
4683 v2 = alt->len;
4684
4685 if (v2 == 0) {
4686 return ;
4687 }
4688 else if (v1 == 0) {
4689 copy_opt_exact_info(now, alt);
4690 return ;
4691 }
4692 else if (v1 <= 2 && v2 <= 2) {
4693 /* ByteValTable[x] is big value --> low price */
4694 v2 = map_position_value(enc, now->s[0]);
4695 v1 = map_position_value(enc, alt->s[0]);
4696
4697 if (now->len > 1) v1 += 5;
4698 if (alt->len > 1) v2 += 5;
4699 }
4700
4701 if (now->ignore_case <= 0) v1 *= 2;
4702 if (alt->ignore_case <= 0) v2 *= 2;
4703
4704 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4705 copy_opt_exact_info(now, alt);
4706}
4707
4708static void
4709clear_opt_map_info(OptMapInfo* map)
4710{
4711 static const OptMapInfo clean_info = {
4712 {0, 0}, {0, 0}, 0,
4713 {
4714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4726 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4728 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4729 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4730 }
4731 };
4732
4733 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4734}
4735
4736static void
4737copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4738{
4739 *to = *from;
4740}
4741
4742static void
4743add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4744{
4745 if (map->map[c] == 0) {
4746 map->map[c] = 1;
4747 map->value += map_position_value(enc, c);
4748 }
4749}
4750
4751static int
4752add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4753 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4754{
4755 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4756 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4757 int i, n;
4758
4759 add_char_opt_map_info(map, p[0], enc);
4760
4761 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4762 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4763 if (n < 0) return n;
4764
4765 for (i = 0; i < n; i++) {
4766 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4767 add_char_opt_map_info(map, buf[0], enc);
4768 }
4769
4770 return 0;
4771}
4772
4773static void
4774select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4775{
4776 const int z = 1<<15; /* 32768: something big value */
4777
4778 int v1, v2;
4779
4780 if (alt->value == 0) return ;
4781 if (now->value == 0) {
4782 copy_opt_map_info(now, alt);
4783 return ;
4784 }
4785
4786 v1 = z / now->value;
4787 v2 = z / alt->value;
4788 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4789 copy_opt_map_info(now, alt);
4790}
4791
4792static int
4793comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4794{
4795#define COMP_EM_BASE 20
4796 int ve, vm;
4797
4798 if (m->value <= 0) return -1;
4799
4800 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4801 vm = COMP_EM_BASE * 5 * 2 / m->value;
4802 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4803}
4804
4805static void
4806alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4807{
4808 int i, val;
4809
4810 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4811 if (to->value == 0) return ;
4812 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4813 clear_opt_map_info(to);
4814 return ;
4815 }
4816
4817 alt_merge_mml(&to->mmd, &add->mmd);
4818
4819 val = 0;
4820 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4821 if (add->map[i])
4822 to->map[i] = 1;
4823
4824 if (to->map[i])
4825 val += map_position_value(enc, i);
4826 }
4827 to->value = val;
4828
4829 alt_merge_opt_anc_info(&to->anc, &add->anc);
4830}
4831
4832static void
4833set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4834{
4835 copy_mml(&(opt->exb.mmd), mmd);
4836 copy_mml(&(opt->expr.mmd), mmd);
4837 copy_mml(&(opt->map.mmd), mmd);
4838}
4839
4840static void
4841clear_node_opt_info(NodeOptInfo* opt)
4842{
4843 clear_mml(&opt->len);
4844 clear_opt_anc_info(&opt->anc);
4845 clear_opt_exact_info(&opt->exb);
4846 clear_opt_exact_info(&opt->exm);
4847 clear_opt_exact_info(&opt->expr);
4848 clear_opt_map_info(&opt->map);
4849}
4850
4851static void
4852copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4853{
4854 *to = *from;
4855}
4856
4857static void
4858concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4859{
4860 int exb_reach, exm_reach;
4861 OptAncInfo tanc;
4862
4863 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4864 copy_opt_anc_info(&to->anc, &tanc);
4865
4866 if (add->exb.len > 0 && to->len.max == 0) {
4867 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4868 to->len.max, add->len.max);
4869 copy_opt_anc_info(&add->exb.anc, &tanc);
4870 }
4871
4872 if (add->map.value > 0 && to->len.max == 0) {
4873 if (add->map.mmd.max == 0)
4874 add->map.anc.left_anchor |= to->anc.left_anchor;
4875 }
4876
4877 exb_reach = to->exb.reach_end;
4878 exm_reach = to->exm.reach_end;
4879
4880 if (add->len.max != 0)
4881 to->exb.reach_end = to->exm.reach_end = 0;
4882
4883 if (add->exb.len > 0) {
4884 if (exb_reach) {
4885 concat_opt_exact_info(&to->exb, &add->exb, enc);
4886 clear_opt_exact_info(&add->exb);
4887 }
4888 else if (exm_reach) {
4889 concat_opt_exact_info(&to->exm, &add->exb, enc);
4890 clear_opt_exact_info(&add->exb);
4891 }
4892 }
4893 select_opt_exact_info(enc, &to->exm, &add->exb);
4894 select_opt_exact_info(enc, &to->exm, &add->exm);
4895
4896 if (to->expr.len > 0) {
4897 if (add->len.max > 0) {
4898 if (to->expr.len > (int )add->len.max)
4899 to->expr.len = (int )add->len.max;
4900
4901 if (to->expr.mmd.max == 0)
4902 select_opt_exact_info(enc, &to->exb, &to->expr);
4903 else
4904 select_opt_exact_info(enc, &to->exm, &to->expr);
4905 }
4906 }
4907 else if (add->expr.len > 0) {
4908 copy_opt_exact_info(&to->expr, &add->expr);
4909 }
4910
4911 select_opt_map_info(&to->map, &add->map);
4912
4913 add_mml(&to->len, &add->len);
4914}
4915
4916static void
4917alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4918{
4919 alt_merge_opt_anc_info (&to->anc, &add->anc);
4920 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4921 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4922 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4923 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4924
4925 alt_merge_mml(&to->len, &add->len);
4926}
4927
4928
4929#define MAX_NODE_OPT_INFO_REF_COUNT 5
4930
4931static int
4932optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4933{
4934 int type;
4935 int r = 0;
4936
4937 clear_node_opt_info(opt);
4938 set_bound_node_opt_info(opt, &env->mmd);
4939
4940 type = NTYPE(node);
4941 switch (type) {
4942 case NT_LIST:
4943 {
4944 OptEnv nenv;
4945 NodeOptInfo nopt;
4946 Node* nd = node;
4947
4948 copy_opt_env(&nenv, env);
4949 do {
4950 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4951 if (r == 0) {
4952 add_mml(&nenv.mmd, &nopt.len);
4953 concat_left_node_opt_info(env->enc, opt, &nopt);
4954 }
4955 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4956 }
4957 break;
4958
4959 case NT_ALT:
4960 {
4961 NodeOptInfo nopt;
4962 Node* nd = node;
4963
4964 do {
4965 r = optimize_node_left(NCAR(nd), &nopt, env);
4966 if (r == 0) {
4967 if (nd == node) copy_node_opt_info(opt, &nopt);
4968 else alt_merge_node_opt_info(opt, &nopt, env);
4969 }
4970 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4971 }
4972 break;
4973
4974 case NT_STR:
4975 {
4976 StrNode* sn = NSTR(node);
4977 OnigDistance slen = sn->end - sn->s;
4978 int is_raw = NSTRING_IS_RAW(node);
4979
4980 if (! NSTRING_IS_AMBIG(node)) {
4981 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4982 is_raw, env->enc);
4983 opt->exb.ignore_case = 0;
4984 if (slen > 0) {
4985 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4986 }
4987 set_mml(&opt->len, slen, slen);
4988 }
4989 else {
4990 OnigDistance max;
4991
4992 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4993 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4994 max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
4995 }
4996 else {
4997 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4998 is_raw, env->enc);
4999 opt->exb.ignore_case = 1;
5000
5001 if (slen > 0) {
5002 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5003 env->enc, env->case_fold_flag);
5004 if (r != 0) break;
5005 }
5006
5007 max = slen;
5008 }
5009
5010 set_mml(&opt->len, slen, max);
5011 }
5012
5013 if ((OnigDistance )opt->exb.len == slen)
5014 opt->exb.reach_end = 1;
5015 }
5016 break;
5017
5018 case NT_CCLASS:
5019 {
5020 int i, z;
5021 CClassNode* cc = NCCLASS(node);
5022
5023 /* no need to check ignore case. (set in setup_tree()) */
5024
5025 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5026 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5027 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5028
5029 set_mml(&opt->len, min, max);
5030 }
5031 else {
5032 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5033 z = BITSET_AT(cc->bs, i);
5034 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5035 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5036 }
5037 }
5038 set_mml(&opt->len, 1, 1);
5039 }
5040 }
5041 break;
5042
5043 case NT_CTYPE:
5044 {
5045 int i, min, max;
5046 int maxcode;
5047
5048 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5049
5050 if (max == 1) {
5051 min = 1;
5052
5053 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5054 switch (NCTYPE(node)->ctype) {
5055 case ONIGENC_CTYPE_WORD:
5056 if (NCTYPE(node)->not != 0) {
5057 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5058 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5059 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5060 }
5061 }
5062 }
5063 else {
5064 for (i = 0; i < maxcode; i++) {
5065 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5066 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5067 }
5068 }
5069 }
5070 break;
5071 }
5072 }
5073 else {
5074 min = ONIGENC_MBC_MINLEN(env->enc);
5075 }
5076 set_mml(&opt->len, min, max);
5077 }
5078 break;
5079
5080 case NT_CANY:
5081 {
5082 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5083 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5084 set_mml(&opt->len, min, max);
5085 }
5086 break;
5087
5088 case NT_ANCHOR:
5089 switch (NANCHOR(node)->type) {
5090 case ANCHOR_BEGIN_BUF:
5091 case ANCHOR_BEGIN_POSITION:
5092 case ANCHOR_BEGIN_LINE:
5093 case ANCHOR_END_BUF:
5094 case ANCHOR_SEMI_END_BUF:
5095 case ANCHOR_END_LINE:
5096 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5097 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5098 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5099 break;
5100
5101 case ANCHOR_PREC_READ:
5102 {
5103 NodeOptInfo nopt;
5104
5105 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5106 if (r == 0) {
5107 if (nopt.exb.len > 0)
5108 copy_opt_exact_info(&opt->expr, &nopt.exb);
5109 else if (nopt.exm.len > 0)
5110 copy_opt_exact_info(&opt->expr, &nopt.exm);
5111
5112 opt->expr.reach_end = 0;
5113
5114 if (nopt.map.value > 0)
5115 copy_opt_map_info(&opt->map, &nopt.map);
5116 }
5117 }
5118 break;
5119
5120 case ANCHOR_LOOK_BEHIND_NOT:
5121 break;
5122 }
5123 break;
5124
5125 case NT_BREF:
5126 {
5127 int i;
5128 int* backs;
5129 OnigDistance min, max, tmin, tmax;
5130 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5131 BRefNode* br = NBREF(node);
5132
5133 if (br->state & NST_RECURSION) {
5134 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5135 break;
5136 }
5137 backs = BACKREFS_P(br);
5138 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5139 if (r != 0) break;
5140 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5141 if (r != 0) break;
5142 for (i = 1; i < br->back_num; i++) {
5143 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5144 if (r != 0) break;
5145 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5146 if (r != 0) break;
5147 if (min > tmin) min = tmin;
5148 if (max < tmax) max = tmax;
5149 }
5150 if (r == 0) set_mml(&opt->len, min, max);
5151 }
5152 break;
5153
5154#ifdef USE_SUBEXP_CALL
5155 case NT_CALL:
5156 if (IS_CALL_RECURSION(NCALL(node)))
5157 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5158 else {
5159 OnigOptionType save = env->options;
5160 env->options = NENCLOSE(NCALL(node)->target)->option;
5161 r = optimize_node_left(NCALL(node)->target, opt, env);
5162 env->options = save;
5163 }
5164 break;
5165#endif
5166
5167 case NT_QTFR:
5168 {
5169 int i;
5170 OnigDistance min, max;
5171 NodeOptInfo nopt;
5172 QtfrNode* qn = NQTFR(node);
5173
5174 r = optimize_node_left(qn->target, &nopt, env);
5175 if (r) break;
5176
5177 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5178 if (env->mmd.max == 0 &&
5179 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5180 if (IS_MULTILINE(env->options))
5181 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5182 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5183 else
5184 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5185 }
5186 }
5187 else {
5188 if (qn->lower > 0) {
5189 copy_node_opt_info(opt, &nopt);
5190 if (nopt.exb.len > 0) {
5191 if (nopt.exb.reach_end) {
5192 for (i = 2; i <= qn->lower &&
5193 ! is_full_opt_exact_info(&opt->exb); i++) {
5194 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5195 }
5196 if (i < qn->lower) {
5197 opt->exb.reach_end = 0;
5198 }
5199 }
5200 }
5201
5202 if (qn->lower != qn->upper) {
5203 opt->exb.reach_end = 0;
5204 opt->exm.reach_end = 0;
5205 }
5206 if (qn->lower > 1)
5207 opt->exm.reach_end = 0;
5208 }
5209 }
5210
5211 min = distance_multiply(nopt.len.min, qn->lower);
5212 if (IS_REPEAT_INFINITE(qn->upper))
5213 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5214 else
5215 max = distance_multiply(nopt.len.max, qn->upper);
5216
5217 set_mml(&opt->len, min, max);
5218 }
5219 break;
5220
5221 case NT_ENCLOSE:
5222 {
5223 EncloseNode* en = NENCLOSE(node);
5224
5225 switch (en->type) {
5226 case ENCLOSE_OPTION:
5227 {
5228 OnigOptionType save = env->options;
5229
5230 env->options = en->option;
5231 r = optimize_node_left(en->target, opt, env);
5232 env->options = save;
5233 }
5234 break;
5235
5236 case ENCLOSE_MEMORY:
5237#ifdef USE_SUBEXP_CALL
5238 en->opt_count++;
5239 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5240 OnigDistance min, max;
5241
5242 min = 0;
5243 max = ONIG_INFINITE_DISTANCE;
5244 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5245 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5246 set_mml(&opt->len, min, max);
5247 }
5248 else
5249#endif
5250 {
5251 r = optimize_node_left(en->target, opt, env);
5252
5253 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5254 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5255 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5256 }
5257 }
5258 break;
5259
5260 case ENCLOSE_STOP_BACKTRACK:
5261 case ENCLOSE_CONDITION:
5262 r = optimize_node_left(en->target, opt, env);
5263 break;
5264
5265 case ENCLOSE_ABSENT:
5266 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5267 break;
5268 }
5269 }
5270 break;
5271
5272 default:
5273#ifdef ONIG_DEBUG
5274 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5275 NTYPE(node));
5276#endif
5277 r = ONIGERR_TYPE_BUG;
5278 break;
5279 }
5280
5281 return r;
5282}
5283
5284static int
5285set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5286{
5287 int allow_reverse;
5288
5289 if (e->len == 0) return 0;
5290
5291 reg->exact = (UChar* )xmalloc(e->len);
5292 CHECK_NULL_RETURN_MEMERR(reg->exact);
5293 xmemcpy(reg->exact, e->s, e->len);
5294 reg->exact_end = reg->exact + e->len;
5295
5296 allow_reverse =
5297 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5298
5299 if (e->ignore_case > 0) {
5300 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5301 int orig_len = e->len;
5302 e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5303 reg->map, &(reg->int_map), 1);
5304 if (e->len >= 3) {
5305 reg->exact_end = reg->exact + e->len;
5306 reg->optimize = (allow_reverse != 0
5307 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5308 }
5309 else {
5310 /* Even if BM skip table can't be built (e.g., pattern starts with
5311 's' or 'k' which have multi-byte case fold variants), we should
5312 still use EXACT_IC optimization with the original pattern.
5313 Without this fallback, patterns like /slackware/i have no
5314 optimization at all, causing severe performance regression
5315 especially with non-ASCII strings. See [Bug #21824] */
5316 e->len = orig_len; /* Restore original length for EXACT_IC */
5317 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5318 }
5319 }
5320 else {
5321 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5322 }
5323 }
5324 else {
5325 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5326 set_bm_skip(reg->exact, reg->exact_end, reg,
5327 reg->map, &(reg->int_map), 0);
5328 reg->optimize = (allow_reverse != 0
5329 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5330 }
5331 else {
5332 reg->optimize = ONIG_OPTIMIZE_EXACT;
5333 }
5334 }
5335
5336 reg->dmin = e->mmd.min;
5337 reg->dmax = e->mmd.max;
5338
5339 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5340 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5341 }
5342
5343 return 0;
5344}
5345
5346static void
5347set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5348{
5349 int i;
5350
5351 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5352 reg->map[i] = m->map[i];
5353
5354 reg->optimize = ONIG_OPTIMIZE_MAP;
5355 reg->dmin = m->mmd.min;
5356 reg->dmax = m->mmd.max;
5357
5358 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5359 reg->threshold_len = (int )(reg->dmin + 1);
5360 }
5361}
5362
5363static void
5364set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5365{
5366 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5367 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5368}
5369
5370#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5371static void print_optimize_info(FILE* f, regex_t* reg);
5372#endif
5373
5374static int
5375set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5376{
5377
5378 int r;
5379 NodeOptInfo opt;
5380 OptEnv env;
5381
5382 env.enc = reg->enc;
5383 env.options = reg->options;
5384 env.case_fold_flag = reg->case_fold_flag;
5385 env.scan_env = scan_env;
5386 clear_mml(&env.mmd);
5387
5388 r = optimize_node_left(node, &opt, &env);
5389 if (r) return r;
5390
5391 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5392 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5393 ANCHOR_LOOK_BEHIND);
5394
5395 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5396 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5397
5398 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5399 ANCHOR_PREC_READ_NOT);
5400
5401 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5402 reg->anchor_dmin = opt.len.min;
5403 reg->anchor_dmax = opt.len.max;
5404 }
5405
5406 if (opt.exb.len > 0 || opt.exm.len > 0) {
5407 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5408 if (opt.map.value > 0 &&
5409 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5410 goto set_map;
5411 }
5412 else {
5413 r = set_optimize_exact_info(reg, &opt.exb);
5414 set_sub_anchor(reg, &opt.exb.anc);
5415 }
5416 }
5417 else if (opt.map.value > 0) {
5418 set_map:
5419 set_optimize_map_info(reg, &opt.map);
5420 set_sub_anchor(reg, &opt.map.anc);
5421 }
5422 else {
5423 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5424 if (opt.len.max == 0)
5425 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5426 }
5427
5428#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5429 print_optimize_info(stderr, reg);
5430#endif
5431 return r;
5432}
5433
5434static void
5435clear_optimize_info(regex_t* reg)
5436{
5437 reg->optimize = ONIG_OPTIMIZE_NONE;
5438 reg->anchor = 0;
5439 reg->anchor_dmin = 0;
5440 reg->anchor_dmax = 0;
5441 reg->sub_anchor = 0;
5442 reg->exact_end = (UChar* )NULL;
5443 reg->threshold_len = 0;
5444 xfree(reg->exact);
5445 reg->exact = (UChar* )NULL;
5446}
5447
5448#ifdef ONIG_DEBUG
5449
5450static void print_enc_string(FILE* fp, OnigEncoding enc,
5451 const UChar *s, const UChar *end)
5452{
5453 fprintf(fp, "\nPATTERN: /");
5454
5455 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5456 const UChar *p;
5457 OnigCodePoint code;
5458
5459 p = s;
5460 while (p < end) {
5461 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5462 if (code >= 0x80) {
5463 fprintf(fp, " 0x%04x ", (int )code);
5464 }
5465 else {
5466 fputc((int )code, fp);
5467 }
5468
5469 p += enclen(enc, p, end);
5470 }
5471 }
5472 else {
5473 while (s < end) {
5474 fputc((int )*s, fp);
5475 s++;
5476 }
5477 }
5478
5479 fprintf(fp, "/ (%s)\n", enc->name);
5480}
5481#endif /* ONIG_DEBUG */
5482
5483#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5484static void
5485print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5486{
5487 if (a == ONIG_INFINITE_DISTANCE)
5488 fputs("inf", f);
5489 else
5490 fprintf(f, "(%"PRIuPTR")", a);
5491
5492 fputs("-", f);
5493
5494 if (b == ONIG_INFINITE_DISTANCE)
5495 fputs("inf", f);
5496 else
5497 fprintf(f, "(%"PRIuPTR")", b);
5498}
5499
5500static void
5501print_anchor(FILE* f, int anchor)
5502{
5503 int q = 0;
5504
5505 fprintf(f, "[");
5506
5507 if (anchor & ANCHOR_BEGIN_BUF) {
5508 fprintf(f, "begin-buf");
5509 q = 1;
5510 }
5511 if (anchor & ANCHOR_BEGIN_LINE) {
5512 if (q) fprintf(f, ", ");
5513 q = 1;
5514 fprintf(f, "begin-line");
5515 }
5516 if (anchor & ANCHOR_BEGIN_POSITION) {
5517 if (q) fprintf(f, ", ");
5518 q = 1;
5519 fprintf(f, "begin-pos");
5520 }
5521 if (anchor & ANCHOR_END_BUF) {
5522 if (q) fprintf(f, ", ");
5523 q = 1;
5524 fprintf(f, "end-buf");
5525 }
5526 if (anchor & ANCHOR_SEMI_END_BUF) {
5527 if (q) fprintf(f, ", ");
5528 q = 1;
5529 fprintf(f, "semi-end-buf");
5530 }
5531 if (anchor & ANCHOR_END_LINE) {
5532 if (q) fprintf(f, ", ");
5533 q = 1;
5534 fprintf(f, "end-line");
5535 }
5536 if (anchor & ANCHOR_ANYCHAR_STAR) {
5537 if (q) fprintf(f, ", ");
5538 q = 1;
5539 fprintf(f, "anychar-star");
5540 }
5541 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5542 if (q) fprintf(f, ", ");
5543 fprintf(f, "anychar-star-ml");
5544 }
5545
5546 fprintf(f, "]");
5547}
5548
5549static void
5550print_optimize_info(FILE* f, regex_t* reg)
5551{
5552 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5553 "EXACT_IC", "MAP",
5554 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5555
5556 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5557 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5558 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5559 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5560 fprintf(f, "\n");
5561
5562 if (reg->optimize) {
5563 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5564 fprintf(f, "\n");
5565 }
5566 fprintf(f, "\n");
5567
5568 if (reg->exact) {
5569 UChar *p;
5570 fprintf(f, "exact: [");
5571 for (p = reg->exact; p < reg->exact_end; p++) {
5572 fputc(*p, f);
5573 }
5574 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5575 }
5576 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5577 int c, i, n = 0;
5578
5579 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5580 if (reg->map[i]) n++;
5581
5582 fprintf(f, "map: n=%d\n", n);
5583 if (n > 0) {
5584 c = 0;
5585 fputc('[', f);
5586 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5587 if (reg->map[i] != 0) {
5588 if (c > 0) fputs(", ", f);
5589 c++;
5590 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5591 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5592 fputc(i, f);
5593 else
5594 fprintf(f, "%d", i);
5595 }
5596 }
5597 fprintf(f, "]\n");
5598 }
5599 }
5600}
5601#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5602
5603
5604extern void
5605onig_free_body(regex_t* reg)
5606{
5607 if (IS_NOT_NULL(reg)) {
5608 xfree(reg->p);
5609 xfree(reg->exact);
5610 xfree(reg->int_map);
5611 xfree(reg->int_map_backward);
5612 xfree(reg->repeat_range);
5613 onig_free(reg->chain);
5614
5615#ifdef USE_NAMED_GROUP
5616 onig_names_free(reg);
5617#endif
5618 }
5619}
5620
5621extern void
5622onig_free(regex_t* reg)
5623{
5624 if (IS_NOT_NULL(reg)) {
5625 onig_free_body(reg);
5626 xfree(reg);
5627 }
5628}
5629
5630static void*
5631dup_copy(const void *ptr, size_t size)
5632{
5633 void *newptr = xmalloc(size);
5634 if (IS_NOT_NULL(newptr)) {
5635 memcpy(newptr, ptr, size);
5636 }
5637 return newptr;
5638}
5639
5640extern int
5641onig_reg_copy(regex_t** nreg, regex_t* oreg)
5642{
5643 if (IS_NOT_NULL(oreg)) {
5644 regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
5645 if (IS_NULL(reg)) return ONIGERR_MEMORY;
5646
5647 *reg = *oreg;
5648
5649# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5650
5651 if (IS_NOT_NULL(reg->exact)) {
5652 size_t exact_size = reg->exact_end - reg->exact;
5653 if (COPY_FAILED(exact, exact_size))
5654 goto err;
5655 (reg)->exact_end = (reg)->exact + exact_size;
5656 }
5657
5658 if (IS_NOT_NULL(reg->int_map)) {
5659 if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5660 goto err_int_map;
5661 }
5662 if (IS_NOT_NULL(reg->int_map_backward)) {
5663 if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5664 goto err_int_map_backward;
5665 }
5666 if (IS_NOT_NULL(reg->p)) {
5667 if (COPY_FAILED(p, reg->alloc))
5668 goto err_p;
5669 }
5670 if (IS_NOT_NULL(reg->repeat_range)) {
5671 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
5672 goto err_repeat_range;
5673 }
5674 if (IS_NOT_NULL(reg->name_table)) {
5675 if (onig_names_copy(reg, oreg))
5676 goto err_name_table;
5677 }
5678 if (IS_NOT_NULL(reg->chain)) {
5679 if (onig_reg_copy(&reg->chain, reg->chain))
5680 goto err_chain;
5681 }
5682 return 0;
5683# undef COPY_FAILED
5684
5685 err_chain:
5686 onig_names_free(reg);
5687 err_name_table:
5688 xfree(reg->repeat_range);
5689 err_repeat_range:
5690 xfree(reg->p);
5691 err_p:
5692 xfree(reg->int_map_backward);
5693 err_int_map_backward:
5694 xfree(reg->int_map);
5695 err_int_map:
5696 xfree(reg->exact);
5697 err:
5698 xfree(reg);
5699 return ONIGERR_MEMORY;
5700 }
5701 return 0;
5702}
5703
5704#ifdef RUBY
5705size_t
5706onig_memsize(const regex_t *reg)
5707{
5708 size_t size = sizeof(regex_t);
5709 if (IS_NULL(reg)) return 0;
5710 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5711 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5712 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5713 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5714 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5715 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5716
5717 return size;
5718}
5719
5720size_t
5721onig_region_memsize(const OnigRegion *regs)
5722{
5723 size_t size = sizeof(*regs);
5724 if (IS_NULL(regs)) return 0;
5725 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5726 return size;
5727}
5728#endif
5729
5730#define REGEX_TRANSFER(to,from) do {\
5731 onig_free_body(to);\
5732 xmemcpy(to, from, sizeof(regex_t));\
5733 xfree(from);\
5734} while (0)
5735
5736#if 0
5737extern void
5738onig_transfer(regex_t* to, regex_t* from)
5739{
5740 REGEX_TRANSFER(to, from);
5741}
5742#endif
5743
5744#ifdef ONIG_DEBUG_COMPILE
5745static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5746#endif
5747#ifdef ONIG_DEBUG_PARSE_TREE
5748static void print_tree(FILE* f, Node* node);
5749#endif
5750
5751#ifdef RUBY
5752extern int
5753onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5754 OnigErrorInfo* einfo)
5755{
5756 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5757}
5758#endif
5759
5760#ifdef RUBY
5761extern int
5762onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5763 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5764#else
5765extern int
5766onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5767 OnigErrorInfo* einfo)
5768#endif
5769{
5770#define COMPILE_INIT_SIZE 20
5771
5772 int r;
5773 OnigDistance init_size;
5774 Node* root;
5775 ScanEnv scan_env = {0};
5776#ifdef USE_SUBEXP_CALL
5777 UnsetAddrList uslist;
5778#endif
5779
5780 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5781
5782#ifdef RUBY
5783 scan_env.sourcefile = sourcefile;
5784 scan_env.sourceline = sourceline;
5785#endif
5786
5787#ifdef ONIG_DEBUG
5788 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5789#endif
5790
5791 if (reg->alloc == 0) {
5792 init_size = (pattern_end - pattern) * 2;
5793 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5794 r = BBUF_INIT(reg, init_size);
5795 if (r != 0) goto end;
5796 }
5797 else
5798 reg->used = 0;
5799
5800 reg->num_mem = 0;
5801 reg->num_repeat = 0;
5802 reg->num_null_check = 0;
5803 reg->repeat_range_alloc = 0;
5804 reg->repeat_range = (OnigRepeatRange* )NULL;
5805#ifdef USE_COMBINATION_EXPLOSION_CHECK
5806 reg->num_comb_exp_check = 0;
5807#endif
5808
5809 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5810 if (r != 0) goto err;
5811
5812#ifdef ONIG_DEBUG_PARSE_TREE
5813# if 0
5814 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5815 print_tree(stderr, root);
5816# endif
5817#endif
5818
5819#ifdef USE_NAMED_GROUP
5820 /* mixed use named group and no-named group */
5821 if (scan_env.num_named > 0 &&
5822 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5823 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5824 if (scan_env.num_named != scan_env.num_mem)
5825 r = disable_noname_group_capture(&root, reg, &scan_env);
5826 else
5827 r = numbered_ref_check(root);
5828
5829 if (r != 0) goto err;
5830 }
5831#endif
5832
5833#ifdef USE_SUBEXP_CALL
5834 if (scan_env.num_call > 0) {
5835 r = unset_addr_list_init(&uslist, scan_env.num_call);
5836 if (r != 0) goto err;
5837 scan_env.unset_addr_list = &uslist;
5838 r = setup_subexp_call(root, &scan_env);
5839 if (r != 0) goto err_unset;
5840 r = subexp_recursive_check_trav(root, &scan_env);
5841 if (r < 0) goto err_unset;
5842 r = subexp_inf_recursive_check_trav(root, &scan_env);
5843 if (r != 0) goto err_unset;
5844
5845 reg->num_call = scan_env.num_call;
5846 }
5847 else
5848 reg->num_call = 0;
5849#endif
5850
5851 r = setup_tree(root, reg, 0, &scan_env);
5852 if (r != 0) goto err_unset;
5853
5854#ifdef ONIG_DEBUG_PARSE_TREE
5855 print_tree(stderr, root);
5856#endif
5857
5858 reg->capture_history = scan_env.capture_history;
5859 reg->bt_mem_start = scan_env.bt_mem_start;
5860 reg->bt_mem_start |= reg->capture_history;
5861 if (IS_FIND_CONDITION(reg->options))
5862 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5863 else {
5864 reg->bt_mem_end = scan_env.bt_mem_end;
5865 reg->bt_mem_end |= reg->capture_history;
5866 }
5867
5868#ifdef USE_COMBINATION_EXPLOSION_CHECK
5869 if (scan_env.backrefed_mem == 0
5870# ifdef USE_SUBEXP_CALL
5871 || scan_env.num_call == 0
5872# endif
5873 ) {
5874 setup_comb_exp_check(root, 0, &scan_env);
5875# ifdef USE_SUBEXP_CALL
5876 if (scan_env.has_recursion != 0) {
5877 scan_env.num_comb_exp_check = 0;
5878 }
5879 else
5880# endif
5881 if (scan_env.comb_exp_max_regnum > 0) {
5882 int i;
5883 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5884 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5885 scan_env.num_comb_exp_check = 0;
5886 break;
5887 }
5888 }
5889 }
5890 }
5891
5892 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5893#endif
5894
5895 clear_optimize_info(reg);
5896#ifndef ONIG_DONT_OPTIMIZE
5897 r = set_optimize_info_from_tree(root, reg, &scan_env);
5898 if (r != 0) goto err_unset;
5899#endif
5900
5901 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5902 xfree(scan_env.mem_nodes_dynamic);
5903 scan_env.mem_nodes_dynamic = (Node** )NULL;
5904 }
5905
5906 r = compile_tree(root, reg);
5907 if (r == 0) {
5908 r = add_opcode(reg, OP_END);
5909#ifdef USE_SUBEXP_CALL
5910 if (scan_env.num_call > 0) {
5911 r = unset_addr_list_fix(&uslist, reg);
5912 unset_addr_list_end(&uslist);
5913 if (r) goto err;
5914 }
5915#endif
5916
5917 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5918 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5919 else {
5920 if (reg->bt_mem_start != 0)
5921 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5922 else
5923 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5924 }
5925 }
5926#ifdef USE_SUBEXP_CALL
5927 else if (scan_env.num_call > 0) {
5928 unset_addr_list_end(&uslist);
5929 }
5930#endif
5931 onig_node_free(root);
5932
5933#ifdef ONIG_DEBUG_COMPILE
5934# ifdef USE_NAMED_GROUP
5935 onig_print_names(stderr, reg);
5936# endif
5937 print_compiled_byte_code_list(stderr, reg);
5938#endif
5939
5940 end:
5941 onig_reg_resize(reg);
5942 return r;
5943
5944 err_unset:
5945#ifdef USE_SUBEXP_CALL
5946 if (scan_env.num_call > 0) {
5947 unset_addr_list_end(&uslist);
5948 }
5949#endif
5950 err:
5951 if (IS_NOT_NULL(scan_env.error)) {
5952 if (IS_NOT_NULL(einfo)) {
5953 einfo->enc = scan_env.enc;
5954 einfo->par = scan_env.error;
5955 einfo->par_end = scan_env.error_end;
5956 }
5957 }
5958
5959 onig_node_free(root);
5960 xfree(scan_env.mem_nodes_dynamic);
5961
5962 return r;
5963}
5964
5965static int onig_inited = 0;
5966
5967extern int
5968onig_reg_init(regex_t* reg, OnigOptionType option,
5969 OnigCaseFoldType case_fold_flag,
5970 OnigEncoding enc, const OnigSyntaxType* syntax)
5971{
5972 if (! onig_inited)
5973 onig_init();
5974
5975 if (IS_NULL(reg))
5976 return ONIGERR_INVALID_ARGUMENT;
5977
5978 if (ONIGENC_IS_UNDEF(enc))
5979 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5980
5981 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5982 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5983 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5984 }
5985
5986 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5987 option |= syntax->options;
5988 option &= ~ONIG_OPTION_SINGLELINE;
5989 }
5990 else
5991 option |= syntax->options;
5992
5993 (reg)->enc = enc;
5994 (reg)->options = option;
5995 (reg)->syntax = syntax;
5996 (reg)->optimize = 0;
5997 (reg)->exact = (UChar* )NULL;
5998 (reg)->int_map = (int* )NULL;
5999 (reg)->int_map_backward = (int* )NULL;
6000 (reg)->chain = (regex_t* )NULL;
6001
6002 (reg)->p = (UChar* )NULL;
6003 (reg)->alloc = 0;
6004 (reg)->used = 0;
6005 (reg)->name_table = (void* )NULL;
6006
6007 (reg)->case_fold_flag = case_fold_flag;
6008
6009 (reg)->timelimit = 0;
6010
6011 return 0;
6012}
6013
6014extern int
6015onig_new_without_alloc(regex_t* reg, const UChar* pattern,
6016 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6017 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6018{
6019 int r;
6020
6021 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6022 if (r) return r;
6023
6024 r = onig_compile(reg, pattern, pattern_end, einfo);
6025 return r;
6026}
6027
6028extern int
6029onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6030 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6031 OnigErrorInfo* einfo)
6032{
6033 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6034 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6035
6036 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6037 if (r) {
6038 onig_free(*reg);
6039 *reg = NULL;
6040 }
6041
6042 return r;
6043}
6044
6045extern int
6046onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6047{
6048 return onig_init();
6049}
6050
6051extern int
6052onig_init(void)
6053{
6054 if (onig_inited != 0)
6055 return 0;
6056
6057 onig_inited = 1;
6058
6059#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6060 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6061#endif
6062
6063 onigenc_init();
6064 /* onigenc_set_default_caseconv_table((UChar* )0); */
6065
6066#ifdef ONIG_DEBUG_STATISTICS
6067 onig_statistics_init();
6068#endif
6069
6070 return 0;
6071}
6072
6073
6074static OnigEndCallListItemType* EndCallTop;
6075
6076extern void onig_add_end_call(void (*func)(void))
6077{
6078 OnigEndCallListItemType* item;
6079
6080 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6081 if (item == 0) return ;
6082
6083 item->next = EndCallTop;
6084 item->func = func;
6085
6086 EndCallTop = item;
6087}
6088
6089static void
6090exec_end_call_list(void)
6091{
6092 OnigEndCallListItemType* prev;
6093 void (*func)(void);
6094
6095 while (EndCallTop != 0) {
6096 func = EndCallTop->func;
6097 (*func)();
6098
6099 prev = EndCallTop;
6100 EndCallTop = EndCallTop->next;
6101 xfree(prev);
6102 }
6103}
6104
6105extern int
6106onig_end(void)
6107{
6108 exec_end_call_list();
6109
6110#ifdef ONIG_DEBUG_STATISTICS
6111 onig_print_statistics(stderr);
6112#endif
6113
6114#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6115 _CrtDumpMemoryLeaks();
6116#endif
6117
6118 onig_inited = 0;
6119
6120 return 0;
6121}
6122
6123extern int
6124onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6125{
6126 OnigCodePoint n, *data;
6127 OnigCodePoint low, high, x;
6128
6129 GET_CODE_POINT(n, p);
6130 data = (OnigCodePoint* )p;
6131 data++;
6132
6133 for (low = 0, high = n; low < high; ) {
6134 x = (low + high) >> 1;
6135 if (code > data[x * 2 + 1])
6136 low = x + 1;
6137 else
6138 high = x;
6139 }
6140
6141 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6142}
6143
6144extern int
6145onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6146{
6147 int found;
6148
6149 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6150 if (IS_NULL(cc->mbuf)) {
6151 found = 0;
6152 }
6153 else {
6154 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6155 }
6156 }
6157 else {
6158 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6159 }
6160
6161 if (IS_NCCLASS_NOT(cc))
6162 return !found;
6163 else
6164 return found;
6165}
6166
6167extern int
6168onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6169{
6170 int len;
6171
6172 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6173 len = 2;
6174 }
6175 else {
6176 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6177 }
6178 return onig_is_code_in_cc_len(len, code, cc);
6179}
6180
6181
6182#ifdef ONIG_DEBUG
6183
6184/* arguments type */
6185# define ARG_SPECIAL -1
6186# define ARG_NON 0
6187# define ARG_RELADDR 1
6188# define ARG_ABSADDR 2
6189# define ARG_LENGTH 3
6190# define ARG_MEMNUM 4
6191# define ARG_OPTION 5
6192# define ARG_STATE_CHECK 6
6193
6194OnigOpInfoType OnigOpInfo[] = {
6195 { OP_FINISH, "finish", ARG_NON },
6196 { OP_END, "end", ARG_NON },
6197 { OP_EXACT1, "exact1", ARG_SPECIAL },
6198 { OP_EXACT2, "exact2", ARG_SPECIAL },
6199 { OP_EXACT3, "exact3", ARG_SPECIAL },
6200 { OP_EXACT4, "exact4", ARG_SPECIAL },
6201 { OP_EXACT5, "exact5", ARG_SPECIAL },
6202 { OP_EXACTN, "exactn", ARG_SPECIAL },
6203 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6204 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6205 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6206 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6207 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6208 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6209 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6210 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6211 { OP_CCLASS, "cclass", ARG_SPECIAL },
6212 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6213 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6214 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6215 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6216 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6217 { OP_ANYCHAR, "anychar", ARG_NON },
6218 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6219 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6220 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6221 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6222 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6223 { OP_WORD, "word", ARG_NON },
6224 { OP_NOT_WORD, "not-word", ARG_NON },
6225 { OP_WORD_BOUND, "word-bound", ARG_NON },
6226 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6227 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6228 { OP_WORD_END, "word-end", ARG_NON },
6229 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6230 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6231 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6232 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6233 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6234 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6235 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6236 { OP_END_BUF, "end-buf", ARG_NON },
6237 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6238 { OP_END_LINE, "end-line", ARG_NON },
6239 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6240 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6241 { OP_BACKREF1, "backref1", ARG_NON },
6242 { OP_BACKREF2, "backref2", ARG_NON },
6243 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6244 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6245 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6246 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6247 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6248 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6249 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6250 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6251 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6252 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6253 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6254 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6255 { OP_SET_OPTION, "set-option", ARG_OPTION },
6256 { OP_KEEP, "keep", ARG_NON },
6257 { OP_FAIL, "fail", ARG_NON },
6258 { OP_JUMP, "jump", ARG_RELADDR },
6259 { OP_PUSH, "push", ARG_RELADDR },
6260 { OP_POP, "pop", ARG_NON },
6261 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6262 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6263 { OP_REPEAT, "repeat", ARG_SPECIAL },
6264 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6265 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6266 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6267 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6268 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6269 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6270 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6271 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6272 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6273 { OP_PUSH_POS, "push-pos", ARG_NON },
6274 { OP_POP_POS, "pop-pos", ARG_NON },
6275 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6276 { OP_FAIL_POS, "fail-pos", ARG_NON },
6277 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6278 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6279 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6280 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6281 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6282 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6283 { OP_ABSENT, "absent", ARG_RELADDR },
6284 { OP_ABSENT_END, "absent-end", ARG_NON },
6285 { OP_CALL, "call", ARG_ABSADDR },
6286 { OP_RETURN, "return", ARG_NON },
6287 { OP_CONDITION, "condition", ARG_SPECIAL },
6288 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6289 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6290 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6291 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6292 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6293 "state-check-anychar-ml*", ARG_STATE_CHECK },
6294 { -1, "", ARG_NON }
6295};
6296
6297static const char*
6298op2name(int opcode)
6299{
6300 int i;
6301
6302 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6303 if (opcode == OnigOpInfo[i].opcode)
6304 return OnigOpInfo[i].name;
6305 }
6306 return "";
6307}
6308
6309static int
6310op2arg_type(int opcode)
6311{
6312 int i;
6313
6314 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6315 if (opcode == OnigOpInfo[i].opcode)
6316 return OnigOpInfo[i].arg_type;
6317 }
6318 return ARG_SPECIAL;
6319}
6320
6321# ifdef ONIG_DEBUG_PARSE_TREE
6322static void
6323Indent(FILE* f, int indent)
6324{
6325 int i;
6326 for (i = 0; i < indent; i++) putc(' ', f);
6327}
6328# endif /* ONIG_DEBUG_PARSE_TREE */
6329
6330static void
6331p_string(FILE* f, ptrdiff_t len, UChar* s)
6332{
6333 fputs(":", f);
6334 while (len-- > 0) { fputc(*s++, f); }
6335}
6336
6337static void
6338p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6339{
6340 int x = len * mb_len;
6341
6342 fprintf(f, ":%d:", len);
6343 while (x-- > 0) { fputc(*s++, f); }
6344}
6345
6346extern void
6347onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6348 OnigEncoding enc)
6349{
6350 int i, n, arg_type;
6351 RelAddrType addr;
6352 LengthType len;
6353 MemNumType mem;
6354 StateCheckNumType scn;
6355 OnigCodePoint code;
6356 UChar *q;
6357
6358 fprintf(f, "[%s", op2name(*bp));
6359 arg_type = op2arg_type(*bp);
6360 if (arg_type != ARG_SPECIAL) {
6361 bp++;
6362 switch (arg_type) {
6363 case ARG_NON:
6364 break;
6365 case ARG_RELADDR:
6366 GET_RELADDR_INC(addr, bp);
6367 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6368 break;
6369 case ARG_ABSADDR:
6370 GET_ABSADDR_INC(addr, bp);
6371 fprintf(f, ":(%d)", addr);
6372 break;
6373 case ARG_LENGTH:
6374 GET_LENGTH_INC(len, bp);
6375 fprintf(f, ":%d", len);
6376 break;
6377 case ARG_MEMNUM:
6378 mem = *((MemNumType* )bp);
6379 bp += SIZE_MEMNUM;
6380 fprintf(f, ":%d", mem);
6381 break;
6382 case ARG_OPTION:
6383 {
6384 OnigOptionType option = *((OnigOptionType* )bp);
6385 bp += SIZE_OPTION;
6386 fprintf(f, ":%d", option);
6387 }
6388 break;
6389
6390 case ARG_STATE_CHECK:
6391 scn = *((StateCheckNumType* )bp);
6392 bp += SIZE_STATE_CHECK_NUM;
6393 fprintf(f, ":%d", scn);
6394 break;
6395 }
6396 }
6397 else {
6398 switch (*bp++) {
6399 case OP_EXACT1:
6400 case OP_ANYCHAR_STAR_PEEK_NEXT:
6401 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6402 p_string(f, 1, bp++); break;
6403 case OP_EXACT2:
6404 p_string(f, 2, bp); bp += 2; break;
6405 case OP_EXACT3:
6406 p_string(f, 3, bp); bp += 3; break;
6407 case OP_EXACT4:
6408 p_string(f, 4, bp); bp += 4; break;
6409 case OP_EXACT5:
6410 p_string(f, 5, bp); bp += 5; break;
6411 case OP_EXACTN:
6412 GET_LENGTH_INC(len, bp);
6413 p_len_string(f, len, 1, bp);
6414 bp += len;
6415 break;
6416
6417 case OP_EXACTMB2N1:
6418 p_string(f, 2, bp); bp += 2; break;
6419 case OP_EXACTMB2N2:
6420 p_string(f, 4, bp); bp += 4; break;
6421 case OP_EXACTMB2N3:
6422 p_string(f, 6, bp); bp += 6; break;
6423 case OP_EXACTMB2N:
6424 GET_LENGTH_INC(len, bp);
6425 p_len_string(f, len, 2, bp);
6426 bp += len * 2;
6427 break;
6428 case OP_EXACTMB3N:
6429 GET_LENGTH_INC(len, bp);
6430 p_len_string(f, len, 3, bp);
6431 bp += len * 3;
6432 break;
6433 case OP_EXACTMBN:
6434 {
6435 int mb_len;
6436
6437 GET_LENGTH_INC(mb_len, bp);
6438 GET_LENGTH_INC(len, bp);
6439 fprintf(f, ":%d:%d:", mb_len, len);
6440 n = len * mb_len;
6441 while (n-- > 0) { fputc(*bp++, f); }
6442 }
6443 break;
6444
6445 case OP_EXACT1_IC:
6446 len = enclen(enc, bp, bpend);
6447 p_string(f, len, bp);
6448 bp += len;
6449 break;
6450 case OP_EXACTN_IC:
6451 GET_LENGTH_INC(len, bp);
6452 p_len_string(f, len, 1, bp);
6453 bp += len;
6454 break;
6455
6456 case OP_CCLASS:
6457 n = bitset_on_num((BitSetRef )bp);
6458 bp += SIZE_BITSET;
6459 fprintf(f, ":%d", n);
6460 break;
6461
6462 case OP_CCLASS_NOT:
6463 n = bitset_on_num((BitSetRef )bp);
6464 bp += SIZE_BITSET;
6465 fprintf(f, ":%d", n);
6466 break;
6467
6468 case OP_CCLASS_MB:
6469 case OP_CCLASS_MB_NOT:
6470 GET_LENGTH_INC(len, bp);
6471 q = bp;
6472# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6473 ALIGNMENT_RIGHT(q);
6474# endif
6475 GET_CODE_POINT(code, q);
6476 bp += len;
6477 fprintf(f, ":%d:%d", (int )code, len);
6478 break;
6479
6480 case OP_CCLASS_MIX:
6481 case OP_CCLASS_MIX_NOT:
6482 n = bitset_on_num((BitSetRef )bp);
6483 bp += SIZE_BITSET;
6484 GET_LENGTH_INC(len, bp);
6485 q = bp;
6486# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6487 ALIGNMENT_RIGHT(q);
6488# endif
6489 GET_CODE_POINT(code, q);
6490 bp += len;
6491 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6492 break;
6493
6494 case OP_BACKREFN_IC:
6495 mem = *((MemNumType* )bp);
6496 bp += SIZE_MEMNUM;
6497 fprintf(f, ":%d", mem);
6498 break;
6499
6500 case OP_BACKREF_MULTI_IC:
6501 case OP_BACKREF_MULTI:
6502 fputs(" ", f);
6503 GET_LENGTH_INC(len, bp);
6504 for (i = 0; i < len; i++) {
6505 GET_MEMNUM_INC(mem, bp);
6506 if (i > 0) fputs(", ", f);
6507 fprintf(f, "%d", mem);
6508 }
6509 break;
6510
6511 case OP_BACKREF_WITH_LEVEL:
6512 {
6513 OnigOptionType option;
6514 LengthType level;
6515
6516 GET_OPTION_INC(option, bp);
6517 fprintf(f, ":%d", option);
6518 GET_LENGTH_INC(level, bp);
6519 fprintf(f, ":%d", level);
6520
6521 fputs(" ", f);
6522 GET_LENGTH_INC(len, bp);
6523 for (i = 0; i < len; i++) {
6524 GET_MEMNUM_INC(mem, bp);
6525 if (i > 0) fputs(", ", f);
6526 fprintf(f, "%d", mem);
6527 }
6528 }
6529 break;
6530
6531 case OP_REPEAT:
6532 case OP_REPEAT_NG:
6533 {
6534 mem = *((MemNumType* )bp);
6535 bp += SIZE_MEMNUM;
6536 addr = *((RelAddrType* )bp);
6537 bp += SIZE_RELADDR;
6538 fprintf(f, ":%d:%d", mem, addr);
6539 }
6540 break;
6541
6542 case OP_PUSH_OR_JUMP_EXACT1:
6543 case OP_PUSH_IF_PEEK_NEXT:
6544 addr = *((RelAddrType* )bp);
6545 bp += SIZE_RELADDR;
6546 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6547 p_string(f, 1, bp);
6548 bp += 1;
6549 break;
6550
6551 case OP_LOOK_BEHIND:
6552 GET_LENGTH_INC(len, bp);
6553 fprintf(f, ":%d", len);
6554 break;
6555
6556 case OP_PUSH_LOOK_BEHIND_NOT:
6557 GET_RELADDR_INC(addr, bp);
6558 GET_LENGTH_INC(len, bp);
6559 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6560 break;
6561
6562 case OP_STATE_CHECK_PUSH:
6563 case OP_STATE_CHECK_PUSH_OR_JUMP:
6564 scn = *((StateCheckNumType* )bp);
6565 bp += SIZE_STATE_CHECK_NUM;
6566 addr = *((RelAddrType* )bp);
6567 bp += SIZE_RELADDR;
6568 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6569 break;
6570
6571 case OP_CONDITION:
6572 GET_MEMNUM_INC(mem, bp);
6573 GET_RELADDR_INC(addr, bp);
6574 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6575 break;
6576
6577 default:
6578 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6579 bp[-1]);
6580 }
6581 }
6582 fputs("]", f);
6583 if (nextp) *nextp = bp;
6584}
6585
6586# ifdef ONIG_DEBUG_COMPILE
6587static void
6588print_compiled_byte_code_list(FILE* f, regex_t* reg)
6589{
6590 int ncode;
6591 UChar* bp = reg->p;
6592 UChar* end = reg->p + reg->used;
6593
6594 fprintf(f, "code length: %d", reg->used);
6595
6596 ncode = -1;
6597 while (bp < end) {
6598 ncode++;
6599 if (ncode % 5 == 0)
6600 fprintf(f, "\n%ld:", bp - reg->p);
6601 else
6602 fprintf(f, " %ld:", bp - reg->p);
6603 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6604 }
6605
6606 fprintf(f, "\n");
6607}
6608# endif /* ONIG_DEBUG_COMPILE */
6609
6610# ifdef ONIG_DEBUG_PARSE_TREE
6611static void
6612print_indent_tree(FILE* f, Node* node, int indent)
6613{
6614 int i, type, container_p = 0;
6615 int add = 3;
6616 UChar* p;
6617
6618 Indent(f, indent);
6619 if (IS_NULL(node)) {
6620 fprintf(f, "ERROR: null node!!!\n");
6621 exit (0);
6622 }
6623
6624 type = NTYPE(node);
6625 switch (type) {
6626 case NT_LIST:
6627 case NT_ALT:
6628 if (NTYPE(node) == NT_LIST)
6629 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6630 else
6631 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6632
6633 print_indent_tree(f, NCAR(node), indent + add);
6634 while (IS_NOT_NULL(node = NCDR(node))) {
6635 if (NTYPE(node) != type) {
6636 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6637 exit(0);
6638 }
6639 print_indent_tree(f, NCAR(node), indent + add);
6640 }
6641 break;
6642
6643 case NT_STR:
6644 fprintf(f, "<string%s:%"PRIxPTR">",
6645 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6646 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6647 if (*p >= 0x20 && *p < 0x7f)
6648 fputc(*p, f);
6649 else {
6650 fprintf(f, " 0x%02x", *p);
6651 }
6652 }
6653 break;
6654
6655 case NT_CCLASS:
6656 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6657 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6658 if (NCCLASS(node)->mbuf) {
6659 BBuf* bbuf = NCCLASS(node)->mbuf;
6660 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6661 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6662 fprintf(f, "%d", *data++);
6663 for (; data < end; data+=2) {
6664 fprintf(f, ",");
6665 fprintf(f, "%04x-%04x", data[0], data[1]);
6666 }
6667 }
6668 break;
6669
6670 case NT_CTYPE:
6671 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6672 switch (NCTYPE(node)->ctype) {
6673 case ONIGENC_CTYPE_WORD:
6674 if (NCTYPE(node)->not != 0)
6675 fputs("not word", f);
6676 else
6677 fputs("word", f);
6678 break;
6679
6680 default:
6681 fprintf(f, "ERROR: undefined ctype.\n");
6682 exit(0);
6683 }
6684 break;
6685
6686 case NT_CANY:
6687 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6688 break;
6689
6690 case NT_ANCHOR:
6691 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6692 switch (NANCHOR(node)->type) {
6693 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6694 case ANCHOR_END_BUF: fputs("end buf", f); break;
6695 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6696 case ANCHOR_END_LINE: fputs("end line", f); break;
6697 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6698 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6699
6700 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6701 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6702# ifdef USE_WORD_BEGIN_END
6703 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6704 case ANCHOR_WORD_END: fputs("word end", f); break;
6705# endif
6706 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6707 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6708 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6709 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6710 case ANCHOR_KEEP: fputs("keep",f); break;
6711
6712 default:
6713 fprintf(f, "ERROR: undefined anchor type.\n");
6714 break;
6715 }
6716 break;
6717
6718 case NT_BREF:
6719 {
6720 int* p;
6721 BRefNode* br = NBREF(node);
6722 p = BACKREFS_P(br);
6723 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6724 for (i = 0; i < br->back_num; i++) {
6725 if (i > 0) fputs(", ", f);
6726 fprintf(f, "%d", p[i]);
6727 }
6728 }
6729 break;
6730
6731# ifdef USE_SUBEXP_CALL
6732 case NT_CALL:
6733 {
6734 CallNode* cn = NCALL(node);
6735 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6736 p_string(f, cn->name_end - cn->name, cn->name);
6737 }
6738 break;
6739# endif
6740
6741 case NT_QTFR:
6742 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6743 NQTFR(node)->lower, NQTFR(node)->upper,
6744 (NQTFR(node)->greedy ? "" : "?"));
6745 print_indent_tree(f, NQTFR(node)->target, indent + add);
6746 break;
6747
6748 case NT_ENCLOSE:
6749 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6750 switch (NENCLOSE(node)->type) {
6751 case ENCLOSE_OPTION:
6752 fprintf(f, "option:%d", NENCLOSE(node)->option);
6753 break;
6754 case ENCLOSE_MEMORY:
6755 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6756 break;
6757 case ENCLOSE_STOP_BACKTRACK:
6758 fprintf(f, "stop-bt");
6759 break;
6760 case ENCLOSE_CONDITION:
6761 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6762 break;
6763 case ENCLOSE_ABSENT:
6764 fprintf(f, "absent");
6765 break;
6766
6767 default:
6768 break;
6769 }
6770 fprintf(f, "\n");
6771 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6772 break;
6773
6774 default:
6775 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6776 break;
6777 }
6778
6779 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6780 type != NT_ENCLOSE)
6781 fprintf(f, "\n");
6782
6783 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6784
6785 fflush(f);
6786}
6787
6788static void
6789print_tree(FILE* f, Node* node)
6790{
6791 print_indent_tree(f, node, 0);
6792}
6793# endif /* ONIG_DEBUG_PARSE_TREE */
6794#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.