Ruby 1.9.3p392 (2013-02-22 revision 39386)
oniguruma.h
Go to the documentation of this file.
1 #ifndef ONIGURUMA_H
2 #define ONIGURUMA_H
3 /**********************************************************************
4  oniguruma.h - Oniguruma (regular expression library)
5 **********************************************************************/
6 /*-
7  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #if 0
35 } /* satisfy cc-mode */
36 #endif
37 #endif
38 
39 #define ONIGURUMA
40 #define ONIGURUMA_VERSION_MAJOR 5
41 #define ONIGURUMA_VERSION_MINOR 9
42 #define ONIGURUMA_VERSION_TEENY 2
43 
44 #ifdef __cplusplus
45 # ifndef HAVE_PROTOTYPES
46 # define HAVE_PROTOTYPES 1
47 # endif
48 # ifndef HAVE_STDARG_PROTOTYPES
49 # define HAVE_STDARG_PROTOTYPES 1
50 # endif
51 #endif
52 
53 /* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
54 #if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
55 # ifndef HAVE_STDARG_PROTOTYPES
56 # define HAVE_STDARG_PROTOTYPES 1
57 # endif
58 #endif
59 
60 #ifdef HAVE_STDARG_H
61 # ifndef HAVE_STDARG_PROTOTYPES
62 # define HAVE_STDARG_PROTOTYPES 1
63 # endif
64 #endif
65 
66 #ifndef P_
67 #if defined(__STDC__) || defined(_WIN32)
68 # define P_(args) args
69 #else
70 # define P_(args) ()
71 #endif
72 #endif
73 
74 #ifndef PV_
75 #ifdef HAVE_STDARG_PROTOTYPES
76 # define PV_(args) args
77 #else
78 # define PV_(args) ()
79 #endif
80 #endif
81 
82 #ifndef ONIG_EXTERN
83 #ifdef RUBY_EXTERN
84 #define ONIG_EXTERN RUBY_EXTERN
85 #else
86 #if defined(_WIN32) && !defined(__GNUC__)
87 #if defined(EXPORT) || defined(RUBY_EXPORT)
88 #define ONIG_EXTERN extern __declspec(dllexport)
89 #else
90 #define ONIG_EXTERN extern __declspec(dllimport)
91 #endif
92 #endif
93 #endif
94 #endif
95 
96 #ifndef ONIG_EXTERN
97 #define ONIG_EXTERN extern
98 #endif
99 
100 #if defined __GNUC__ && __GNUC__ >= 4
101 #pragma GCC visibility push(default)
102 #endif
103 
104 /* PART: character encoding */
105 
106 #ifndef ONIG_ESCAPE_UCHAR_COLLISION
107 #define UChar OnigUChar
108 #endif
109 
/* Basic scalar types used by the encoding and regex APIs. */
typedef unsigned char OnigUChar;
typedef unsigned int OnigCodePoint;
typedef unsigned int OnigCtype;
typedef size_t OnigDistance;

/* All bits set in an OnigDistance, i.e. the largest value the type can hold.
 * The whole expansion is parenthesized so it always behaves as one operand,
 * whatever operator the caller places next to it. */
#define ONIG_INFINITE_DISTANCE  (~((OnigDistance)0))
116 
117 typedef unsigned int OnigCaseFoldType; /* case fold flag */
118 
120 
121 /* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
122 /* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
123 #define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
124 #define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
125 
126 #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
127 #define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
128 
129 
130 #define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
131 #define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
132 /* 13 => Unicode:0x1ffc */
133 
/* code range */
/* Accessors for a code-range table.  As read by these macros, element 0
 * holds the count and the i-th pair sits at indices 2*i+1 (from) and
 * 2*i+2 (to).  Every argument and every expansion is parenthesized so that
 * expression arguments such as `table + 1` index the intended element. */
#define ONIGENC_CODE_RANGE_NUM(range)     ((int)((range)[0]))
#define ONIGENC_CODE_RANGE_FROM(range,i)  ((range)[((i)*2) + 1])
#define ONIGENC_CODE_RANGE_TO(range,i)    ((range)[((i)*2) + 2])
138 
139 typedef struct {
140  int byte_len; /* argument(original) character(s) byte length */
141  int code_len; /* number of code */
144 
145 typedef struct {
153 
154 typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
155 
156 typedef struct OnigEncodingTypeST {
157  int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
158  const char* name;
161  int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
162  OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
165  int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc);
169  int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc);
170  int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
171  OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
172  int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
175 
177 
179 
180 #define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
181 
182 #define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
183 
184 
185 /* work size */
186 #define ONIGENC_CODE_TO_MBC_MAXLEN 7
187 #define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
188 /* 18: 6(max-byte) * 3(case-fold chars) */
189 
/* character types */
#define ONIGENC_CTYPE_NEWLINE   0
#define ONIGENC_CTYPE_ALPHA     1
#define ONIGENC_CTYPE_BLANK     2
#define ONIGENC_CTYPE_CNTRL     3
#define ONIGENC_CTYPE_DIGIT     4
#define ONIGENC_CTYPE_GRAPH     5
#define ONIGENC_CTYPE_LOWER     6
#define ONIGENC_CTYPE_PRINT     7
#define ONIGENC_CTYPE_PUNCT     8
#define ONIGENC_CTYPE_SPACE     9
#define ONIGENC_CTYPE_UPPER    10
#define ONIGENC_CTYPE_XDIGIT   11
#define ONIGENC_CTYPE_WORD     12
#define ONIGENC_CTYPE_ALNUM    13  /* alpha || digit */
#define ONIGENC_CTYPE_ASCII    14
#define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII

/* "Special" ctypes are a standard ctype with the mask bit OR-ed in.
 * The expansions are parenthesized: a bare `MASK | TYPE` would let a
 * neighbouring `&` or `==` bind to TYPE alone, e.g.
 * `ONIGENC_CTYPE_S & 0xFF` would evaluate as `MASK | (SPACE & 0xFF)`. */
#define ONIGENC_CTYPE_SPECIAL_MASK  256
#define ONIGENC_CTYPE_S  /* [\t\n\v\f\r\s] */ \
  (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE)
#define ONIGENC_CTYPE_D  /* [0-9] */ \
  (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT)
#define ONIGENC_CTYPE_W  /* [0-9A-Za-z_] */ \
  (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD)
/* Non-zero when ctype carries the special mask bit. */
#define ONIGENC_CTYPE_SPECIAL_P(ctype)  ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK)
215 
216 
217 #define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
218 
219 #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
220 #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
221 #define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
222 #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
223 #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
224 #define ONIGENC_IS_MBC_WORD(enc,s,end) \
225  ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
226 
227 
228 #define ONIGENC_NAME(enc) ((enc)->name)
229 
/* Dispatch helpers: each macro calls the like-named function pointer stored
 * in `enc` and passes `enc` itself as the trailing argument, except
 * ONIGENC_STEP_BACK which calls onigenc_step_back() directly.
 * All arguments are parenthesized; in particular `pp` must be, because the
 * cast applied to it binds tighter than any binary operator the caller's
 * expression might contain. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  (enc)->mbc_case_fold((flag),(const OnigUChar**)(pp),(end),(buf),(enc))
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
  (enc)->is_allowed_reverse_match((s),(end),(enc))
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
  (enc)->left_adjust_char_head((start),(s),(end),(enc))
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
  (enc)->apply_all_case_fold((case_fold_flag),(f),(arg),(enc))
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
  (enc)->get_case_fold_codes_by_str((case_fold_flag),(p),(end),(acs),(enc))
#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
  onigenc_step_back((enc),(start),(s),(end),(n))
242 
243 #define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
244 #define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
245 #define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
246 
247 #define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
248 #define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
249 
250 #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
251 #define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
252 #define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
253 
254 #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
255 
257 int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc));
258 
259 #define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
260 #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
261 #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
262 #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
263 #define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
264 #define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
265 #define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
266 #define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
267 #define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
268  (enc)->property_name_to_ctype(enc,p,end)
269 
270 #define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
271 
272 #define ONIGENC_IS_CODE_NEWLINE(enc,code) \
273  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
274 #define ONIGENC_IS_CODE_GRAPH(enc,code) \
275  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
276 #define ONIGENC_IS_CODE_PRINT(enc,code) \
277  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
278 #define ONIGENC_IS_CODE_ALNUM(enc,code) \
279  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
280 #define ONIGENC_IS_CODE_ALPHA(enc,code) \
281  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
282 #define ONIGENC_IS_CODE_LOWER(enc,code) \
283  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
284 #define ONIGENC_IS_CODE_UPPER(enc,code) \
285  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
286 #define ONIGENC_IS_CODE_CNTRL(enc,code) \
287  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
288 #define ONIGENC_IS_CODE_PUNCT(enc,code) \
289  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
290 #define ONIGENC_IS_CODE_SPACE(enc,code) \
291  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
292 #define ONIGENC_IS_CODE_BLANK(enc,code) \
293  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
294 #define ONIGENC_IS_CODE_DIGIT(enc,code) \
295  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
296 #define ONIGENC_IS_CODE_XDIGIT(enc,code) \
297  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
298 #define ONIGENC_IS_CODE_WORD(enc,code) \
299  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
300 
301 #define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
302  (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
303 
305 OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n));
306 
307 
308 /* encoding API */
310 int onigenc_init P_((void));
316 void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
318 OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev));
320 OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
322 OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
324 OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
326 int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
328 int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
331 
332 
333 
334 /* PART: regular expression */
335 
336 /* config parameters */
337 #define ONIG_NREGION 10
338 #define ONIG_MAX_BACKREF_NUM 1000
339 #define ONIG_MAX_REPEAT_NUM 100000
340 #define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
341 /* constants */
342 #define ONIG_MAX_ERROR_MESSAGE_LEN 90
343 
/* Bit set of ONIG_OPTION_* flags. */
typedef unsigned int OnigOptionType;

#define ONIG_OPTION_DEFAULT             ONIG_OPTION_NONE

/* options: one bit per flag, listed in ascending bit order */
#define ONIG_OPTION_NONE                0U
#define ONIG_OPTION_IGNORECASE          (1U << 0)
#define ONIG_OPTION_EXTEND              (1U << 1)
#define ONIG_OPTION_MULTILINE           (1U << 2)
#define ONIG_OPTION_SINGLELINE          (1U << 3)
#define ONIG_OPTION_FIND_LONGEST        (1U << 4)
#define ONIG_OPTION_FIND_NOT_EMPTY      (1U << 5)
#define ONIG_OPTION_NEGATE_SINGLELINE   (1U << 6)
#define ONIG_OPTION_DONT_CAPTURE_GROUP  (1U << 7)
#define ONIG_OPTION_CAPTURE_GROUP       (1U << 8)
/* options (search time) */
#define ONIG_OPTION_NOTBOL              (1U << 9)
#define ONIG_OPTION_NOTEOL              (1U << 10)
#define ONIG_OPTION_POSIX_REGION        (1U << 11)
#define ONIG_OPTION_MAXBIT              ONIG_OPTION_POSIX_REGION  /* limit */

/* Set, clear and test flags.  ON and OFF assign to `options` in place. */
#define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option)   ((options) & (option))
368 
369 /* syntax */
370 typedef struct {
371  unsigned int op;
372  unsigned int op2;
373  unsigned int behavior;
374  OnigOptionType options; /* default option */
377 
388 
389 /* predefined syntaxes (see regsyntax.c) */
390 #define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
391 #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
392 #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
393 #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
394 #define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
395 #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
396 #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
397 #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
398 #define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
399 #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
400 
401 /* default syntax */
403 #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
404 
405 /* syntax (operators) */
406 #define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
407 #define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
408 #define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
409 #define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
410 #define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
411 #define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
412 #define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
413 #define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
414 #define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
415 #define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
416 #define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
417 #define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
418 #define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
419 #define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
420 #define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
421 #define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
422 #define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
423 #define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
424 #define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
425 #define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* <. > */
426 #define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
427 #define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
428 #define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
429 #define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
430 #define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
431 #define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
432 #define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
433 #define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
434 #define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
435 #define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
436 #define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
437 
438 #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
439 #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
440 #define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
441 #define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
442 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
443 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
444 #define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
445 #define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
446 #define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
447 #define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
448 #define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
449 #define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
450 #define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
451 #define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
452 #define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
453 #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
454 #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
455 #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
456 /* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
457 #define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
458 #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
459 
460 /* syntax (behavior) */
461 #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
462 #define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
463 #define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
464 #define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
465 #define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
466 #define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
467 #define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
468 #define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
469 #define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
470 #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
471 #define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
472 
473 /* syntax (behavior) in char class [...] */
474 #define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
475 #define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
476 #define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
477 #define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
478 /* syntax (behavior) warning */
479 #define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
480 #define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
481 #define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
482 
483 /* meta character specifiers (onig_set_meta_char()) */
484 #define ONIG_META_CHAR_ESCAPE 0
485 #define ONIG_META_CHAR_ANYCHAR 1
486 #define ONIG_META_CHAR_ANYTIME 2
487 #define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
488 #define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
489 #define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
490 
491 #define ONIG_INEFFECTIVE_META_CHAR 0
492 
/* error codes */
/* Negative values are parenthesized so each macro always expands to a single
 * operand; a bare `-1` can merge with a preceding operand into a subtraction
 * (e.g. `x ONIG_MISMATCH` would compile as `x - 1`). */

/* True for codes in the range [-999, -100]. */
#define ONIG_IS_PATTERN_ERROR(ecode)   ((ecode) <= (-100) && (ecode) > (-1000))
/* normal return */
#define ONIG_NORMAL                                            0
#define ONIG_MISMATCH                                        (-1)
#define ONIG_NO_SUPPORT_CONFIG                               (-2)

/* internal error */
#define ONIGERR_MEMORY                                       (-5)
#define ONIGERR_TYPE_BUG                                     (-6)
#define ONIGERR_PARSER_BUG                                  (-11)
#define ONIGERR_STACK_BUG                                   (-12)
#define ONIGERR_UNDEFINED_BYTECODE                          (-13)
#define ONIGERR_UNEXPECTED_BYTECODE                         (-14)
#define ONIGERR_MATCH_STACK_LIMIT_OVER                      (-15)
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED              (-21)
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR (-22)
/* general error */
#define ONIGERR_INVALID_ARGUMENT                            (-30)
/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE                  (-100)
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET                (-101)
#define ONIGERR_EMPTY_CHAR_CLASS                           (-102)
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS                (-103)
#define ONIGERR_END_PATTERN_AT_ESCAPE                      (-104)
#define ONIGERR_END_PATTERN_AT_META                        (-105)
#define ONIGERR_END_PATTERN_AT_CONTROL                     (-106)
#define ONIGERR_META_CODE_SYNTAX                           (-108)
#define ONIGERR_CONTROL_CODE_SYNTAX                        (-109)
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE           (-110)
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE         (-111)
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS    (-112)
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED    (-113)
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID          (-114)
#define ONIGERR_NESTED_REPEAT_OPERATOR                     (-115)
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS                (-116)
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS     (-117)
#define ONIGERR_END_PATTERN_IN_GROUP                       (-118)
#define ONIGERR_UNDEFINED_GROUP_OPTION                     (-119)
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE                 (-121)
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN                (-122)
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN               (-123)
/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER                             (-200)
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE            (-201)
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE   (-202)
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS                  (-203)
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE        (-204)
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES                 (-205)
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING                (-206)
#define ONIGERR_TOO_BIG_BACKREF_NUMBER                     (-207)
#define ONIGERR_INVALID_BACKREF                            (-208)
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED       (-209)
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE                   (-212)
#define ONIGERR_EMPTY_GROUP_NAME                           (-214)
#define ONIGERR_INVALID_GROUP_NAME                         (-215)
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME                 (-216)
#define ONIGERR_UNDEFINED_NAME_REFERENCE                   (-217)
#define ONIGERR_UNDEFINED_GROUP_REFERENCE                  (-218)
#define ONIGERR_MULTIPLEX_DEFINED_NAME                     (-219)
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL             (-220)
#define ONIGERR_NEVER_ENDING_RECURSION                     (-221)
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY      (-222)
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME                 (-223)
#define ONIGERR_INVALID_CODE_POINT_VALUE                   (-400)
#define ONIGERR_INVALID_WIDE_CHAR_VALUE                    (-400)
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                    (-401)
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION         (-402)
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS             (-403)

/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT              (-1001)
565 
566 
567 /* must be smaller than BIT_STATUS_BITS_NUM (sizeof(unsigned int) * 8) */
568 #define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
569 #define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
570  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
571 
573  int group; /* group number */
574  int beg;
575  int end;
580 
581 /* match result region type */
582 struct re_registers {
584  int num_regs;
585  int* beg;
586  int* end;
587  /* extended */
588  OnigCaptureTreeNode* history_root; /* capture history tree root */
589 };
590 
591 /* capture tree traverse */
592 #define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
593 #define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
594 #define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
595  ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
596 
597 
598 #define ONIG_REGION_NOTPOS -1
599 
600 typedef struct re_registers OnigRegion;
601 
602 typedef struct {
606 } OnigErrorInfo;
607 
608 typedef struct {
609  int lower;
610  int upper;
612 
613 typedef void (*OnigWarnFunc) P_((const char* s));
614 extern void onig_null_warn P_((const char* s));
615 #define ONIG_NULL_WARN onig_null_warn
616 
617 #define ONIG_CHAR_TABLE_SIZE 256
618 
619 /* regex_t state */
620 #define ONIG_STATE_NORMAL 0
621 #define ONIG_STATE_SEARCHING 1
622 #define ONIG_STATE_COMPILING -1
623 #define ONIG_STATE_MODIFY -2
624 
625 #define ONIG_STATE(reg) \
626  ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
627 
628 typedef struct re_pattern_buffer {
629  /* common members of BBuf(bytes-buffer) */
630  unsigned char* p; /* compiled pattern */
631  unsigned int used; /* used space for p */
632  unsigned int alloc; /* allocated space for p */
633 
634  int state; /* normal, searching, compiling */
635  int num_mem; /* used memory(...) num counted from 1 */
636  int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
637  int num_null_check; /* OP_NULL_CHECK_START/END id counter */
638  int num_comb_exp_check; /* combination explosion check */
639  int num_call; /* number of subexp call */
640  unsigned int capture_history; /* (?@...) flag (1-31) */
641  unsigned int bt_mem_start; /* need backtrack flag */
642  unsigned int bt_mem_end; /* need backtrack flag */
646 
648  OnigOptionType options;
651  void* name_table;
652 
653  /* optimization info (string search, char-map and anchors) */
654  int optimize; /* optimize flag */
655  int threshold_len; /* search str-length for apply optimize */
656  int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
657  OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
658  OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
659  int sub_anchor; /* start-anchor for exact or map */
660  unsigned char *exact;
661  unsigned char *exact_end;
662  unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
663  int *int_map; /* BM skip for exact_len > 255 */
664  int *int_map_backward; /* BM skip for backward search */
665  OnigDistance dmin; /* min-distance of exact or map */
666  OnigDistance dmax; /* max-distance of exact or map */
667 
668  /* regex_t link chain */
669  struct re_pattern_buffer* chain; /* escape compile-conflict */
670 } OnigRegexType;
671 
673 
674 #ifndef ONIG_ESCAPE_REGEX_T_COLLISION
676 #endif
677 
678 
679 typedef struct {
684  OnigOptionType option;
687 
688 /* Oniguruma Native API */
690 int onig_init P_((void));
692 int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
694 void onig_set_warn_func P_((OnigWarnFunc f));
696 void onig_set_verb_warn_func P_((OnigWarnFunc f));
698 int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo));
700 int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax));
702 int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
704 int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
706 void onig_free P_((OnigRegex));
708 void onig_free_body P_((OnigRegex));
710 int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
712 int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
714 long onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
716 long onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
718 OnigRegion* onig_region_new P_((void));
720 void onig_region_init P_((OnigRegion* region));
722 void onig_region_free P_((OnigRegion* region, int free_self));
724 void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
726 void onig_region_clear P_((OnigRegion* region));
728 int onig_region_resize P_((OnigRegion* region, int n));
730 int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
732 int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
734 int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
736 int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
738 int onig_number_of_names P_((OnigRegex reg));
740 int onig_number_of_captures P_((OnigRegex reg));
742 int onig_number_of_capture_histories P_((OnigRegex reg));
744 OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
746 int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
748 int onig_noname_group_capture_is_active P_((OnigRegex reg));
750 OnigEncoding onig_get_encoding P_((OnigRegex reg));
752 OnigOptionType onig_get_options P_((OnigRegex reg));
756 const OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
758 int onig_set_default_syntax P_((const OnigSyntaxType* syntax));
760 void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from));
762 unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
764 unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
766 unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
768 OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
770 void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
772 void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
774 void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
776 void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
778 int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
786 unsigned int onig_get_match_stack_limit_size P_((void));
788 int onig_set_match_stack_limit_size P_((unsigned int size));
790 int onig_end P_((void));
792 const char* onig_version P_((void));
794 const char* onig_copyright P_((void));
795 
796 #if defined __GNUC__ && __GNUC__ >= 4
797 #pragma GCC visibility pop
798 #endif
799 
800 #ifdef __cplusplus
801 #if 0
802 { /* satisfy cc-mode */
803 #endif
804 }
805 #endif
806 
807 #endif /* ONIGURUMA_H */
808