1 /* Definitions for data structures and routines for the regular
2 expression library, version 0.12.
3 Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library. Its master source is NOT part of
6 the C library, however. The master source lives in /gd/gnu/lib.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public
19 License along with the GNU C Library; see the file LGPL. If not,
20 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
23 Last change: May 21, 1993 by t^2 */
24 /* modified for Ruby by matz@netlab.co.jp */
25
26 #ifndef REGEX_H
27 #define REGEX_H
28
/*
 * Symbol mangling for Ruby.
 *
 * When RUBY is defined, each identifier listed here is rewritten by the
 * preprocessor to the same name with a `ruby_' prefix.  When RUBY is not
 * defined, none of these macros exist and the plain names are used.
 */
#if defined(RUBY)
#define re_adjust_startpos  ruby_re_adjust_startpos
#define re_compile_fastmap  ruby_re_compile_fastmap
#define re_compile_pattern  ruby_re_compile_pattern
#define re_copy_registers   ruby_re_copy_registers
#define re_free_pattern     ruby_re_free_pattern
#define re_free_registers   ruby_re_free_registers
#define re_match            ruby_re_match
#define re_mbcinit          ruby_re_mbcinit
#define re_search           ruby_re_search
#define re_set_casetable    ruby_re_set_casetable
#define register_info_type  ruby_register_info_type
#endif /* RUBY */
43
44 #include <stddef.h>
45
/* Number of parens for which the beginnings and ends are recorded.
   This affects how much space the `struct re_registers' type takes up.
   A definition supplied before this point takes precedence. */
#if !defined(RE_NREGS)
# define RE_NREGS 10
#endif

/* Bit width from which RE_REG_MAX is derived. */
#define BYTEWIDTH 8

/* All-ones value of BYTEWIDTH bits: (2^BYTEWIDTH) - 1. */
#define RE_REG_MAX ((1 << BYTEWIDTH) - 1)

/* Maximum number of duplicates an interval can allow.
   A definition supplied before this point takes precedence. */
#if !defined(RE_DUP_MAX)
# define RE_DUP_MAX ((1 << 15) - 1)
#endif
60
61
/* If this bit is set, then character classes are supported; they are:
     [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.

   NOTE(review): this value (1L << 9) is numerically equal to
   RE_OPTIMIZE_BMATCH below -- confirm the two are never stored in the
   same flag word. */
#define RE_CHAR_CLASSES       (1L << 9)

/*
 * Option bits.  Each flag is a distinct power of two of type long,
 * assigned consecutively starting at bit 0.
 */

/* Matching is done case-insensitively. */
#define RE_OPTION_IGNORECASE  (1L << 0)
/* Perl-style extended pattern available. */
#define RE_OPTION_EXTENDED    (1L << 1)
/* Newline will be included for `.'. */
#define RE_OPTION_MULTILINE   (1L << 2)
/* `^' and `$' ignore newline. */
#define RE_OPTION_SINGLELINE  (1L << 3)
/* Search for the longest match, in accord with POSIX regexp. */
#define RE_OPTION_LONGEST     (1L << 4)

/* Further flag bits continuing the same sequence; this header does not
   document their meaning. */
#define RE_MAY_IGNORECASE     (1L << 5)
#define RE_OPTIMIZE_ANCHOR    (1L << 6)
#define RE_OPTIMIZE_EXACTN    (1L << 7)
#define RE_OPTIMIZE_NO_BM     (1L << 8)
#define RE_OPTIMIZE_BMATCH    (1L << 9)
84
/* Multi-byte character support. */

/* Encoding identifiers (presumably the argument accepted by
   re_mbcinit() -- confirm against its definition). */
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC   1
#define MBCTYPE_SJIS  2
#define MBCTYPE_UTF8  3

/* Declaration of the table consulted by ismbchar() and mbclen().
   The storage-class prefix is `extern __declspec(dllimport)' when IMPORT
   or USEIMPORTLIB is defined, `extern __declspec(dllexport)' when EXPORT
   is defined, and plain `extern' otherwise. */
#if defined(IMPORT) || defined(USEIMPORTLIB)
extern __declspec(dllimport)
#elif defined(EXPORT)
extern __declspec(dllexport)
#else
extern
#endif
const unsigned char *re_mbctab;

/* Prototype form under __STDC__, old-style declaration otherwise. */
#ifdef __STDC__
void re_mbcinit(int);
#else
void re_mbcinit();
#endif

/* Any earlier definition of ismbchar is discarded first. */
#undef ismbchar

/* ismbchar(c): the re_mbctab entry for byte c.
   mbclen(c):   that same entry plus one (presumably the byte length of
                the character that starts with c -- confirm).
   The argument is converted to unsigned char before indexing, so a
   negative char value cannot index before the table. */
#define ismbchar(c) (re_mbctab[(unsigned char)(c)])
#define mbclen(c)   (re_mbctab[(unsigned char)(c)] + 1)
108
/* Structure used in re_match().

   A union overlaying a pointer-sized `word' with two one-bit flags,
   `is_active' and `matched_something'.  The header does not say how the
   two views are used together; see re_match() for the details. */

typedef union
{
    unsigned char *word;
    struct
    {
        unsigned int is_active : 1;
        unsigned int matched_something : 1;
    } bits;
} register_info_type;
119
120 /* This data structure is used to represent a compiled pattern. */
121
122 struct re_pattern_buffer
123 {
124 char *buffer; /* Space holding the compiled pattern commands. */
125 int allocated; /* Size of space that `buffer' points to. */
126 int used; /* Length of portion of buffer actually occupied */
127 char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
128 /* re_search uses the fastmap, if there is one,
129 to skip over totally implausible characters. */
130 char *must; /* Pointer to exact pattern which strings should have
131 to be matched. */
132 int *must_skip; /* Pointer to exact pattern skip table for bm_search */
133 long options; /* Flags for options such as extended_pattern. */
134 long re_nsub; /* Number of subexpressions found by the compiler. */
135 char fastmap_accurate;
136 /* Set to zero when a new pattern is stored,
137 set to one when the fastmap is updated from it. */
138 char can_be_null; /* Set to one by compiling fastmap
139 if this pattern might match the null string.
140 It does not necessarily match the null string
141 in that case, but if this is zero, it cannot.
142 2 as value means can match null string
143 but at end of range or before a character
144 listed in the fastmap. */
145
146 /* stack & working area for re_match() */
147 unsigned char **regstart;
148 unsigned char **regend;
149 unsigned char **old_regstart;
150 unsigned char **old_regend;
151 register_info_type *reg_info;
152 unsigned char **best_regstart;
153 unsigned char **best_regend;
154 };
155
156 typedef struct re_pattern_buffer regex_t;
157
/* Structure to store register contents data in.

   Pass the address of such a structure as an argument to re_match, etc.,
   if you want this information back.

   For i >= 1, beg[i] records the starting index in the string of where
   the ith subexpression matched, and end[i] records one after the ending
   index.  beg[0] and end[0] are analogous, for the entire pattern.

   NOTE(review): presumably `num_regs' is the number of valid entries in
   beg/end and `allocated' the capacity of those arrays -- confirm against
   the code that fills this structure. */

struct re_registers
{
    int allocated;
    int num_regs;
    int *beg;
    int *end;
};
175
/* Type for byte offsets within the string.  POSIX mandates this type and
   requires it to be a signed integer type: an offset of -1 marks a
   subexpression that did not take part in the match, so a test such as
   `rm_so < 0' must be able to succeed.  ptrdiff_t (from <stddef.h>) is
   signed and wide enough for any offset within a single object. */
typedef ptrdiff_t regoff_t;

/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays. */
typedef struct
{
    regoff_t rm_so; /* Byte offset from string's start to substring's start. */
    regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
} regmatch_t;
187
/* Declarations of the library's entry points.

   The same set of functions is declared in both branches: with full
   prototypes when __STDC__ is defined, and as old-style (parameterless)
   declarations otherwise.  The two lists must name the same functions;
   in particular the pattern destructor is `re_free_pattern' in both.
   re_comp/re_exec are declared only in the prototype branch and only
   when RUBY is not defined. */
#ifdef __STDC__

extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
extern void re_free_pattern (struct re_pattern_buffer *);
/* Is this really advertised? */
extern int re_adjust_startpos (struct re_pattern_buffer *, const char *, int, int, int);
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, const char *, int, int, int,
                      struct re_registers *);
extern int re_match (struct re_pattern_buffer *, const char *, int, int,
                     struct re_registers *);
extern void re_set_casetable (const char *table);
extern void re_copy_registers (struct re_registers *, struct re_registers *);
extern void re_free_registers (struct re_registers *);

#ifndef RUBY
/* 4.2 bsd compatibility. */
extern char *re_comp (const char *);
extern int re_exec (const char *);
#endif

#else /* !__STDC__ */

extern char *re_compile_pattern ();
/* Must match the prototype branch: the function is re_free_pattern. */
extern void re_free_pattern ();
/* Is this really advertised? */
extern int re_adjust_startpos ();
extern void re_compile_fastmap ();
extern int re_search ();
extern int re_match ();
extern void re_set_casetable ();
extern void re_copy_registers ();
extern void re_free_registers ();

#endif /* __STDC__ */
223
224 #endif /* !REGEX_H */