re.c
DEFINITIONS
This source file includes the following functions.
- rb_memcicmp
- rb_memcmp
- kcode_euc
- kcode_sjis
- kcode_utf8
- kcode_none
- kcode_set_option
- kcode_reset_option
- rb_reg_mbclen2
- rb_reg_check
- rb_reg_expr_str
- rb_reg_desc
- rb_reg_source
- rb_reg_inspect
- rb_reg_to_s
- rb_reg_raise
- rb_reg_casefold_p
- rb_reg_options_m
- rb_reg_kcode_m
- make_regexp
- match_alloc
- match_clone
- match_become
- match_size
- match_offset
- match_begin
- match_end
- rb_match_busy
- rb_reg_prepare_re
- rb_reg_adjust_startpos
- rb_reg_search
- rb_reg_nth_defined
- rb_reg_nth_match
- rb_reg_last_match
- rb_reg_match_pre
- rb_reg_match_post
- rb_reg_match_last
- last_match_getter
- prematch_getter
- postmatch_getter
- last_paren_match_getter
- match_to_a
- match_aref
- match_select
- match_to_s
- match_string
- rb_reg_initialize
- rb_reg_s_alloc
- rb_reg_new
- rb_reg_regcomp
- rb_reg_cur_kcode
- rb_reg_equal
- rb_reg_match
- rb_reg_match2
- rb_reg_match_m
- rb_reg_initialize_m
- rb_reg_quote
- rb_reg_s_quote
- rb_kcode
- rb_reg_get_kcode
- rb_reg_options
- rb_reg_become
- rb_reg_regsub
- rb_get_kcode
- kcode_getter
- rb_set_kcode
- kcode_setter
- ignorecase_getter
- ignorecase_setter
- match_getter
- match_setter
- rb_reg_s_last_match
- Init_Regexp
1
2
3
4
5
6
7
8
9
10
11
12 #include "ruby.h"
13 #include "re.h"
14 #include <ctype.h>
15
16 static VALUE rb_eRegexpError;
17
18 #define BEG(no) regs->beg[no]
19 #define END(no) regs->end[no]
20
#if 'a' == 97
/*
 * Case-folding table indexed by byte value (0..255).  Every byte maps to
 * itself except 'A'..'Z' (0101..0132), which map to 'a'..'z' (0141..0172).
 */
static const char casetable[] = {
    /* control characters */
    '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
    '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
    '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
    '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
    /* space, punctuation, digits */
    '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
    '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
    '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
    '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
    /* '@', then upper-case letters folded to lower case */
    '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
    '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
    '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
    '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
    /* '`', then lower-case letters (unchanged) */
    '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
    '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
    '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
    '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
    /* bytes with the high bit set map to themselves */
    '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
    '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
    '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
    '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
    '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
    '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
    '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
    '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
    '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
    '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
    '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
    '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
    '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
    '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
    '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
    '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#else
# error >>> "You lose. You will need a translation table for your character set." <<<
#endif

#define MIN(a,b) (((a)>(b))?(b):(a))

/*
 * Compare the first `len' bytes of p1 and p2, treating 'A'..'Z' as equal to
 * 'a'..'z'.
 *
 * Returns 0 when the ranges are equal, otherwise the difference between the
 * first pair of folded bytes that differ (negative when p1 sorts first).
 * A non-positive `len' compares nothing and returns 0.
 *
 * Bytes are read as unsigned char: indexing the table with a plain (possibly
 * signed) char converted to unsigned would turn bytes >= 0x80 into huge
 * indexes and read outside the table.
 */
int
rb_memcicmp(p1, p2, len)
    char *p1, *p2;
    long len;
{
    const unsigned char *a = (const unsigned char *)p1;
    const unsigned char *b = (const unsigned char *)p2;

    while (len-- > 0) {
        int tmp = (int)(unsigned char)casetable[*a++]
                - (int)(unsigned char)casetable[*b++];

        if (tmp != 0)
            return tmp;
    }
    return 0;
}
87
88 int
89 rb_memcmp(p1, p2, len)
90 char *p1, *p2;
91 long len;
92 {
93 if (!ruby_ignorecase) {
94 return memcmp(p1, p2, len);
95 }
96 return rb_memcicmp(p1, p2, len);
97 }
98
99 #define REG_CASESTATE FL_USER0
100 #define KCODE_NONE 0
101 #define KCODE_EUC FL_USER1
102 #define KCODE_SJIS FL_USER2
103 #define KCODE_UTF8 FL_USER3
104 #define KCODE_FIXED FL_USER4
105 #define KCODE_MASK (KCODE_EUC|KCODE_SJIS|KCODE_UTF8)
106
107 static int reg_kcode = DEFAULT_KCODE;
108
109 static void
110 kcode_euc(re)
111 struct RRegexp *re;
112 {
113 FL_UNSET(re, KCODE_MASK);
114 FL_SET(re, KCODE_EUC);
115 FL_SET(re, KCODE_FIXED);
116 }
117
118 static void
119 kcode_sjis(re)
120 struct RRegexp *re;
121 {
122 FL_UNSET(re, KCODE_MASK);
123 FL_SET(re, KCODE_SJIS);
124 FL_SET(re, KCODE_FIXED);
125 }
126
127 static void
128 kcode_utf8(re)
129 struct RRegexp *re;
130 {
131 FL_UNSET(re, KCODE_MASK);
132 FL_SET(re, KCODE_UTF8);
133 FL_SET(re, KCODE_FIXED);
134 }
135
136 static void
137 kcode_none(re)
138 struct RRegexp *re;
139 {
140 FL_UNSET(re, KCODE_MASK);
141 FL_SET(re, KCODE_FIXED);
142 }
143
144 static int curr_kcode;
145
146 static void
147 kcode_set_option(re)
148 VALUE re;
149 {
150 if (!FL_TEST(re, KCODE_FIXED)) return;
151
152 curr_kcode = RBASIC(re)->flags & KCODE_MASK;
153 if (reg_kcode == curr_kcode) return;
154 switch (curr_kcode) {
155 case KCODE_NONE:
156 re_mbcinit(MBCTYPE_ASCII);
157 break;
158 case KCODE_EUC:
159 re_mbcinit(MBCTYPE_EUC);
160 break;
161 case KCODE_SJIS:
162 re_mbcinit(MBCTYPE_SJIS);
163 break;
164 case KCODE_UTF8:
165 re_mbcinit(MBCTYPE_UTF8);
166 break;
167 }
168 }
169
170 static void
171 kcode_reset_option()
172 {
173 if (reg_kcode == curr_kcode) return;
174 switch (reg_kcode) {
175 case KCODE_NONE:
176 re_mbcinit(MBCTYPE_ASCII);
177 break;
178 case KCODE_EUC:
179 re_mbcinit(MBCTYPE_EUC);
180 break;
181 case KCODE_SJIS:
182 re_mbcinit(MBCTYPE_SJIS);
183 break;
184 case KCODE_UTF8:
185 re_mbcinit(MBCTYPE_UTF8);
186 break;
187 }
188 }
189
190 int
191 rb_reg_mbclen2(c, re)
192 unsigned int c;
193 VALUE re;
194 {
195 int len;
196
197 if (!FL_TEST(re, KCODE_FIXED))
198 return mbclen(c);
199 kcode_set_option(re);
200 len = mbclen(c);
201 kcode_reset_option();
202 return len;
203 }
204
205 static void
206 rb_reg_check(re)
207 VALUE re;
208 {
209 if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
210 rb_raise(rb_eTypeError, "uninitialized Regexp");
211 }
212 }
213
214 extern int ruby_in_compile;
215
216 static void
217 rb_reg_expr_str(str, s, len)
218 VALUE str;
219 const char *s;
220 long len;
221 {
222 const char *p, *pend;
223 int need_escape = 0;
224
225 p = s; pend = p + len;
226 while (p<pend) {
227 if (*p == '/' || (!ISPRINT(*p) && !ismbchar(*p))) {
228 need_escape = 1;
229 break;
230 }
231 p += mbclen(*p);
232 }
233 if (!need_escape) {
234 rb_str_buf_cat(str, s, len);
235 }
236 else {
237 p = s;
238 while (p<pend) {
239 if (*p == '\\') {
240 int n = mbclen(p[1]) + 1;
241 rb_str_buf_cat(str, p, n);
242 p += n;
243 continue;
244 }
245 else if (*p == '/') {
246 char c = '\\';
247 rb_str_buf_cat(str, &c, 1);
248 rb_str_buf_cat(str, p, 1);
249 }
250 else if (ismbchar(*p)) {
251 rb_str_buf_cat(str, p, mbclen(*p));
252 p += mbclen(*p);
253 continue;
254 }
255 else if (ISPRINT(*p)) {
256 rb_str_buf_cat(str, p, 1);
257 }
258 else if (!ISSPACE(*p)) {
259 char b[8];
260
261 sprintf(b, "\\%03o", *p & 0377);
262 rb_str_buf_cat(str, b, 4);
263 }
264 else {
265 rb_str_buf_cat(str, p, 1);
266 }
267 p++;
268 }
269 }
270 }
271
272 static VALUE
273 rb_reg_desc(s, len, re)
274 const char *s;
275 long len;
276 VALUE re;
277 {
278 VALUE str = rb_str_buf_new2("/");
279
280 rb_reg_expr_str(str, s, len);
281 rb_str_buf_cat2(str, "/");
282 if (re) {
283 rb_reg_check(re);
284 if (RREGEXP(re)->ptr->options & RE_OPTION_MULTILINE)
285 rb_str_buf_cat2(str, "m");
286 if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE)
287 rb_str_buf_cat2(str, "i");
288 if (RREGEXP(re)->ptr->options & RE_OPTION_EXTENDED)
289 rb_str_buf_cat2(str, "x");
290
291 if (FL_TEST(re, KCODE_FIXED)) {
292 switch ((RBASIC(re)->flags & KCODE_MASK)) {
293 case KCODE_NONE:
294 rb_str_buf_cat2(str, "n");
295 break;
296 case KCODE_EUC:
297 rb_str_buf_cat2(str, "e");
298 break;
299 case KCODE_SJIS:
300 rb_str_buf_cat2(str, "s");
301 break;
302 case KCODE_UTF8:
303 rb_str_buf_cat2(str, "u");
304 break;
305 }
306 }
307 }
308 OBJ_INFECT(str, re);
309 return str;
310 }
311
312 static VALUE
313 rb_reg_source(re)
314 VALUE re;
315 {
316 VALUE str;
317
318 rb_reg_check(re);
319 str = rb_str_new(RREGEXP(re)->str,RREGEXP(re)->len);
320 if (OBJ_TAINTED(re)) OBJ_TAINT(str);
321 return str;
322 }
323
324 static VALUE
325 rb_reg_inspect(re)
326 VALUE re;
327 {
328 rb_reg_check(re);
329 return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
330 }
331
/*
 * Build the "(?on-off:source)" string form of a regexp.
 *
 * Starts from the compiled option bits and the stored source.  While the
 * source itself begins with an inline "(?" option group, the m/i/x letters
 * found there are folded into `options': a bare "(?opts)" prefix is
 * consumed and scanning restarts; a "(?opts:...)" wrapper is stripped only
 * when the inner text compiles on its own.  In every other case the
 * original source and options are restored.  The result is infected by re.
 */
332 static VALUE
333 rb_reg_to_s(re)
334 VALUE re;
335 {
336 int options;
337 const int embeddable = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED;
338 long len;
339 const char* ptr;
340 VALUE str = rb_str_buf_new2("(?");
341
342 rb_reg_check(re);
343
344 options = RREGEXP(re)->ptr->options;
345 ptr = RREGEXP(re)->str;
346 len = RREGEXP(re)->len;
347 again:
348 if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
/* err stays 1 unless a "(?...:...)" wrapper is found and its body compiles */
349 int err = 1;
350 ptr += 2;
/* letters before '-' switch options on */
351 if ((len -= 2) > 0) {
352 do {
353 if (*ptr == 'm') {
354 options |= RE_OPTION_MULTILINE;
355 }
356 else if (*ptr == 'i') {
357 options |= RE_OPTION_IGNORECASE;
358 }
359 else if (*ptr == 'x') {
360 options |= RE_OPTION_EXTENDED;
361 }
362 else break;
363 ++ptr;
364 } while (--len > 0);
365 }
/* letters after '-' switch options off */
366 if (len > 1 && *ptr == '-') {
367 ++ptr;
368 --len;
369 do {
370 if (*ptr == 'm') {
371 options &= ~RE_OPTION_MULTILINE;
372 }
373 else if (*ptr == 'i') {
374 options &= ~RE_OPTION_IGNORECASE;
375 }
376 else if (*ptr == 'x') {
377 options &= ~RE_OPTION_EXTENDED;
378 }
379 else break;
380 ++ptr;
381 } while (--len > 0);
382 }
/* "(?opts)" with no body: drop it and look for another prefix */
383 if (*ptr == ')') {
384 --len;
385 ++ptr;
386 goto again;
387 }
/* "(?opts:body)": trial-compile body (text between ':' and the final ')') */
388 if (*ptr == ':' && ptr[len-1] == ')') {
389 Regexp *rp;
390 kcode_set_option(re);
391 rp = ALLOC(Regexp);
392 MEMZERO((char *)rp, Regexp, 1);
393 err = re_compile_pattern(++ptr, len -= 2, rp) != 0;
394 kcode_reset_option();
395 re_free_pattern(rp);
396 }
/* anything else: fall back to the untouched source and compiled options */
397 if (err) {
398 options = RREGEXP(re)->ptr->options;
399 ptr = RREGEXP(re)->str;
400 len = RREGEXP(re)->len;
401 }
402 }
403
/* enabled options first ... */
404 if (options & RE_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
405 if (options & RE_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
406 if (options & RE_OPTION_EXTENDED) rb_str_buf_cat2(str, "x");
407
/* ... then '-' and the disabled ones, unless all three are enabled */
408 if ((options & embeddable) != embeddable) {
409 rb_str_buf_cat2(str, "-");
410 if (!(options & RE_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
411 if (!(options & RE_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
412 if (!(options & RE_OPTION_EXTENDED)) rb_str_buf_cat2(str, "x");
413 }
414
415 rb_str_buf_cat2(str, ":");
416 rb_reg_expr_str(str, ptr, len);
417 rb_str_buf_cat2(str, ")");
418
419 OBJ_INFECT(str, re);
420 return str;
421 }
422
423 static void
424 rb_reg_raise(s, len, err, re)
425 const char *s;
426 long len;
427 const char *err;
428 VALUE re;
429 {
430 VALUE desc = rb_reg_desc(s, len, re);
431
432 if (ruby_in_compile)
433 rb_compile_error("%s: %s", err, RSTRING(desc)->ptr);
434 else
435 rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING(desc)->ptr);
436 }
437
438 static VALUE
439 rb_reg_casefold_p(re)
440 VALUE re;
441 {
442 rb_reg_check(re);
443 if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) return Qtrue;
444 return Qfalse;
445 }
446
447 static VALUE
448 rb_reg_options_m(re)
449 VALUE re;
450 {
451 rb_reg_check(re);
452 return INT2NUM(RREGEXP(re)->ptr->options);
453 }
454
455 static VALUE
456 rb_reg_kcode_m(re)
457 VALUE re;
458 {
459 char *kcode;
460
461 if (FL_TEST(re, KCODE_FIXED)) {
462 switch (RBASIC(re)->flags & KCODE_MASK) {
463 case KCODE_NONE:
464 kcode = "none"; break;
465 case KCODE_EUC:
466 kcode = "euc"; break;
467 case KCODE_SJIS:
468 kcode = "sjis"; break;
469 case KCODE_UTF8:
470 kcode = "utf8"; break;
471 default:
472 rb_bug("unknow kcode - should not happen");
473 break;
474 }
475 return rb_str_new2(kcode);
476 }
477 return Qnil;
478 }
479
480 static Regexp*
481 make_regexp(s, len, flags)
482 const char *s;
483 long len;
484 int flags;
485 {
486 Regexp *rp;
487 char *err;
488
489
490
491
492
493
494
495
496 rp = ALLOC(Regexp);
497 MEMZERO((char *)rp, Regexp, 1);
498 rp->buffer = ALLOC_N(char, 16);
499 rp->allocated = 16;
500 rp->fastmap = ALLOC_N(char, 256);
501 if (flags) {
502 rp->options = flags;
503 }
504 err = re_compile_pattern(s, len, rp);
505
506 if (err != NULL) {
507 rb_reg_raise(s, len, err, 0);
508 }
509 return rp;
510 }
511
512 static VALUE rb_cMatch;
513
514 static VALUE
515 match_alloc(klass)
516 VALUE klass;
517 {
518 NEWOBJ(match, struct RMatch);
519 OBJSETUP(match, klass, T_MATCH);
520
521 match->str = 0;
522 match->regs = 0;
523 match->regs = ALLOC(struct re_registers);
524 MEMZERO(match->regs, struct re_registers, 1);
525
526 return (VALUE)match;
527 }
528
529 static VALUE
530 match_clone(match)
531 VALUE match;
532 {
533 NEWOBJ(clone, struct RMatch);
534 CLONESETUP(clone, match);
535
536 clone->str = RMATCH(match)->str;
537 clone->regs = 0;
538
539 clone->regs = ALLOC(struct re_registers);
540 clone->regs->allocated = 0;
541 re_copy_registers(clone->regs, RMATCH(match)->regs);
542
543 return (VALUE)clone;
544 }
545
546 static VALUE
547 match_become(obj, orig)
548 VALUE obj, orig;
549 {
550 if (obj == orig) return obj;
551
552 if (!rb_obj_is_instance_of(orig, rb_obj_class(obj))) {
553 rb_raise(rb_eTypeError, "wrong argument class");
554 }
555 RMATCH(obj)->str = RMATCH(orig)->str;
556 re_free_registers(RMATCH(obj)->regs);
557 RMATCH(obj)->regs->allocated = 0;
558 re_copy_registers(RMATCH(obj)->regs, RMATCH(orig)->regs);
559
560 return obj;
561 }
562
563 static VALUE
564 match_size(match)
565 VALUE match;
566 {
567 return INT2FIX(RMATCH(match)->regs->num_regs);
568 }
569
570 static VALUE
571 match_offset(match, n)
572 VALUE match, n;
573 {
574 int i = NUM2INT(n);
575
576 if (i < 0 || RMATCH(match)->regs->num_regs <= i)
577 rb_raise(rb_eIndexError, "index %d out of matches", i);
578
579 if (RMATCH(match)->regs->beg[i] < 0)
580 return rb_assoc_new(Qnil, Qnil);
581
582 return rb_assoc_new(INT2FIX(RMATCH(match)->regs->beg[i]),
583 INT2FIX(RMATCH(match)->regs->end[i]));
584 }
585
586 static VALUE
587 match_begin(match, n)
588 VALUE match, n;
589 {
590 int i = NUM2INT(n);
591
592 if (i < 0 || RMATCH(match)->regs->num_regs <= i)
593 rb_raise(rb_eIndexError, "index %d out of matches", i);
594
595 if (RMATCH(match)->regs->beg[i] < 0)
596 return Qnil;
597
598 return INT2FIX(RMATCH(match)->regs->beg[i]);
599 }
600
601 static VALUE
602 match_end(match, n)
603 VALUE match, n;
604 {
605 int i = NUM2INT(n);
606
607 if (i < 0 || RMATCH(match)->regs->num_regs <= i)
608 rb_raise(rb_eIndexError, "index %d out of matches", i);
609
610 if (RMATCH(match)->regs->beg[i] < 0)
611 return Qnil;
612
613 return INT2FIX(RMATCH(match)->regs->end[i]);
614 }
615
616 #define MATCH_BUSY FL_USER2
617
618 void
619 rb_match_busy(match)
620 VALUE match;
621 {
622 FL_SET(match, MATCH_BUSY);
623 }
624
625 int ruby_ignorecase;
626 static int may_need_recompile;
627
628 static void
629 rb_reg_prepare_re(re)
630 VALUE re;
631 {
632 int need_recompile = 0;
633 int state;
634
635 rb_reg_check(re);
636 state = FL_TEST(re, REG_CASESTATE);
637
638 if (ruby_ignorecase && !state) {
639 FL_SET(re, REG_CASESTATE);
640 RREGEXP(re)->ptr->options |= RE_OPTION_IGNORECASE;
641 need_recompile = 1;
642 }
643 if (!ruby_ignorecase && state) {
644 FL_UNSET(re, REG_CASESTATE);
645 RREGEXP(re)->ptr->options &= ~RE_OPTION_IGNORECASE;
646 need_recompile = 1;
647 }
648
649 if (!FL_TEST(re, KCODE_FIXED) &&
650 (RBASIC(re)->flags & KCODE_MASK) != reg_kcode) {
651 need_recompile = 1;
652 RBASIC(re)->flags &= ~KCODE_MASK;
653 RBASIC(re)->flags |= reg_kcode;
654 }
655
656 if (need_recompile) {
657 char *err;
658
659 if (FL_TEST(re, KCODE_FIXED))
660 kcode_set_option(re);
661 rb_reg_check(re);
662 RREGEXP(re)->ptr->fastmap_accurate = 0;
663 err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
664 if (err != NULL) {
665 rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
666 }
667 }
668 }
669
670 long
671 rb_reg_adjust_startpos(re, str, pos, reverse)
672 VALUE re, str;
673 long pos, reverse;
674 {
675 long range;
676
677 rb_reg_check(re);
678 if (may_need_recompile) rb_reg_prepare_re(re);
679
680 if (FL_TEST(re, KCODE_FIXED))
681 kcode_set_option(re);
682 else if (reg_kcode != curr_kcode)
683 kcode_reset_option();
684
685 if (reverse) {
686 range = -pos;
687 }
688 else {
689 range = RSTRING(str)->len - pos;
690 }
691 return re_adjust_startpos(RREGEXP(re)->ptr,
692 RSTRING(str)->ptr, RSTRING(str)->len,
693 pos, range);
694 }
695
696 long
697 rb_reg_search(re, str, pos, reverse)
698 VALUE re, str;
699 long pos, reverse;
700 {
701 long result;
702 VALUE match;
703 static struct re_registers regs;
704 long range;
705
706 if (pos > RSTRING(str)->len || pos < 0) {
707 rb_backref_set(Qnil);
708 return -1;
709 }
710
711 rb_reg_check(re);
712 if (may_need_recompile) rb_reg_prepare_re(re);
713
714 if (FL_TEST(re, KCODE_FIXED))
715 kcode_set_option(re);
716 else if (reg_kcode != curr_kcode)
717 kcode_reset_option();
718
719 if (reverse) {
720 range = -pos;
721 }
722 else {
723 range = RSTRING(str)->len - pos;
724 }
725 result = re_search(RREGEXP(re)->ptr,RSTRING(str)->ptr,RSTRING(str)->len,
726 pos, range, ®s);
727
728 if (FL_TEST(re, KCODE_FIXED))
729 kcode_reset_option();
730
731 if (result == -2) {
732 rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len,
733 "Stack overflow in regexp matcher", re);
734 }
735
736 if (result < 0) {
737 rb_backref_set(Qnil);
738 return result;
739 }
740
741 match = rb_backref_get();
742 if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
743 match = match_alloc(rb_cMatch);
744 }
745 else {
746 if (rb_safe_level() >= 3)
747 OBJ_TAINT(match);
748 else
749 FL_UNSET(match, FL_TAINT);
750 }
751
752 re_copy_registers(RMATCH(match)->regs, ®s);
753 RMATCH(match)->str = rb_str_new4(str);
754 rb_backref_set(match);
755
756 OBJ_INFECT(match, re);
757 OBJ_INFECT(match, str);
758 return result;
759 }
760
761 VALUE
762 rb_reg_nth_defined(nth, match)
763 int nth;
764 VALUE match;
765 {
766 if (NIL_P(match)) return Qnil;
767 if (nth >= RMATCH(match)->regs->num_regs) {
768 return Qnil;
769 }
770 if (nth < 0) {
771 nth += RMATCH(match)->regs->num_regs;
772 if (nth <= 0) return Qnil;
773 }
774 if (RMATCH(match)->BEG(nth) == -1) return Qfalse;
775 return Qtrue;
776 }
777
778 VALUE
779 rb_reg_nth_match(nth, match)
780 int nth;
781 VALUE match;
782 {
783 VALUE str;
784 long start, end, len;
785
786 if (NIL_P(match)) return Qnil;
787 if (nth >= RMATCH(match)->regs->num_regs) {
788 return Qnil;
789 }
790 if (nth < 0) {
791 nth += RMATCH(match)->regs->num_regs;
792 if (nth <= 0) return Qnil;
793 }
794 start = RMATCH(match)->BEG(nth);
795 if (start == -1) return Qnil;
796 end = RMATCH(match)->END(nth);
797 len = end - start;
798 str = rb_str_new(RSTRING(RMATCH(match)->str)->ptr + start, len);
799 if (OBJ_TAINTED(match)) OBJ_TAINT(str);
800 return str;
801 }
802
803 VALUE
804 rb_reg_last_match(match)
805 VALUE match;
806 {
807 return rb_reg_nth_match(0, match);
808 }
809
810 VALUE
811 rb_reg_match_pre(match)
812 VALUE match;
813 {
814 VALUE str;
815
816 if (NIL_P(match)) return Qnil;
817 if (RMATCH(match)->BEG(0) == -1) return Qnil;
818 str = rb_str_new(RSTRING(RMATCH(match)->str)->ptr, RMATCH(match)->BEG(0));
819 if (OBJ_TAINTED(match)) OBJ_TAINT(str);
820 return str;
821 }
822
823 VALUE
824 rb_reg_match_post(match)
825 VALUE match;
826 {
827 VALUE str;
828
829 if (NIL_P(match)) return Qnil;
830 if (RMATCH(match)->BEG(0) == -1) return Qnil;
831 str = rb_str_new(RSTRING(RMATCH(match)->str)->ptr+RMATCH(match)->END(0),
832 RSTRING(RMATCH(match)->str)->len-RMATCH(match)->END(0));
833 if (OBJ_TAINTED(match)) OBJ_TAINT(str);
834 return str;
835 }
836
837 VALUE
838 rb_reg_match_last(match)
839 VALUE match;
840 {
841 int i;
842
843 if (NIL_P(match)) return Qnil;
844 if (RMATCH(match)->BEG(0) == -1) return Qnil;
845
846 for (i=RMATCH(match)->regs->num_regs-1; RMATCH(match)->BEG(i) == -1 && i > 0; i--)
847 ;
848 if (i == 0) return Qnil;
849 return rb_reg_nth_match(i, match);
850 }
851
852 static VALUE
853 last_match_getter()
854 {
855 return rb_reg_last_match(rb_backref_get());
856 }
857
858 static VALUE
859 prematch_getter()
860 {
861 return rb_reg_match_pre(rb_backref_get());
862 }
863
864 static VALUE
865 postmatch_getter()
866 {
867 return rb_reg_match_post(rb_backref_get());
868 }
869
870 static VALUE
871 last_paren_match_getter()
872 {
873 return rb_reg_match_last(rb_backref_get());
874 }
875
876 static VALUE
877 match_to_a(match)
878 VALUE match;
879 {
880 struct re_registers *regs = RMATCH(match)->regs;
881 VALUE ary = rb_ary_new2(regs->num_regs);
882 char *ptr = RSTRING(RMATCH(match)->str)->ptr;
883 int i;
884 int taint = OBJ_TAINTED(match);
885
886 for (i=0; i<regs->num_regs; i++) {
887 if (regs->beg[i] == -1) {
888 rb_ary_push(ary, Qnil);
889 }
890 else {
891 VALUE str = rb_str_new(ptr+regs->beg[i], regs->end[i]-regs->beg[i]);
892 if (taint) OBJ_TAINT(str);
893 rb_ary_push(ary, str);
894 }
895 }
896 return ary;
897 }
898
899 static VALUE
900 match_aref(argc, argv, match)
901 int argc;
902 VALUE *argv;
903 VALUE match;
904 {
905 VALUE idx, rest;
906
907 rb_scan_args(argc, argv, "11", &idx, &rest);
908
909 if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
910 return rb_ary_aref(argc, argv, match_to_a(match));
911 }
912 return rb_reg_nth_match(FIX2INT(idx), match);
913 }
914
915 static VALUE
916 match_select(argc, argv, match)
917 int argc;
918 VALUE *argv;
919 VALUE match;
920 {
921 struct re_registers *regs = RMATCH(match)->regs;
922 char *ptr = RSTRING(RMATCH(match)->str)->ptr;
923 VALUE result = rb_ary_new();
924 int i;
925 long idx;
926 int taint = OBJ_TAINTED(match);
927
928 for (i=0; i<argc; i++) {
929 idx = NUM2LONG(argv[i]);
930 if (idx < 0) idx += regs->num_regs;
931 if (idx < 0 || regs->num_regs <= idx) {
932 rb_ary_push(result, Qnil);
933 }
934 else {
935 VALUE str = rb_str_new(ptr+regs->beg[idx], regs->end[idx]-regs->beg[idx]);
936 if (taint) OBJ_TAINT(str);
937 rb_ary_push(result, str);
938 }
939 }
940 return result;
941 }
942
943 static VALUE
944 match_to_s(match)
945 VALUE match;
946 {
947 VALUE str = rb_reg_last_match(match);
948
949 if (NIL_P(str)) str = rb_str_new(0,0);
950 if (OBJ_TAINTED(match)) OBJ_TAINT(str);
951 if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
952 return str;
953 }
954
955 static VALUE
956 match_string(match)
957 VALUE match;
958 {
959 return RMATCH(match)->str;
960 }
961
962 VALUE rb_cRegexp;
963
964 static void
965 rb_reg_initialize(obj, s, len, options)
966 VALUE obj;
967 const char *s;
968 long len;
969 int options;
970
971
972
973
974
975
976 {
977 struct RRegexp *re = RREGEXP(obj);
978
979 if (re->ptr) re_free_pattern(re->ptr);
980 if (re->str) free(re->str);
981 re->ptr = 0;
982 re->str = 0;
983
984 switch (options & ~0xf) {
985 case 0:
986 default:
987 FL_SET(re, reg_kcode);
988 break;
989 case 16:
990 kcode_none(re);
991 break;
992 case 32:
993 kcode_euc(re);
994 break;
995 case 48:
996 kcode_sjis(re);
997 break;
998 case 64:
999 kcode_utf8(re);
1000 break;
1001 }
1002
1003 if (options & ~0xf) {
1004 kcode_set_option((VALUE)re);
1005 }
1006 if (ruby_ignorecase) {
1007 options |= RE_OPTION_IGNORECASE;
1008 FL_SET(re, REG_CASESTATE);
1009 }
1010 re->ptr = make_regexp(s, len, options & 0xf);
1011 re->str = ALLOC_N(char, len+1);
1012 memcpy(re->str, s, len);
1013 re->str[len] = '\0';
1014 re->len = len;
1015 if (options & ~0xf) {
1016 kcode_reset_option();
1017 }
1018 }
1019
1020 static VALUE
1021 rb_reg_s_alloc(klass)
1022 VALUE klass;
1023 {
1024 NEWOBJ(re, struct RRegexp);
1025 OBJSETUP(re, klass, T_REGEXP);
1026
1027 re->ptr = 0;
1028 re->len = 0;
1029 re->str = 0;
1030
1031 return (VALUE)re;
1032 }
1033
1034 VALUE
1035 rb_reg_new(s, len, options)
1036 const char *s;
1037 long len;
1038 int options;
1039 {
1040 VALUE re = rb_reg_s_alloc(rb_cRegexp);
1041
1042 rb_reg_initialize(re, s, len, options);
1043 return (VALUE)re;
1044 }
1045
1046 static int case_cache;
1047 static int kcode_cache;
1048 static VALUE reg_cache;
1049
1050 VALUE
1051 rb_reg_regcomp(str)
1052 VALUE str;
1053 {
1054 if (reg_cache && RREGEXP(reg_cache)->len == RSTRING(str)->len
1055 && case_cache == ruby_ignorecase
1056 && kcode_cache == reg_kcode
1057 && memcmp(RREGEXP(reg_cache)->str, RSTRING(str)->ptr, RSTRING(str)->len) == 0)
1058 return reg_cache;
1059
1060 case_cache = ruby_ignorecase;
1061 kcode_cache = reg_kcode;
1062 return reg_cache = rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len,
1063 ruby_ignorecase);
1064 }
1065
1066 static int
1067 rb_reg_cur_kcode(re)
1068 VALUE re;
1069 {
1070 if (FL_TEST(re, KCODE_FIXED)) {
1071 return RBASIC(re)->flags & KCODE_MASK;
1072 }
1073 return 0;
1074 }
1075
1076 static VALUE
1077 rb_reg_equal(re1, re2)
1078 VALUE re1, re2;
1079 {
1080 if (re1 == re2) return Qtrue;
1081 if (TYPE(re2) != T_REGEXP) return Qfalse;
1082 rb_reg_check(re1); rb_reg_check(re2);
1083 if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
1084 if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0 &&
1085 rb_reg_cur_kcode(re1) == rb_reg_cur_kcode(re2) &&
1086 RREGEXP(re1)->ptr->options == RREGEXP(re2)->ptr->options) {
1087 return Qtrue;
1088 }
1089 return Qfalse;
1090 }
1091
1092 VALUE
1093 rb_reg_match(re, str)
1094 VALUE re, str;
1095 {
1096 long start;
1097
1098 if (NIL_P(str)) {
1099 rb_backref_set(Qnil);
1100 return Qnil;
1101 }
1102 StringValue(str);
1103 start = rb_reg_search(re, str, 0, 0);
1104 if (start < 0) {
1105 return Qnil;
1106 }
1107 return LONG2FIX(start);
1108 }
1109
1110 VALUE
1111 rb_reg_match2(re)
1112 VALUE re;
1113 {
1114 long start;
1115 VALUE line = rb_lastline_get();
1116
1117 if (TYPE(line) != T_STRING) {
1118 rb_backref_set(Qnil);
1119 return Qnil;
1120 }
1121
1122 start = rb_reg_search(re, line, 0, 0);
1123 if (start < 0) {
1124 return Qnil;
1125 }
1126 return LONG2FIX(start);
1127 }
1128
1129 static VALUE
1130 rb_reg_match_m(re, str)
1131 VALUE re, str;
1132 {
1133 VALUE result = rb_reg_match(re, str);
1134
1135 if (NIL_P(result)) return Qnil;
1136 result = rb_backref_get();
1137 rb_match_busy(result);
1138 return result;
1139 }
1140
1141 static VALUE
1142 rb_reg_initialize_m(argc, argv, self)
1143 int argc;
1144 VALUE *argv;
1145 VALUE self;
1146 {
1147 VALUE src;
1148 int flags = 0;
1149
1150 if (argc == 0 || argc > 3) {
1151 rb_raise(rb_eArgError, "wrong number of argument");
1152 }
1153 if (argc >= 2) {
1154 if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
1155 else if (RTEST(argv[1])) flags = RE_OPTION_IGNORECASE;
1156 }
1157 if (argc == 3) {
1158 char *kcode = StringValuePtr(argv[2]);
1159
1160 switch (kcode[0]) {
1161 case 'n': case 'N':
1162 flags |= 16;
1163 break;
1164 case 'e': case 'E':
1165 flags |= 32;
1166 break;
1167 case 's': case 'S':
1168 flags |= 48;
1169 break;
1170 case 'u': case 'U':
1171 flags |= 64;
1172 break;
1173 default:
1174 break;
1175 }
1176 }
1177
1178 rb_check_frozen(self);
1179 src = argv[0];
1180 if (TYPE(src) == T_REGEXP) {
1181 rb_reg_check(src);
1182 rb_reg_initialize(self, RREGEXP(src)->str, RREGEXP(src)->len, flags);
1183 }
1184 else {
1185 StringValue(src);
1186 rb_reg_initialize(self, RSTRING(src)->ptr, RSTRING(src)->len, flags);
1187 }
1188 return self;
1189 }
1190
/*
 * Return `str' with every regexp metacharacter (and space and '#')
 * preceded by a backslash.
 *
 * The string is first scanned; if it contains none of the listed
 * characters, `str' itself is returned.  Otherwise a new string is built:
 * the clean prefix is copied in one piece and the rest is copied byte by
 * byte with backslashes inserted.  Multibyte characters are skipped/copied
 * whole and never escaped.  The new string is infected by `str'.
 */
1191 VALUE
1192 rb_reg_quote(str)
1193 VALUE str;
1194 {
1195 char *s, *send, *t;
1196 VALUE tmp;
1197 int c;
1198
1199 s = RSTRING(str)->ptr;
1200 send = s + RSTRING(str)->len;
/* pass 1: find the first character that needs escaping */
1201 for (; s < send; s++) {
1202 c = *s;
1203 if (ismbchar(c)) {
1204 int n = mbclen(c);
1205
1206 while (n-- && s < send)
1207 s++;
/* step back one byte: the for loop's s++ will advance again */
1208 s--;
1209 continue;
1210 }
1211 switch (c) {
1212 case '[': case ']': case '{': case '}':
1213 case '(': case ')': case '|': case '-':
1214 case '*': case '.': case '\\':
1215 case '?': case '+': case '^': case '$':
1216 case ' ': case '#':
1217 goto meta_found;
1218 }
1219 }
1220 return str;
1221
1222 meta_found:
/* worst case every byte gets a backslash, hence twice the length */
1223 tmp = rb_str_new(0, RSTRING(str)->len*2);
1224 t = RSTRING(tmp)->ptr;
1225
/* copy the prefix already known to need no escaping */
1226 memcpy(t, RSTRING(str)->ptr, s - RSTRING(str)->ptr);
1227 t += s - RSTRING(str)->ptr;
1228
/* pass 2: copy the remainder, inserting backslashes */
1229 for (; s < send; s++) {
1230 c = *s;
1231 if (ismbchar(c)) {
1232 int n = mbclen(c);
1233
1234 while (n-- && s < send)
1235 *t++ = *s++;
1236 s--;
1237 continue;
1238 }
1239 switch (c) {
1240 case '[': case ']': case '{': case '}':
1241 case '(': case ')': case '|': case '-':
1242 case '*': case '.': case '\\':
1243 case '?': case '+': case '^': case '$':
1244 case ' ': case '#':
1245 *t++ = '\\';
1246 break;
1247 }
1248 *t++ = c;
1249 }
/* shrink to the number of bytes actually written */
1250 rb_str_resize(tmp, t - RSTRING(tmp)->ptr);
1251 OBJ_INFECT(tmp, str);
1252 return tmp;
1253 }
1254
1255 static VALUE
1256 rb_reg_s_quote(argc, argv)
1257 int argc;
1258 VALUE *argv;
1259 {
1260 VALUE str, kcode;
1261 int kcode_saved = reg_kcode;
1262
1263 rb_scan_args(argc, argv, "11", &str, &kcode);
1264 if (!NIL_P(kcode)) {
1265 rb_set_kcode(StringValuePtr(kcode));
1266 curr_kcode = reg_kcode;
1267 reg_kcode = kcode_saved;
1268 }
1269 StringValue(str);
1270 str = rb_reg_quote(str);
1271 kcode_reset_option();
1272 return str;
1273 }
1274
1275 int
1276 rb_kcode()
1277 {
1278 switch (reg_kcode) {
1279 case KCODE_EUC:
1280 return MBCTYPE_EUC;
1281 case KCODE_SJIS:
1282 return MBCTYPE_SJIS;
1283 case KCODE_UTF8:
1284 return MBCTYPE_UTF8;
1285 case KCODE_NONE:
1286 return MBCTYPE_ASCII;
1287 }
1288 rb_bug("wrong reg_kcode value (0x%x)", reg_kcode);
1289 }
1290
1291 static int
1292 rb_reg_get_kcode(re)
1293 VALUE re;
1294 {
1295 switch (RBASIC(re)->flags & KCODE_MASK) {
1296 case KCODE_NONE:
1297 return 16;
1298 case KCODE_EUC:
1299 return 32;
1300 case KCODE_SJIS:
1301 return 48;
1302 case KCODE_UTF8:
1303 return 64;
1304 default:
1305 return 0;
1306 }
1307 }
1308
1309 int
1310 rb_reg_options(re)
1311 VALUE re;
1312 {
1313 int options = 0;
1314
1315 rb_reg_check(re);
1316 if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE)
1317 options |= RE_OPTION_IGNORECASE;
1318 if (RREGEXP(re)->ptr->options & RE_OPTION_MULTILINE)
1319 options |= RE_OPTION_MULTILINE;
1320 if (RREGEXP(re)->ptr->options & RE_OPTION_EXTENDED)
1321 options |= RE_OPTION_EXTENDED;
1322 if (FL_TEST(re, KCODE_FIXED)) {
1323 options |= rb_reg_get_kcode(re);
1324 }
1325 return options;
1326 }
1327
1328 static VALUE
1329 rb_reg_become(copy, re)
1330 VALUE copy, re;
1331 {
1332 if (copy == re) return copy;
1333 rb_check_frozen(copy);
1334
1335 if (!rb_obj_is_instance_of(re, rb_obj_class(copy))) {
1336 rb_raise(rb_eTypeError, "wrong argument type");
1337 }
1338 RREGEXP(copy)->ptr = 0;
1339 RREGEXP(copy)->len = 0;
1340 RREGEXP(copy)->str = 0;
1341 rb_reg_check(re);
1342 rb_reg_initialize(copy, RREGEXP(re)->str, RREGEXP(re)->len,
1343 rb_reg_options(re));
1344 return copy;
1345 }
1346
/*
 * Expand backslash escapes in the replacement template `str' using the
 * subject string `src' and the match registers `regs'.
 *
 *   \0..\9, \&   text of that group (\& is group 0)
 *   \`           text before the match
 *   \'           text after the match
 *   \+           text of the highest-numbered group that matched
 *   \\           a single backslash
 *   \<other>     copied unchanged, backslash included
 *
 * Groups that are out of range or unset contribute nothing.  A backslash
 * that is the last byte of the template is left as-is.  Returns `str'
 * itself when no escape was processed, otherwise a new string buffer.
 */
1347 VALUE
1348 rb_reg_regsub(str, src, regs)
1349 VALUE str, src;
1350 struct re_registers *regs;
1351 {
1352 VALUE val = 0;
1353 char *p, *s, *e, c;
1354 int no;
1355
/* p: start of the literal run not yet copied; s: scan position; e: end */
1356 p = s = RSTRING(str)->ptr;
1357 e = s + RSTRING(str)->len;
1358
1359 while (s < e) {
1360 char *ss = s;
1361
1362 c = *s++;
1363 if (ismbchar(c)) {
1364 s += mbclen(c) - 1;
1365 continue;
1366 }
1367 if (c != '\\' || s == e) continue;
1368
/* flush the literal text that precedes this backslash */
1369 if (!val) {
1370 val = rb_str_buf_new(ss-p);
1371 rb_str_buf_cat(val, p, ss-p);
1372 }
1373 else {
1374 rb_str_buf_cat(val, p, ss-p);
1375 }
1376
1377 c = *s++;
1378 p = s;
1379 switch (c) {
1380 case '0': case '1': case '2': case '3': case '4':
1381 case '5': case '6': case '7': case '8': case '9':
1382 no = c - '0';
1383 break;
1384 case '&':
1385 no = 0;
1386 break;
1387
1388 case '`':
1389 rb_str_buf_cat(val, RSTRING(src)->ptr, BEG(0));
1390 continue;
1391
1392 case '\'':
1393 rb_str_buf_cat(val, RSTRING(src)->ptr+END(0), RSTRING(src)->len-END(0));
1394 continue;
1395
1396 case '+':
/* walk down to the last group that is set; group 0 alone yields nothing */
1397 no = regs->num_regs-1;
1398 while (BEG(no) == -1 && no > 0) no--;
1399 if (no == 0) continue;
1400 break;
1401
1402 case '\\':
1403 rb_str_buf_cat(val, s-1, 1);
1404 continue;
1405
1406 default:
/* unknown escape: keep both the backslash and the character */
1407 rb_str_buf_cat(val, s-2, 2);
1408 continue;
1409 }
1410
1411 if (no >= 0) {
1412 if (no >= regs->num_regs) continue;
1413 if (BEG(no) == -1) continue;
1414 rb_str_buf_cat(val, RSTRING(src)->ptr+BEG(no), END(no)-BEG(no));
1415 }
1416 }
1417
/* append whatever literal text remains after the last escape */
1418 if (p < e) {
1419 if (!val) {
1420 val = rb_str_buf_new(e-p);
1421 rb_str_buf_cat(val, p, e-p);
1422 }
1423 else {
1424 rb_str_buf_cat(val, p, e-p);
1425 }
1426 }
1427 if (!val) return str;
1428
1429 return val;
1430 }
1431
1432 const char*
1433 rb_get_kcode()
1434 {
1435 switch (reg_kcode) {
1436 case KCODE_SJIS:
1437 return "SJIS";
1438 case KCODE_EUC:
1439 return "EUC";
1440 case KCODE_UTF8:
1441 return "UTF8";
1442 default:
1443 return "NONE";
1444 }
1445 }
1446
1447 static VALUE
1448 kcode_getter()
1449 {
1450 return rb_str_new2(rb_get_kcode());
1451 }
1452
1453 void
1454 rb_set_kcode(code)
1455 const char *code;
1456 {
1457 if (code == 0) goto set_no_conversion;
1458
1459 switch (code[0]) {
1460 case 'E':
1461 case 'e':
1462 reg_kcode = KCODE_EUC;
1463 re_mbcinit(MBCTYPE_EUC);
1464 break;
1465 case 'S':
1466 case 's':
1467 reg_kcode = KCODE_SJIS;
1468 re_mbcinit(MBCTYPE_SJIS);
1469 break;
1470 case 'U':
1471 case 'u':
1472 reg_kcode = KCODE_UTF8;
1473 re_mbcinit(MBCTYPE_UTF8);
1474 break;
1475 default:
1476 case 'N':
1477 case 'n':
1478 case 'A':
1479 case 'a':
1480 set_no_conversion:
1481 reg_kcode = KCODE_NONE;
1482 re_mbcinit(MBCTYPE_ASCII);
1483 break;
1484 }
1485 }
1486
1487 static void
1488 kcode_setter(val)
1489 VALUE val;
1490 {
1491 may_need_recompile = 1;
1492 rb_set_kcode(StringValuePtr(val));
1493 }
1494
1495 static VALUE
1496 ignorecase_getter()
1497 {
1498 return ruby_ignorecase?Qtrue:Qfalse;
1499 }
1500
1501 static void
1502 ignorecase_setter(val, id)
1503 VALUE val;
1504 ID id;
1505 {
1506 rb_warn("modifying %s is deperecated", rb_id2name(id));
1507 may_need_recompile = 1;
1508 ruby_ignorecase = RTEST(val);
1509 }
1510
1511 static VALUE
1512 match_getter()
1513 {
1514 VALUE match = rb_backref_get();
1515
1516 if (NIL_P(match)) return Qnil;
1517 rb_match_busy(match);
1518 return match;
1519 }
1520
1521 static void
1522 match_setter(val)
1523 VALUE val;
1524 {
1525 if (!NIL_P(val)) {
1526 Check_Type(val, T_MATCH);
1527 }
1528 rb_backref_set(val);
1529 }
1530
1531 static VALUE
1532 rb_reg_s_last_match(argc, argv)
1533 int argc;
1534 VALUE *argv;
1535 {
1536 VALUE nth;
1537
1538 if (rb_scan_args(argc, argv, "01", &nth) == 1) {
1539 return rb_reg_nth_match(NUM2INT(nth), rb_backref_get());
1540 }
1541 return match_getter();
1542 }
1543
1544 void
1545 Init_Regexp()
1546 {
1547 rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
1548
1549 re_set_casetable(casetable);
1550 #if DEFAULT_KCODE == KCODE_EUC
1551 re_mbcinit(MBCTYPE_EUC);
1552 #else
1553 #if DEFAULT_KCODE == KCODE_SJIS
1554 re_mbcinit(MBCTYPE_SJIS);
1555 #else
1556 #if DEFAULT_KCODE == KCODE_UTF8
1557 re_mbcinit(MBCTYPE_UTF8);
1558 #else
1559 re_mbcinit(MBCTYPE_ASCII);
1560 #endif
1561 #endif
1562 #endif
1563
1564 rb_define_virtual_variable("$~", match_getter, match_setter);
1565 rb_define_virtual_variable("$&", last_match_getter, 0);
1566 rb_define_virtual_variable("$`", prematch_getter, 0);
1567 rb_define_virtual_variable("$'", postmatch_getter, 0);
1568 rb_define_virtual_variable("$+", last_paren_match_getter, 0);
1569
1570 rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
1571 rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter);
1572 rb_define_virtual_variable("$-K", kcode_getter, kcode_setter);
1573
1574 rb_cRegexp = rb_define_class("Regexp", rb_cObject);
1575 rb_define_singleton_method(rb_cRegexp, "allocate", rb_reg_s_alloc, 0);
1576 rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance, -1);
1577 rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, -1);
1578 rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, -1);
1579 rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
1580
1581 rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
1582 rb_define_method(rb_cRegexp, "become", rb_reg_become, 1);
1583 rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
1584 rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
1585 rb_define_method(rb_cRegexp, "===", rb_reg_match, 1);
1586 rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
1587 rb_define_method(rb_cRegexp, "match", rb_reg_match_m, 1);
1588 rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
1589 rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
1590 rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
1591 rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
1592 rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
1593 rb_define_method(rb_cRegexp, "kcode", rb_reg_kcode_m, 0);
1594
1595 rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(RE_OPTION_IGNORECASE));
1596 rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(RE_OPTION_EXTENDED));
1597 rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(RE_OPTION_MULTILINE));
1598
1599 rb_global_variable(®_cache);
1600
1601 rb_cMatch = rb_define_class("MatchData", rb_cObject);
1602 rb_define_global_const("MatchingData", rb_cMatch);
1603 rb_define_singleton_method(rb_cMatch, "allocate", match_alloc, 0);
1604 rb_undef_method(CLASS_OF(rb_cMatch), "new");
1605
1606 rb_define_method(rb_cMatch, "become", match_become, 1);
1607 rb_define_method(rb_cMatch, "size", match_size, 0);
1608 rb_define_method(rb_cMatch, "length", match_size, 0);
1609 rb_define_method(rb_cMatch, "offset", match_offset, 1);
1610 rb_define_method(rb_cMatch, "begin", match_begin, 1);
1611 rb_define_method(rb_cMatch, "end", match_end, 1);
1612 rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
1613 rb_define_method(rb_cMatch, "to_ary", match_to_a, 0);
1614 rb_define_method(rb_cMatch, "[]", match_aref, -1);
1615 rb_define_method(rb_cMatch, "select", match_select, -1);
1616 rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
1617 rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
1618 rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
1619 rb_define_method(rb_cMatch, "inspect", rb_any_to_s, 0);
1620 rb_define_method(rb_cMatch, "string", match_string, 0);
1621 }