string.c
DEFINITIONS
This source file includes the following functions.
- rb_str_s_alloc
- str_new
- rb_str_new
- rb_str_new2
- rb_tainted_str_new
- rb_tainted_str_new2
- str_new3
- rb_str_new3
- rb_str_new4
- rb_str_new5
- rb_str_buf_new
- rb_str_buf_new2
- rb_str_to_str
- rb_str_shared_replace
- rb_str_associate
- rb_str_associated
- rb_obj_as_string
- rb_str_dup
- rb_str_init
- rb_str_length
- rb_str_empty
- rb_str_plus
- rb_str_times
- rb_str_format
- str_independent
- str_make_independent
- rb_str_modify
- rb_string_value
- rb_string_value_ptr
- rb_str_substr
- rb_str_freeze
- rb_str_dup_frozen
- rb_str_resize
- rb_str_buf_cat
- rb_str_buf_cat2
- rb_str_cat
- rb_str_cat2
- rb_str_buf_append
- rb_str_append
- rb_str_concat
- rb_str_hash
- rb_str_hash_m
- rb_str_cmp
- rb_str_equal
- rb_str_eql
- rb_str_cmp_m
- rb_str_casecmp
- rb_str_index
- rb_str_index_m
- rb_str_rindex
- rb_str_rindex_m
- rb_str_match
- rb_str_match2
- rb_str_match_m
- succ_char
- rb_str_succ
- rb_str_succ_bang
- rb_str_upto
- rb_str_upto_m
- rb_str_subpat
- rb_str_aref
- rb_str_aref_m
- rb_str_update
- rb_str_subpat_set
- rb_str_aset
- rb_str_aset_m
- rb_str_insert
- rb_str_slice_bang
- get_pat
- rb_str_sub_bang
- rb_str_sub
- str_gsub
- rb_str_gsub_bang
- rb_str_gsub
- rb_str_replace
- uscore_get
- rb_f_sub_bang
- rb_f_sub
- rb_f_gsub_bang
- rb_f_gsub
- rb_str_reverse_bang
- rb_str_reverse
- rb_str_include
- rb_str_to_i
- rb_str_to_f
- rb_str_to_s
- rb_str_inspect
- rb_str_dump
- rb_str_upcase_bang
- rb_str_upcase
- rb_str_downcase_bang
- rb_str_downcase
- rb_str_capitalize_bang
- rb_str_capitalize
- rb_str_swapcase_bang
- rb_str_swapcase
- trnext
- tr_trans
- rb_str_tr_bang
- rb_str_tr
- tr_setup_table
- rb_str_delete_bang
- rb_str_delete
- rb_str_squeeze_bang
- rb_str_squeeze
- rb_str_tr_s_bang
- rb_str_tr_s
- rb_str_count
- rb_str_split_m
- rb_str_split
- rb_f_split
- rb_str_each_line
- rb_str_each_byte
- rb_str_chop_bang
- rb_str_chop
- rb_f_chop_bang
- rb_f_chop
- rb_str_chomp_bang
- rb_str_chomp
- rb_f_chomp_bang
- rb_f_chomp
- rb_str_lstrip_bang
- rb_str_lstrip
- rb_str_rstrip_bang
- rb_str_rstrip
- rb_str_strip_bang
- rb_str_strip
- scan_once
- rb_str_scan
- rb_f_scan
- rb_str_hex
- rb_str_oct
- rb_str_crypt
- rb_str_intern
- rb_str_sum
- rb_str_ljust
- rb_str_rjust
- rb_str_center
- rb_str_setter
- Init_String
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #include "ruby.h"
16 #include "re.h"
17
18 #define BEG(no) regs->beg[no]
19 #define END(no) regs->end[no]
20
21 #include <math.h>
22 #include <ctype.h>
23
24 #ifdef HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27
28 VALUE rb_cString;
29
30 #define STR_ASSOC FL_USER3
31
32 #define RESIZE_CAPA(str,capacity) do {\
33 REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
34 RSTRING(str)->aux.capa = (capacity);\
35 } while (0)
36
37 VALUE rb_fs;
38
39 static VALUE
40 rb_str_s_alloc(klass)
41 VALUE klass;
42 {
43 NEWOBJ(str, struct RString);
44 OBJSETUP(str, klass, T_STRING);
45
46 str->ptr = 0;
47 str->len = 0;
48 str->aux.capa = 0;
49
50 return (VALUE)str;
51 }
52
53 static VALUE
54 str_new(klass, ptr, len)
55 VALUE klass;
56 const char *ptr;
57 long len;
58 {
59 VALUE str;
60
61 if (len < 0) {
62 rb_raise(rb_eArgError, "negative string size (or size too big)");
63 }
64
65 str = rb_obj_alloc(klass);
66 RSTRING(str)->len = len;
67 RSTRING(str)->aux.capa = len;
68 RSTRING(str)->ptr = ALLOC_N(char,len+1);
69 if (ptr) {
70 memcpy(RSTRING(str)->ptr, ptr, len);
71 }
72 else {
73 MEMZERO(RSTRING(str)->ptr, char, len);
74 }
75 RSTRING(str)->ptr[len] = '\0';
76 return str;
77 }
78
79 VALUE
80 rb_str_new(ptr, len)
81 const char *ptr;
82 long len;
83 {
84 return str_new(rb_cString, ptr, len);
85 }
86
87 VALUE
88 rb_str_new2(ptr)
89 const char *ptr;
90 {
91 if (!ptr) {
92 rb_raise(rb_eArgError, "NULL pointer given");
93 }
94 return rb_str_new(ptr, strlen(ptr));
95 }
96
97 VALUE
98 rb_tainted_str_new(ptr, len)
99 const char *ptr;
100 long len;
101 {
102 VALUE str = rb_str_new(ptr, len);
103
104 OBJ_TAINT(str);
105 return str;
106 }
107
108 VALUE
109 rb_tainted_str_new2(ptr)
110 const char *ptr;
111 {
112 VALUE str = rb_str_new2(ptr);
113
114 OBJ_TAINT(str);
115 return str;
116 }
117
118 static VALUE
119 str_new3(klass, str)
120 VALUE klass, str;
121 {
122 VALUE str2 = rb_obj_alloc(klass);
123
124 RSTRING(str2)->len = RSTRING(str)->len;
125 RSTRING(str2)->ptr = RSTRING(str)->ptr;
126 RSTRING(str2)->aux.shared = str;
127 FL_SET(str2, ELTS_SHARED);
128 OBJ_INFECT(str2, str);
129
130 return str2;
131 }
132
133 VALUE
134 rb_str_new3(str)
135 VALUE str;
136 {
137 return str_new3(rb_obj_class(str), str);
138 }
139
140 VALUE
141 rb_str_new4(orig)
142 VALUE orig;
143 {
144 VALUE klass, str;
145
146 klass = rb_obj_class(orig);
147 if (FL_TEST(orig, ELTS_SHARED)) {
148 str = str_new3(klass, RSTRING(orig)->aux.shared);
149 }
150 else if (FL_TEST(orig, STR_ASSOC)) {
151 str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len);
152 }
153 else {
154 str = rb_obj_alloc(klass);
155
156 RSTRING(str)->len = RSTRING(orig)->len;
157 RSTRING(str)->ptr = RSTRING(orig)->ptr;
158 RSTRING(orig)->aux.shared = str;
159 FL_SET(orig, ELTS_SHARED);
160 }
161 OBJ_INFECT(str, orig);
162 OBJ_FREEZE(str);
163 return str;
164 }
165
166 VALUE
167 rb_str_new5(obj, ptr, len)
168 VALUE obj;
169 const char *ptr;
170 long len;
171 {
172 return str_new(rb_obj_class(obj), ptr, len);
173 }
174
175 #define STR_BUF_MIN_SIZE 128
176
177 VALUE
178 rb_str_buf_new(capa)
179 long capa;
180 {
181 VALUE str = rb_obj_alloc(rb_cString);
182
183 if (capa < STR_BUF_MIN_SIZE) {
184 capa = STR_BUF_MIN_SIZE;
185 }
186 RSTRING(str)->ptr = 0;
187 RSTRING(str)->len = 0;
188 RSTRING(str)->aux.capa = capa;
189 RSTRING(str)->ptr = ALLOC_N(char, capa+1);
190 RSTRING(str)->ptr[0] = '\0';
191
192 return str;
193 }
194
195 VALUE
196 rb_str_buf_new2(ptr)
197 const char *ptr;
198 {
199 VALUE str;
200 long len = strlen(ptr);
201
202 str = rb_str_buf_new(len);
203 rb_str_buf_cat(str, ptr, len);
204
205 return str;
206 }
207
208 VALUE
209 rb_str_to_str(str)
210 VALUE str;
211 {
212 return rb_convert_type(str, T_STRING, "String", "to_str");
213 }
214
215 static void
216 rb_str_shared_replace(str, str2)
217 VALUE str, str2;
218 {
219 if (str == str2) return;
220 if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
221 if (NIL_P(str2)) {
222 RSTRING(str)->ptr = 0;
223 RSTRING(str)->len = 0;
224 RSTRING(str)->aux.capa = 0;
225 return;
226 }
227 RSTRING(str)->ptr = RSTRING(str2)->ptr;
228 RSTRING(str)->len = RSTRING(str2)->len;
229 if (FL_TEST(str2, ELTS_SHARED|STR_ASSOC)) {
230 FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
231 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
232 }
233 else {
234 RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa;
235 }
236 RSTRING(str2)->ptr = 0;
237 RSTRING(str2)->len = 0;
238 RSTRING(str2)->aux.capa = 0;
239 FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
240 if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
241 }
242
243 void
244 rb_str_associate(str, add)
245 VALUE str, add;
246 {
247 if (FL_TEST(str, STR_ASSOC)) {
248
249 rb_ary_concat(RSTRING(str)->aux.shared, add);
250 }
251 else {
252 if (FL_TEST(str, ELTS_SHARED)) {
253 rb_str_modify(str);
254 }
255 else if (RSTRING(str)->aux.shared) {
256
257 if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
258 RESIZE_CAPA(str, RSTRING(str)->len);
259 }
260 }
261 RSTRING(str)->aux.shared = add;
262 FL_UNSET(str, ELTS_SHARED);
263 FL_SET(str, STR_ASSOC);
264 }
265 }
266
267 VALUE
268 rb_str_associated(str)
269 VALUE str;
270 {
271 if (FL_TEST(str, STR_ASSOC)) {
272 return RSTRING(str)->aux.shared;
273 }
274 return Qfalse;
275 }
276
277 static ID id_to_s;
278
279 VALUE
280 rb_obj_as_string(obj)
281 VALUE obj;
282 {
283 VALUE str;
284
285 if (TYPE(obj) == T_STRING) {
286 return obj;
287 }
288 str = rb_funcall(obj, id_to_s, 0);
289 if (TYPE(str) != T_STRING)
290 return rb_any_to_s(obj);
291 if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
292 return str;
293 }
294
295 static VALUE rb_str_replace _((VALUE, VALUE));
296
297 VALUE
298 rb_str_dup(str)
299 VALUE str;
300 {
301 VALUE dup = rb_str_s_alloc(rb_cString);
302 rb_str_replace(dup, str);
303 return dup;
304 }
305
306 static VALUE
307 rb_str_init(argc, argv, str)
308 int argc;
309 VALUE *argv;
310 VALUE str;
311 {
312 VALUE orig;
313
314 if (rb_scan_args(argc, argv, "01", &orig) == 1)
315 rb_str_replace(str, orig);
316 return str;
317 }
318
319 static VALUE
320 rb_str_length(str)
321 VALUE str;
322 {
323 return LONG2NUM(RSTRING(str)->len);
324 }
325
326 static VALUE
327 rb_str_empty(str)
328 VALUE str;
329 {
330 if (RSTRING(str)->len == 0)
331 return Qtrue;
332 return Qfalse;
333 }
334
335 VALUE
336 rb_str_plus(str1, str2)
337 VALUE str1, str2;
338 {
339 VALUE str3;
340
341 StringValue(str2);
342 str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len);
343 memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len);
344 memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len,
345 RSTRING(str2)->ptr, RSTRING(str2)->len);
346 RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0';
347
348 if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
349 OBJ_TAINT(str3);
350 return str3;
351 }
352
353 VALUE
354 rb_str_times(str, times)
355 VALUE str;
356 VALUE times;
357 {
358 VALUE str2;
359 long i, len;
360
361 len = NUM2LONG(times);
362 if (len == 0) return rb_str_new5(str,0,0);
363 if (len < 0) {
364 rb_raise(rb_eArgError, "negative argument");
365 }
366 if (LONG_MAX/len < RSTRING(str)->len) {
367 rb_raise(rb_eArgError, "argument too big");
368 }
369
370 str2 = rb_str_new5(str,0, RSTRING(str)->len*len);
371 for (i=0; i<len; i++) {
372 memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len),
373 RSTRING(str)->ptr, RSTRING(str)->len);
374 }
375 RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
376
377 OBJ_INFECT(str2, str);
378
379 return str2;
380 }
381
382 static VALUE
383 rb_str_format(str, arg)
384 VALUE str, arg;
385 {
386 VALUE *argv;
387
388 if (TYPE(arg) == T_ARRAY) {
389 argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1);
390 argv[0] = str;
391 MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len);
392 return rb_f_sprintf(RARRAY(arg)->len+1, argv);
393 }
394
395 argv = ALLOCA_N(VALUE, 2);
396 argv[0] = str;
397 argv[1] = arg;
398 return rb_f_sprintf(2, argv);
399 }
400
401 static int
402 str_independent(str)
403 VALUE str;
404 {
405 if (OBJ_FROZEN(str)) rb_error_frozen("string");
406 if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
407 rb_raise(rb_eSecurityError, "Insecure: can't modify string");
408 if (!FL_TEST(str, ELTS_SHARED)) return 1;
409 return 0;
410 }
411
412 static void
413 str_make_independent(str)
414 VALUE str;
415 {
416 char *ptr;
417
418 ptr = ALLOC_N(char, RSTRING(str)->len+1);
419 if (RSTRING(str)->ptr) {
420 memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
421 }
422 ptr[RSTRING(str)->len] = 0;
423 RSTRING(str)->ptr = ptr;
424 RSTRING(str)->aux.capa = RSTRING(str)->len;
425 FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
426 }
427
428 void
429 rb_str_modify(str)
430 VALUE str;
431 {
432 if (!str_independent(str))
433 str_make_independent(str);
434 }
435
436 VALUE
437 rb_string_value(ptr)
438 volatile VALUE *ptr;
439 {
440 return *ptr = rb_str_to_str(*ptr);
441 }
442
443 char *
444 rb_string_value_ptr(ptr)
445 volatile VALUE *ptr;
446 {
447 VALUE s = *ptr;
448 if (TYPE(s) != T_STRING) {
449 s = rb_str_to_str(s);
450 *ptr = s;
451 }
452 if (!RSTRING(s)->ptr) {
453 str_make_independent(s);
454 }
455 return RSTRING(s)->ptr;
456 }
457
458 VALUE
459 rb_str_substr(str, beg, len)
460 VALUE str;
461 long beg, len;
462 {
463 VALUE str2;
464
465 if (len < 0) return Qnil;
466 if (beg > RSTRING(str)->len) return Qnil;
467 if (beg < 0) {
468 beg += RSTRING(str)->len;
469 if (beg < 0) return Qnil;
470 }
471 if (beg + len > RSTRING(str)->len) {
472 len = RSTRING(str)->len - beg;
473 }
474 if (len < 0) {
475 len = 0;
476 }
477 if (len == 0) return rb_str_new5(str,0,0);
478
479 str2 = rb_str_new5(str,RSTRING(str)->ptr+beg, len);
480 OBJ_INFECT(str2, str);
481
482 return str2;
483 }
484
485 VALUE
486 rb_str_freeze(str)
487 VALUE str;
488 {
489 return rb_obj_freeze(str);
490 }
491
492 VALUE
493 rb_str_dup_frozen(str)
494 VALUE str;
495 {
496 if (FL_TEST(str, ELTS_SHARED)) {
497 OBJ_FREEZE(RSTRING(str)->aux.shared);
498 return RSTRING(str)->aux.shared;
499 }
500 if (OBJ_FROZEN(str)) return str;
501 str = rb_str_dup(str);
502 OBJ_FREEZE(str);
503 return str;
504 }
505
506 VALUE
507 rb_str_resize(str, len)
508 VALUE str;
509 long len;
510 {
511 if (len < 0) {
512 rb_raise(rb_eArgError, "negative string size (or size too big)");
513 }
514
515 if (len != RSTRING(str)->len) {
516 rb_str_modify(str);
517
518 if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
519 RESIZE_CAPA(str, len);
520 }
521 RSTRING(str)->len = len;
522 RSTRING(str)->ptr[len] = '\0';
523 }
524 return str;
525 }
526
527 VALUE
528 rb_str_buf_cat(str, ptr, len)
529 VALUE str;
530 const char *ptr;
531 long len;
532 {
533 long capa, total;
534
535 if (FL_TEST(str, ELTS_SHARED)) {
536 rb_str_modify(str);
537 }
538 capa = RSTRING(str)->aux.capa;
539 total = RSTRING(str)->len+len;
540 if (capa <= total) {
541 while (total > capa) {
542 capa = (capa + 1) * 2;
543 }
544 RESIZE_CAPA(str, capa);
545 }
546 memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
547 RSTRING(str)->len = total;
548 RSTRING(str)->ptr[total] = '\0';
549
550 return str;
551 }
552
553 VALUE
554 rb_str_buf_cat2(str, ptr)
555 VALUE str;
556 const char *ptr;
557 {
558 return rb_str_buf_cat(str, ptr, strlen(ptr));
559 }
560
561 VALUE
562 rb_str_cat(str, ptr, len)
563 VALUE str;
564 const char *ptr;
565 long len;
566 {
567 rb_str_modify(str);
568 if (len > 0) {
569 if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
570 return rb_str_buf_cat(str, ptr, len);
571 }
572 RESIZE_CAPA(str, RSTRING(str)->len + len);
573 if (ptr) {
574 memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
575 }
576 else {
577 MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len);
578 }
579 RSTRING(str)->len += len;
580 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
581 }
582
583 return str;
584 }
585
586 VALUE
587 rb_str_cat2(str, ptr)
588 VALUE str;
589 const char *ptr;
590 {
591 return rb_str_cat(str, ptr, strlen(ptr));
592 }
593
594 VALUE
595 rb_str_buf_append(str, str2)
596 VALUE str, str2;
597 {
598 long capa, len;
599
600 if (FL_TEST(str, ELTS_SHARED)) {
601 rb_str_modify(str);
602 }
603 capa = RSTRING(str)->aux.capa;
604
605 len = RSTRING(str)->len+RSTRING(str2)->len;
606 if (capa <= len) {
607 while (len > capa) {
608 capa = (capa + 1) * 2;
609 }
610 RESIZE_CAPA(str, capa);
611 }
612 memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
613 RSTRING(str2)->ptr, RSTRING(str2)->len);
614 RSTRING(str)->len += RSTRING(str2)->len;
615 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
616
617 return str;
618 }
619
620 VALUE
621 rb_str_append(str, str2)
622 VALUE str, str2;
623 {
624 long len;
625
626 StringValue(str2);
627 rb_str_modify(str);
628 if (RSTRING(str2)->len > 0) {
629 len = RSTRING(str)->len+RSTRING(str2)->len;
630 if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
631 rb_str_buf_append(str, str2);
632 OBJ_INFECT(str, str2);
633 return str;
634 }
635 RESIZE_CAPA(str, len);
636 memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
637 RSTRING(str2)->ptr, RSTRING(str2)->len);
638 RSTRING(str)->len += RSTRING(str2)->len;
639 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
640 }
641 OBJ_INFECT(str, str2);
642
643 return str;
644 }
645
646 VALUE
647 rb_str_concat(str1, str2)
648 VALUE str1, str2;
649 {
650 if (FIXNUM_P(str2)) {
651 int i = FIX2INT(str2);
652 if (0 <= i && i <= 0xff) {
653 char c = i;
654 return rb_str_cat(str1, &c, 1);
655 }
656 }
657 str1 = rb_str_append(str1, str2);
658
659 return str1;
660 }
661
662 int
663 rb_str_hash(str)
664 VALUE str;
665 {
666 register long len = RSTRING(str)->len;
667 register char *p = RSTRING(str)->ptr;
668 register int key = 0;
669
670 #ifdef HASH_ELFHASH
671 register unsigned int g;
672
673 while (len--) {
674 key = (key << 4) + *p++;
675 if (g = key & 0xF0000000)
676 key ^= g >> 24;
677 key &= ~g;
678 }
679 #elif HASH_PERL
680 while (len--) {
681 key = key*33 + *p++;
682 }
683 key = key + (key>>5);
684 #else
685 while (len--) {
686 key = key*65599 + *p;
687 p++;
688 }
689 key = key + (key>>5);
690 #endif
691 return key;
692 }
693
694 static VALUE
695 rb_str_hash_m(str)
696 VALUE str;
697 {
698 int key = rb_str_hash(str);
699 return INT2FIX(key);
700 }
701
702 #define lesser(a,b) (((a)>(b))?(b):(a))
703
704 int
705 rb_str_cmp(str1, str2)
706 VALUE str1, str2;
707 {
708 long len;
709 int retval;
710
711 len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
712 retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
713 if (retval == 0) {
714 if (RSTRING(str1)->len == RSTRING(str2)->len) return 0;
715 if (RSTRING(str1)->len > RSTRING(str2)->len) return 1;
716 return -1;
717 }
718 if (retval > 0) return 1;
719 return -1;
720 }
721
722 static VALUE
723 rb_str_equal(str1, str2)
724 VALUE str1, str2;
725 {
726 if (str1 == str2) return Qtrue;
727 if (TYPE(str2) != T_STRING) {
728 str2 = rb_check_convert_type(str2, T_STRING, "String", "to_str");
729 if (NIL_P(str2)) return Qfalse;
730 }
731
732 if (RSTRING(str1)->len == RSTRING(str2)->len
733 && rb_str_cmp(str1, str2) == 0) {
734 return Qtrue;
735 }
736 return Qfalse;
737 }
738
739 static VALUE
740 rb_str_eql(str1, str2)
741 VALUE str1, str2;
742 {
743 if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len)
744 return Qfalse;
745
746 if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr,
747 lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0)
748 return Qtrue;
749
750 return Qfalse;
751 }
752
753 static VALUE
754 rb_str_cmp_m(str1, str2)
755 VALUE str1, str2;
756 {
757 int result;
758
759 StringValue(str2);
760 result = rb_str_cmp(str1, str2);
761 return INT2FIX(result);
762 }
763
764 static VALUE
765 rb_str_casecmp(str1, str2)
766 VALUE str1, str2;
767 {
768 long len;
769 int retval;
770
771 StringValue(str2);
772 len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
773 retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
774 if (retval == 0) {
775 if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0);
776 if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1);
777 return INT2FIX(-1);
778 }
779 if (retval == 0) return INT2FIX(0);
780 if (retval > 0) return INT2FIX(1);
781 return INT2FIX(-1);
782 }
783
784 static long
785 rb_str_index(str, sub, offset)
786 VALUE str, sub;
787 long offset;
788 {
789 char *s, *e, *p;
790 long len;
791
792 if (offset < 0) {
793 offset += RSTRING(str)->len;
794 if (offset < 0) return -1;
795 }
796 if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1;
797 s = RSTRING(str)->ptr+offset;
798 p = RSTRING(sub)->ptr;
799 len = RSTRING(sub)->len;
800 if (len == 0) return offset;
801 e = RSTRING(str)->ptr + RSTRING(str)->len - len + 1;
802 while (s < e) {
803 if (rb_memcmp(s, p, len) == 0) {
804 return (s-(RSTRING(str)->ptr));
805 }
806 s++;
807 }
808 return -1;
809 }
810
811 static VALUE
812 rb_str_index_m(argc, argv, str)
813 int argc;
814 VALUE *argv;
815 VALUE str;
816 {
817 VALUE sub;
818 VALUE initpos;
819 long pos;
820
821 if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
822 pos = NUM2LONG(initpos);
823 }
824 else {
825 pos = 0;
826 }
827 if (pos < 0) {
828 pos += RSTRING(str)->len;
829 if (pos < 0) {
830 if (TYPE(sub) == T_REGEXP) {
831 rb_backref_set(Qnil);
832 }
833 return Qnil;
834 }
835 }
836
837 switch (TYPE(sub)) {
838 case T_REGEXP:
839 pos = rb_reg_adjust_startpos(sub, str, pos, 0);
840 pos = rb_reg_search(sub, str, pos, 0);
841 break;
842
843 case T_STRING:
844 pos = rb_str_index(str, sub, pos);
845 break;
846
847 case T_FIXNUM:
848 {
849 int c = FIX2INT(sub);
850 long len = RSTRING(str)->len;
851 char *p = RSTRING(str)->ptr;
852
853 for (;pos<len;pos++) {
854 if (p[pos] == c) return LONG2NUM(pos);
855 }
856 return Qnil;
857 }
858
859 default:
860 rb_raise(rb_eTypeError, "type mismatch: %s given",
861 rb_class2name(CLASS_OF(sub)));
862 }
863
864 if (pos == -1) return Qnil;
865 return LONG2NUM(pos);
866 }
867
868 static long
869 rb_str_rindex(str, sub, pos)
870 VALUE str, sub;
871 long pos;
872 {
873 long len = RSTRING(sub)->len;
874 char *s, *sbeg, *t;
875
876
877 if (RSTRING(str)->len < len) return -1;
878 if (RSTRING(str)->len - pos < len) {
879 pos = RSTRING(str)->len - len;
880 }
881 sbeg = RSTRING(str)->ptr;
882 s = RSTRING(str)->ptr + pos;
883 t = RSTRING(sub)->ptr;
884 if (len) {
885 while (sbeg <= s) {
886 if (rb_memcmp(s, t, len) == 0) {
887 return s - RSTRING(str)->ptr;
888 }
889 s--;
890 }
891 return -1;
892 }
893 else {
894 return pos;
895 }
896 }
897
898 static VALUE
899 rb_str_rindex_m(argc, argv, str)
900 int argc;
901 VALUE *argv;
902 VALUE str;
903 {
904 VALUE sub;
905 VALUE position;
906 long pos;
907
908 if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
909 pos = NUM2LONG(position);
910 if (pos < 0) {
911 pos += RSTRING(str)->len;
912 if (pos < 0) {
913 if (TYPE(sub) == T_REGEXP) {
914 rb_backref_set(Qnil);
915 }
916 return Qnil;
917 }
918 }
919 if (pos > RSTRING(str)->len) pos = RSTRING(str)->len;
920 }
921 else {
922 pos = RSTRING(str)->len;
923 }
924
925 switch (TYPE(sub)) {
926 case T_REGEXP:
927 if (RREGEXP(sub)->len) {
928 pos = rb_reg_adjust_startpos(sub, str, pos, 1);
929 pos = rb_reg_search(sub, str, pos, 1);
930 }
931 if (pos >= 0) return LONG2NUM(pos);
932 break;
933
934 case T_STRING:
935 pos = rb_str_rindex(str, sub, pos);
936 if (pos >= 0) return LONG2NUM(pos);
937 break;
938
939 case T_FIXNUM:
940 {
941 int c = FIX2INT(sub);
942 char *p = RSTRING(str)->ptr + pos;
943 char *pbeg = RSTRING(str)->ptr;
944
945 while (pbeg <= p) {
946 if (*p == c) return LONG2NUM(p - RSTRING(str)->ptr);
947 p--;
948 }
949 return Qnil;
950 }
951
952 default:
953 rb_raise(rb_eTypeError, "type mismatch: %s given",
954 rb_class2name(CLASS_OF(sub)));
955 }
956 return Qnil;
957 }
958
959 static VALUE
960 rb_str_match(x, y)
961 VALUE x, y;
962 {
963 long start;
964
965 switch (TYPE(y)) {
966 case T_REGEXP:
967 return rb_reg_match(y, x);
968
969 case T_STRING:
970 start = rb_str_index(x, y, 0);
971 if (start == -1) {
972 return Qnil;
973 }
974 return LONG2NUM(start);
975
976 default:
977 return rb_funcall(y, rb_intern("=~"), 1, x);
978 }
979 }
980
981 static VALUE
982 rb_str_match2(str)
983 VALUE str;
984 {
985 StringValue(str);
986 return rb_reg_match2(rb_reg_regcomp(rb_reg_quote(str)));
987 }
988
989 static VALUE get_pat _((VALUE, int));
990
991 static VALUE
992 rb_str_match_m(str, re)
993 VALUE str, re;
994 {
995 return rb_funcall(get_pat(re, 0), rb_intern("match"), 1, str);
996 }
997
/*
 * Advance the character at *s to its successor within its class
 * (digits, lower-case letters, upper-case letters).
 *
 * Returns 0 when no carry is produced (including when *s belongs to
 * none of the classes and is left unchanged).  On wrap-around *s is
 * reset to the first character of its class and the character to
 * carry is returned: '1' for '9', 'a' for 'z', 'A' for 'Z'.
 */
static char
succ_char(s)
    char *s;
{
    char c = *s;

    /* wrap-around cases produce a carry */
    switch (c) {
      case '9':
        *s = '0';
        return '1';
      case 'z':
        *s = 'a';
        return 'a';
      case 'Z':
        *s = 'A';
        return 'A';
      default:
        break;
    }

    if (('0' <= c && c < '9') ||
        ('a' <= c && c < 'z') ||
        ('A' <= c && c < 'Z')) {
        *s = c + 1;
    }
    return 0;
}
1022
1023 static VALUE
1024 rb_str_succ(orig)
1025 VALUE orig;
1026 {
1027 VALUE str;
1028 char *sbeg, *s;
1029 int c = -1;
1030 long n = 0;
1031
1032 str = rb_str_new5(orig,RSTRING(orig)->ptr, RSTRING(orig)->len);
1033 OBJ_INFECT(str, orig);
1034 if (RSTRING(str)->len == 0) return str;
1035
1036 sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
1037
1038 while (sbeg <= s) {
1039 if (ISALNUM(*s)) {
1040 if ((c = succ_char(s)) == 0) break;
1041 n = s - sbeg;
1042 }
1043 s--;
1044 }
1045 if (c == -1) {
1046 sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
1047 c = '\001';
1048 while (sbeg <= s) {
1049 if ((*s += 1) != 0) break;
1050 s--;
1051 }
1052 }
1053 if (s < sbeg) {
1054 RESIZE_CAPA(str, RSTRING(str)->len + 1);
1055 s = RSTRING(str)->ptr + n;
1056 memmove(s+1, s, RSTRING(str)->len - n);
1057 *s = c;
1058 RSTRING(str)->len += 1;
1059 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1060 }
1061
1062 return str;
1063 }
1064
1065 static VALUE
1066 rb_str_succ_bang(str)
1067 VALUE str;
1068 {
1069 rb_str_modify(str);
1070 rb_str_shared_replace(str, rb_str_succ(str));
1071
1072 return str;
1073 }
1074
1075 VALUE
1076 rb_str_upto(beg, end, excl)
1077 VALUE beg, end;
1078 int excl;
1079 {
1080 VALUE current;
1081 ID succ = rb_intern("succ");
1082
1083 StringValue(end);
1084 current = beg;
1085 while (rb_str_cmp(current, end) <= 0) {
1086 rb_yield(current);
1087 if (!excl && rb_str_equal(current, end)) break;
1088 current = rb_funcall(current, succ, 0, 0);
1089 if (excl && rb_str_equal(current, end)) break;
1090 if (RSTRING(current)->len > RSTRING(end)->len)
1091 break;
1092 }
1093
1094 return beg;
1095 }
1096
1097 static VALUE
1098 rb_str_upto_m(beg, end)
1099 VALUE beg, end;
1100 {
1101 return rb_str_upto(beg, end, 0);
1102 }
1103
1104 static VALUE
1105 rb_str_subpat(str, re, nth)
1106 VALUE str, re;
1107 int nth;
1108 {
1109 if (rb_reg_search(re, str, 0, 0) >= 0) {
1110 return rb_reg_nth_match(nth, rb_backref_get());
1111 }
1112 return Qnil;
1113 }
1114
1115 static VALUE
1116 rb_str_aref(str, indx)
1117 VALUE str;
1118 VALUE indx;
1119 {
1120 long idx;
1121
1122 switch (TYPE(indx)) {
1123 case T_FIXNUM:
1124 idx = FIX2LONG(indx);
1125
1126 num_index:
1127 if (idx < 0) {
1128 idx = RSTRING(str)->len + idx;
1129 }
1130 if (idx < 0 || RSTRING(str)->len <= idx) {
1131 return Qnil;
1132 }
1133 return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
1134
1135 case T_REGEXP:
1136 return rb_str_subpat(str, indx, 0);
1137
1138 case T_STRING:
1139 if (rb_str_index(str, indx, 0) != -1)
1140 return rb_str_dup(indx);
1141 return Qnil;
1142
1143 default:
1144
1145 {
1146 long beg, len;
1147 switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) {
1148 case Qfalse:
1149 break;
1150 case Qnil:
1151 return Qnil;
1152 default:
1153 return rb_str_substr(str, beg, len);
1154 }
1155 }
1156 idx = NUM2LONG(indx);
1157 goto num_index;
1158 }
1159 return Qnil;
1160 }
1161
1162 static VALUE
1163 rb_str_aref_m(argc, argv, str)
1164 int argc;
1165 VALUE *argv;
1166 VALUE str;
1167 {
1168 if (argc == 2) {
1169 if (TYPE(argv[0]) == T_REGEXP) {
1170 return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
1171 }
1172 return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
1173 }
1174 if (argc != 1) {
1175 rb_raise(rb_eArgError, "wrong number of arguments(%d for 1)", argc);
1176 }
1177 return rb_str_aref(str, argv[0]);
1178 }
1179
1180 void
1181 rb_str_update(str, beg, len, val)
1182 VALUE str;
1183 long beg, len;
1184 VALUE val;
1185 {
1186 if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
1187 if (RSTRING(str)->len < beg) {
1188 out_of_range:
1189 rb_raise(rb_eIndexError, "index %ld out of string", beg);
1190 }
1191 if (beg < 0) {
1192 if (-beg > RSTRING(str)->len) {
1193 goto out_of_range;
1194 }
1195 beg += RSTRING(str)->len;
1196 }
1197 if (RSTRING(str)->len < beg + len) {
1198 len = RSTRING(str)->len - beg;
1199 }
1200
1201 StringValue(val);
1202 if (len < RSTRING(val)->len) {
1203
1204 RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(val)->len - len);
1205 }
1206
1207 if (RSTRING(val)->len != len) {
1208 memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len,
1209 RSTRING(str)->ptr + beg + len,
1210 RSTRING(str)->len - (beg + len));
1211 }
1212 if (RSTRING(str)->len < beg && len < 0) {
1213 MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len);
1214 }
1215 if (RSTRING(val)->len > 0) {
1216 memmove(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len);
1217 }
1218 RSTRING(str)->len += RSTRING(val)->len - len;
1219 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1220 OBJ_INFECT(str, val);
1221 }
1222
1223 static void
1224 rb_str_subpat_set(str, re, nth, val)
1225 VALUE str, re;
1226 int nth;
1227 VALUE val;
1228 {
1229 VALUE match;
1230 long start, end, len;
1231
1232 if (rb_reg_search(re, str, 0, 0) < 0) {
1233 rb_raise(rb_eIndexError, "regexp not matched");
1234 }
1235 match = rb_backref_get();
1236 if (nth >= RMATCH(match)->regs->num_regs) {
1237 out_of_range:
1238 rb_raise(rb_eIndexError, "index %d out of regexp", nth);
1239 }
1240 if (nth < 0) {
1241 if (-nth >= RMATCH(match)->regs->num_regs) {
1242 goto out_of_range;
1243 }
1244 nth += RMATCH(match)->regs->num_regs;
1245 }
1246
1247 start = RMATCH(match)->BEG(nth);
1248 if (start == -1) {
1249 rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
1250 }
1251 end = RMATCH(match)->END(nth);
1252 len = end - start;
1253 rb_str_modify(str);
1254 rb_str_update(str, start, len, val);
1255 }
1256
1257 static VALUE
1258 rb_str_aset(str, indx, val)
1259 VALUE str;
1260 VALUE indx, val;
1261 {
1262 long idx, beg;
1263
1264 switch (TYPE(indx)) {
1265 case T_FIXNUM:
1266 num_index:
1267 idx = NUM2LONG(indx);
1268 if (RSTRING(str)->len <= idx) {
1269 out_of_range:
1270 rb_raise(rb_eIndexError, "index %ld out of string", idx);
1271 }
1272 if (idx < 0) {
1273 if (-idx > RSTRING(str)->len)
1274 goto out_of_range;
1275 idx += RSTRING(str)->len;
1276 }
1277 if (FIXNUM_P(val)) {
1278 if (RSTRING(str)->len == idx) {
1279 RSTRING(str)->len += 1;
1280 RESIZE_CAPA(str, RSTRING(str)->len);
1281 }
1282 RSTRING(str)->ptr[idx] = NUM2INT(val) & 0xff;
1283 }
1284 else {
1285 rb_str_update(str, idx, 1, val);
1286 }
1287 return val;
1288
1289 case T_REGEXP:
1290 rb_str_subpat_set(str, indx, 0, val);
1291 return val;
1292
1293 case T_STRING:
1294 beg = rb_str_index(str, indx, 0);
1295 if (beg < 0) {
1296 rb_raise(rb_eIndexError, "string not matched");
1297 }
1298 rb_str_update(str, beg, RSTRING(indx)->len, val);
1299 return val;
1300
1301 default:
1302
1303 {
1304 long beg, len;
1305 if (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 2)) {
1306 rb_str_update(str, beg, len, val);
1307 return val;
1308 }
1309 }
1310 idx = NUM2LONG(indx);
1311 goto num_index;
1312 }
1313 }
1314
1315 static VALUE
1316 rb_str_aset_m(argc, argv, str)
1317 int argc;
1318 VALUE *argv;
1319 VALUE str;
1320 {
1321 rb_str_modify(str);
1322 if (argc == 3) {
1323 if (TYPE(argv[0]) == T_REGEXP) {
1324 rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
1325 }
1326 else {
1327 rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
1328 }
1329 return argv[2];
1330 }
1331 if (argc != 2) {
1332 rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1333 }
1334 return rb_str_aset(str, argv[0], argv[1]);
1335 }
1336
1337 static VALUE
1338 rb_str_insert(str, idx, str2)
1339 VALUE str, idx, str2;
1340 {
1341 long pos = NUM2LONG(idx);
1342
1343 rb_str_modify(str);
1344 if (pos == -1) {
1345 pos = RSTRING(str)->len;
1346 }
1347 else if (pos < 0) {
1348 pos++;
1349 }
1350 rb_str_update(str, pos, 0, str2);
1351 return str;
1352 }
1353
1354 static VALUE
1355 rb_str_slice_bang(argc, argv, str)
1356 int argc;
1357 VALUE *argv;
1358 VALUE str;
1359 {
1360 VALUE result;
1361 VALUE buf[3];
1362 int i;
1363
1364 if (argc < 1 || 2 < argc) {
1365 rb_raise(rb_eArgError, "wrong number of arguments(%d for 1)", argc);
1366 }
1367 for (i=0; i<argc; i++) {
1368 buf[i] = argv[i];
1369 }
1370 buf[i] = rb_str_new(0,0);
1371 result = rb_str_aref_m(argc, buf, str);
1372 if (!NIL_P(result)) {
1373 rb_str_aset_m(argc+1, buf, str);
1374 }
1375 return result;
1376 }
1377
1378 static VALUE
1379 get_pat(pat, quote)
1380 VALUE pat;
1381 int quote;
1382 {
1383 VALUE val;
1384
1385 switch (TYPE(pat)) {
1386 case T_REGEXP:
1387 return pat;
1388
1389 case T_STRING:
1390 break;
1391
1392 default:
1393 val = rb_check_convert_type(pat, T_STRING, "String", "to_str");
1394 if (NIL_P(val)) {
1395 Check_Type(pat, T_REGEXP);
1396 }
1397 pat = val;
1398 }
1399
1400 if (quote) {
1401 val = rb_reg_quote(pat);
1402 #if RUBY_VERSION_CODE < 180
1403 if (val != pat && rb_str_cmp(val, pat) != 0) {
1404 rb_warn("string pattern instead of regexp; metacharacters no longer effective");
1405 }
1406 #endif
1407 pat = val;
1408 }
1409
1410 return rb_reg_regcomp(pat);
1411 }
1412
1413 static VALUE
1414 rb_str_sub_bang(argc, argv, str)
1415 int argc;
1416 VALUE *argv;
1417 VALUE str;
1418 {
1419 VALUE pat, repl, match;
1420 struct re_registers *regs;
1421 int iter = 0;
1422 int tainted = 0;
1423 long plen;
1424
1425 if (argc == 1 && rb_block_given_p()) {
1426 iter = 1;
1427 }
1428 else if (argc == 2) {
1429 repl = argv[1];
1430 StringValue(repl);
1431 if (OBJ_TAINTED(repl)) tainted = 1;
1432 }
1433 else {
1434 rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1435 }
1436
1437 pat = get_pat(argv[0], 1);
1438 if (rb_reg_search(pat, str, 0, 0) >= 0) {
1439 rb_str_modify(str);
1440 match = rb_backref_get();
1441 regs = RMATCH(match)->regs;
1442
1443 if (iter) {
1444 rb_match_busy(match);
1445 repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
1446 rb_backref_set(match);
1447 }
1448 else {
1449 repl = rb_reg_regsub(repl, str, regs);
1450 }
1451 if (OBJ_TAINTED(repl)) tainted = 1;
1452 plen = END(0) - BEG(0);
1453 if (RSTRING(repl)->len > plen) {
1454 RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(repl)->len - plen);
1455 }
1456 if (RSTRING(repl)->len != plen) {
1457 memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len,
1458 RSTRING(str)->ptr + BEG(0) + plen,
1459 RSTRING(str)->len - BEG(0) - plen);
1460 }
1461 memcpy(RSTRING(str)->ptr + BEG(0),
1462 RSTRING(repl)->ptr, RSTRING(repl)->len);
1463 RSTRING(str)->len += RSTRING(repl)->len - plen;
1464 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1465 if (tainted) OBJ_TAINT(str);
1466
1467 return str;
1468 }
1469 return Qnil;
1470 }
1471
1472 static VALUE
1473 rb_str_sub(argc, argv, str)
1474 int argc;
1475 VALUE *argv;
1476 VALUE str;
1477 {
1478 str = rb_str_dup(str);
1479 rb_str_sub_bang(argc, argv, str);
1480 return str;
1481 }
1482
1483 static VALUE
1484 str_gsub(argc, argv, str, bang)
1485 int argc;
1486 VALUE *argv;
1487 VALUE str;
1488 int bang;
1489 {
1490 VALUE pat, val, repl, match;
1491 struct re_registers *regs;
1492 long beg, n;
1493 long offset, blen, len;
1494 int iter = 0;
1495 char *buf, *bp, *cp;
1496 int tainted = 0;
1497
1498 if (argc == 1 && rb_block_given_p()) {
1499 iter = 1;
1500 }
1501 else if (argc == 2) {
1502 repl = argv[1];
1503 StringValue(repl);
1504 if (OBJ_TAINTED(repl)) tainted = 1;
1505 }
1506 else {
1507 rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1508 }
1509
1510 pat = get_pat(argv[0], 1);
1511 offset=0; n=0;
1512 beg = rb_reg_search(pat, str, 0, 0);
1513 if (beg < 0) {
1514 if (bang) return Qnil;
1515 return rb_str_dup(str);
1516 }
1517
1518 blen = RSTRING(str)->len + 30;
1519 buf = ALLOC_N(char, blen);
1520 bp = buf;
1521 cp = RSTRING(str)->ptr;
1522
1523 while (beg >= 0) {
1524 n++;
1525 match = rb_backref_get();
1526 regs = RMATCH(match)->regs;
1527 if (iter) {
1528 rb_match_busy(match);
1529 val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
1530 rb_backref_set(match);
1531 }
1532 else {
1533 val = rb_reg_regsub(repl, str, regs);
1534 }
1535 if (OBJ_TAINTED(val)) tainted = 1;
1536 len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3;
1537 if (blen < len) {
1538 while (blen < len) blen *= 2;
1539 len = bp - buf;
1540 REALLOC_N(buf, char, blen);
1541 bp = buf + len;
1542 }
1543 len = beg - offset;
1544 memcpy(bp, cp, len);
1545 bp += len;
1546 memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len);
1547 bp += RSTRING(val)->len;
1548 if (BEG(0) == END(0)) {
1549
1550
1551
1552
1553 len = mbclen2(RSTRING(str)->ptr[END(0)], pat);
1554 if (RSTRING(str)->len > END(0)) {
1555 memcpy(bp, RSTRING(str)->ptr+END(0), len);
1556 bp += len;
1557 }
1558 offset = END(0) + len;
1559 }
1560 else {
1561 offset = END(0);
1562 }
1563 cp = RSTRING(str)->ptr + offset;
1564 if (offset > RSTRING(str)->len) break;
1565 beg = rb_reg_search(pat, str, offset, 0);
1566 }
1567 if (RSTRING(str)->len > offset) {
1568 len = bp - buf;
1569 if (blen - len < RSTRING(str)->len - offset + 1) {
1570 REALLOC_N(buf, char, len + RSTRING(str)->len - offset + 1);
1571 bp = buf + len;
1572 }
1573 memcpy(bp, cp, RSTRING(str)->len - offset);
1574 bp += RSTRING(str)->len - offset;
1575 }
1576 rb_backref_set(match);
1577 if (bang) {
1578 if (str_independent(str)) {
1579 free(RSTRING(str)->ptr);
1580 }
1581 FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
1582 }
1583 else {
1584 VALUE dup = rb_obj_alloc(rb_obj_class(str));
1585
1586 OBJ_INFECT(dup, str);
1587 str = dup;
1588 }
1589 RSTRING(str)->ptr = buf;
1590 RSTRING(str)->len = len = bp - buf;
1591 RSTRING(str)->ptr[len] = '\0';
1592 RSTRING(str)->aux.capa = len;
1593
1594 if (tainted) OBJ_TAINT(str);
1595 return str;
1596 }
1597
1598 static VALUE
1599 rb_str_gsub_bang(argc, argv, str)
1600 int argc;
1601 VALUE *argv;
1602 VALUE str;
1603 {
1604 return str_gsub(argc, argv, str, 1);
1605 }
1606
1607 static VALUE
1608 rb_str_gsub(argc, argv, str)
1609 int argc;
1610 VALUE *argv;
1611 VALUE str;
1612 {
1613 return str_gsub(argc, argv, str, 0);
1614 }
1615
1616 static VALUE
1617 rb_str_replace(str, str2)
1618 VALUE str, str2;
1619 {
1620 if (str == str2) return str;
1621
1622 StringValue(str2);
1623 if (FL_TEST(str2, ELTS_SHARED)) {
1624 if (str_independent(str)) {
1625 free(RSTRING(str)->ptr);
1626 }
1627 RSTRING(str)->len = RSTRING(str2)->len;
1628 RSTRING(str)->ptr = RSTRING(str2)->ptr;
1629 FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
1630 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
1631 }
1632 else {
1633 rb_str_modify(str);
1634 rb_str_resize(str, RSTRING(str2)->len);
1635 memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
1636 if (FL_TEST(str2, STR_ASSOC)) {
1637 FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
1638 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
1639 }
1640 }
1641
1642 OBJ_INFECT(str, str2);
1643 return str;
1644 }
1645
1646 static VALUE
1647 uscore_get()
1648 {
1649 VALUE line;
1650
1651 line = rb_lastline_get();
1652 if (TYPE(line) != T_STRING) {
1653 rb_raise(rb_eTypeError, "$_ value need to be String (%s given)",
1654 NIL_P(line) ? "nil" : rb_class2name(CLASS_OF(line)));
1655 }
1656 return line;
1657 }
1658
1659 static VALUE
1660 rb_f_sub_bang(argc, argv)
1661 int argc;
1662 VALUE *argv;
1663 {
1664 return rb_str_sub_bang(argc, argv, uscore_get());
1665 }
1666
1667 static VALUE
1668 rb_f_sub(argc, argv)
1669 int argc;
1670 VALUE *argv;
1671 {
1672 VALUE str = rb_str_dup(uscore_get());
1673
1674 if (NIL_P(rb_str_sub_bang(argc, argv, str)))
1675 return str;
1676 rb_lastline_set(str);
1677 return str;
1678 }
1679
1680 static VALUE
1681 rb_f_gsub_bang(argc, argv)
1682 int argc;
1683 VALUE *argv;
1684 {
1685 return rb_str_gsub_bang(argc, argv, uscore_get());
1686 }
1687
1688 static VALUE
1689 rb_f_gsub(argc, argv)
1690 int argc;
1691 VALUE *argv;
1692 {
1693 VALUE str = rb_str_dup(uscore_get());
1694
1695 if (NIL_P(rb_str_gsub_bang(argc, argv, str)))
1696 return str;
1697 rb_lastline_set(str);
1698 return str;
1699 }
1700
1701 static VALUE
1702 rb_str_reverse_bang(str)
1703 VALUE str;
1704 {
1705 char *s, *e;
1706 char c;
1707
1708 rb_str_modify(str);
1709 s = RSTRING(str)->ptr;
1710 e = s + RSTRING(str)->len - 1;
1711 while (s < e) {
1712 c = *s;
1713 *s++ = *e;
1714 *e-- = c;
1715 }
1716
1717 return str;
1718 }
1719
1720 static VALUE
1721 rb_str_reverse(str)
1722 VALUE str;
1723 {
1724 VALUE obj;
1725 char *s, *e, *p;
1726
1727 if (RSTRING(str)->len <= 1) return rb_str_dup(str);
1728
1729 obj = rb_str_new5(str, 0, RSTRING(str)->len);
1730 s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1;
1731 p = RSTRING(obj)->ptr;
1732
1733 while (e >= s) {
1734 *p++ = *e--;
1735 }
1736 OBJ_INFECT(obj, str);
1737
1738 return obj;
1739 }
1740
1741 static VALUE
1742 rb_str_include(str, arg)
1743 VALUE str, arg;
1744 {
1745 long i;
1746
1747 if (FIXNUM_P(arg)) {
1748 int c = FIX2INT(arg);
1749 long len = RSTRING(str)->len;
1750 char *p = RSTRING(str)->ptr;
1751
1752 for (i=0; i<len; i++) {
1753 if (p[i] == c) {
1754 return Qtrue;
1755 }
1756 }
1757 return Qfalse;
1758 }
1759
1760 StringValue(arg);
1761 i = rb_str_index(str, arg, 0);
1762
1763 if (i == -1) return Qfalse;
1764 return Qtrue;
1765 }
1766
1767 static VALUE
1768 rb_str_to_i(argc, argv, str)
1769 int argc;
1770 VALUE *argv;
1771 VALUE str;
1772 {
1773 VALUE b;
1774 int base;
1775
1776 rb_scan_args(argc, argv, "01", &b);
1777 if (argc == 0) base = 10;
1778 else base = NUM2INT(b);
1779
1780 switch (base) {
1781 case 0: case 2: case 8: case 10: case 16:
1782 break;
1783 default:
1784 rb_raise(rb_eArgError, "illegal radix %d", base);
1785 }
1786 return rb_str_to_inum(str, base, Qfalse);
1787 }
1788
1789 static VALUE
1790 rb_str_to_f(str)
1791 VALUE str;
1792 {
1793 return rb_float_new(rb_str_to_dbl(str, Qfalse));
1794 }
1795
1796 static VALUE
1797 rb_str_to_s(str)
1798 VALUE str;
1799 {
1800 return str;
1801 }
1802
1803 VALUE
1804 rb_str_inspect(str)
1805 VALUE str;
1806 {
1807 char *p, *pend;
1808 VALUE result = rb_str_buf_new2("\"");
1809 char s[5];
1810
1811 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1812 while (p < pend) {
1813 char c = *p++;
1814 if (ismbchar(c) && p < pend) {
1815 int len = mbclen(c);
1816 rb_str_buf_cat(result, p - 1, len);
1817 p += len - 1;
1818 }
1819 else if (c == '"'|| c == '\\') {
1820 s[0] = '\\'; s[1] = c;
1821 rb_str_buf_cat(result, s, 2);
1822 }
1823 else if (ISPRINT(c)) {
1824 s[0] = c;
1825 rb_str_buf_cat(result, s, 1);
1826 }
1827 else if (c == '\n') {
1828 s[0] = '\\'; s[1] = 'n';
1829 rb_str_buf_cat(result, s, 2);
1830 }
1831 else if (c == '\r') {
1832 s[0] = '\\'; s[1] = 'r';
1833 rb_str_buf_cat(result, s, 2);
1834 }
1835 else if (c == '\t') {
1836 s[0] = '\\'; s[1] = 't';
1837 rb_str_buf_cat(result, s, 2);
1838 }
1839 else if (c == '\f') {
1840 s[0] = '\\'; s[1] = 'f';
1841 rb_str_buf_cat(result, s, 2);
1842 }
1843 else if (c == '\013') {
1844 s[0] = '\\'; s[1] = 'v';
1845 rb_str_buf_cat(result, s, 2);
1846 }
1847 else if (c == '\007') {
1848 s[0] = '\\'; s[1] = 'a';
1849 rb_str_buf_cat(result, s, 2);
1850 }
1851 else if (c == 033) {
1852 s[0] = '\\'; s[1] = 'e';
1853 rb_str_buf_cat(result, s, 2);
1854 }
1855 else {
1856 sprintf(s, "\\%03o", c & 0377);
1857 rb_str_buf_cat2(result, s);
1858 }
1859 }
1860 rb_str_buf_cat2(result, "\"");
1861
1862 OBJ_INFECT(result, str);
1863 return result;
1864 }
1865
1866 static VALUE
1867 rb_str_dump(str)
1868 VALUE str;
1869 {
1870 long len;
1871 char *p, *pend;
1872 char *q, *qend;
1873 VALUE result;
1874
1875 len = 2;
1876 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1877 while (p < pend) {
1878 char c = *p++;
1879 switch (c) {
1880 case '"': case '\\':
1881 case '\n': case '\r':
1882 case '\t': case '\f': case '#':
1883 case '\013': case '\007': case '\033':
1884 len += 2;
1885 break;
1886
1887 default:
1888 if (ISPRINT(c)) {
1889 len++;
1890 }
1891 else {
1892 len += 4;
1893 }
1894 break;
1895 }
1896 }
1897
1898 result = rb_str_new5(str, 0, len);
1899 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1900 q = RSTRING(result)->ptr; qend = q + len;
1901
1902 *q++ = '"';
1903 while (p < pend) {
1904 char c = *p++;
1905
1906 if (c == '"' || c == '\\') {
1907 *q++ = '\\';
1908 *q++ = c;
1909 }
1910 else if (c == '#') {
1911 *q++ = '\\';
1912 *q++ = '#';
1913 }
1914 else if (ISPRINT(c)) {
1915 *q++ = c;
1916 }
1917 else if (c == '\n') {
1918 *q++ = '\\';
1919 *q++ = 'n';
1920 }
1921 else if (c == '\r') {
1922 *q++ = '\\';
1923 *q++ = 'r';
1924 }
1925 else if (c == '\t') {
1926 *q++ = '\\';
1927 *q++ = 't';
1928 }
1929 else if (c == '\f') {
1930 *q++ = '\\';
1931 *q++ = 'f';
1932 }
1933 else if (c == '\013') {
1934 *q++ = '\\';
1935 *q++ = 'v';
1936 }
1937 else if (c == '\007') {
1938 *q++ = '\\';
1939 *q++ = 'a';
1940 }
1941 else if (c == '\033') {
1942 *q++ = '\\';
1943 *q++ = 'e';
1944 }
1945 else {
1946 *q++ = '\\';
1947 sprintf(q, "%03o", c&0xff);
1948 q += 3;
1949 }
1950 }
1951 *q++ = '"';
1952
1953 OBJ_INFECT(result, str);
1954 return result;
1955 }
1956
1957 static VALUE
1958 rb_str_upcase_bang(str)
1959 VALUE str;
1960 {
1961 char *s, *send;
1962 int modify = 0;
1963
1964 rb_str_modify(str);
1965 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
1966 while (s < send) {
1967 if (ismbchar(*s)) {
1968 s+=mbclen(*s) - 1;
1969 }
1970 else if (ISLOWER(*s)) {
1971 *s = toupper(*s);
1972 modify = 1;
1973 }
1974 s++;
1975 }
1976
1977 if (modify) return str;
1978 return Qnil;
1979 }
1980
1981 static VALUE
1982 rb_str_upcase(str)
1983 VALUE str;
1984 {
1985 str = rb_str_dup(str);
1986 rb_str_upcase_bang(str);
1987 return str;
1988 }
1989
1990 static VALUE
1991 rb_str_downcase_bang(str)
1992 VALUE str;
1993 {
1994 char *s, *send;
1995 int modify = 0;
1996
1997 rb_str_modify(str);
1998 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
1999 while (s < send) {
2000 if (ismbchar(*s)) {
2001 s+=mbclen(*s) - 1;
2002 }
2003 else if (ISUPPER(*s)) {
2004 *s = tolower(*s);
2005 modify = 1;
2006 }
2007 s++;
2008 }
2009
2010 if (modify) return str;
2011 return Qnil;
2012 }
2013
2014 static VALUE
2015 rb_str_downcase(str)
2016 VALUE str;
2017 {
2018 str = rb_str_dup(str);
2019 rb_str_downcase_bang(str);
2020 return str;
2021 }
2022
2023 static VALUE
2024 rb_str_capitalize_bang(str)
2025 VALUE str;
2026 {
2027 char *s, *send;
2028 int modify = 0;
2029
2030 rb_str_modify(str);
2031 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2032 if (ISLOWER(*s)) {
2033 *s = toupper(*s);
2034 modify = 1;
2035 }
2036 while (++s < send) {
2037 if (ismbchar(*s)) {
2038 s+=mbclen(*s) - 1;
2039 }
2040 else if (ISUPPER(*s)) {
2041 *s = tolower(*s);
2042 modify = 1;
2043 }
2044 }
2045 if (modify) return str;
2046 return Qnil;
2047 }
2048
2049 static VALUE
2050 rb_str_capitalize(str)
2051 VALUE str;
2052 {
2053 str = rb_str_dup(str);
2054 rb_str_capitalize_bang(str);
2055 return str;
2056 }
2057
2058 static VALUE
2059 rb_str_swapcase_bang(str)
2060 VALUE str;
2061 {
2062 char *s, *send;
2063 int modify = 0;
2064
2065 rb_str_modify(str);
2066 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2067 while (s < send) {
2068 if (ismbchar(*s)) {
2069 s+=mbclen(*s) - 1;
2070 }
2071 else if (ISUPPER(*s)) {
2072 *s = tolower(*s);
2073 modify = 1;
2074 }
2075 else if (ISLOWER(*s)) {
2076 *s = toupper(*s);
2077 modify = 1;
2078 }
2079 s++;
2080 }
2081
2082 if (modify) return str;
2083 return Qnil;
2084 }
2085
2086 static VALUE
2087 rb_str_swapcase(str)
2088 VALUE str;
2089 {
2090 str = rb_str_dup(str);
2091 rb_str_swapcase_bang(str);
2092 return str;
2093 }
2094
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification.
 *   p, pend - current position and end of the specification text
 *   gen     - non-zero while a range "a-z" is being expanded
 *   now     - last character produced
 *   max     - upper bound of the range being expanded
 */
struct tr {
    int gen, now, max;
    char *p, *pend;
};

/*
 * Produces the next character described by t, or -1 at the end.
 *
 * A backslash that is not the last byte is skipped so the byte after
 * it is taken as an ordinary character.  "x-y" (with '-' not in the
 * last position) expands to every value from x to y inclusive; a range
 * whose start is greater than its end produces nothing.
 */
static int
trnext(t)
    struct tr *t;
{
    for (;;) {
        if (t->gen) {
            /* inside a range: count up to max, then leave range mode */
            if (++t->now < t->max) {
                return t->now;
            }
            t->gen = 0;
            return t->max;
        }

        if (t->p == t->pend) return -1;
        if (t->p < t->pend - 1 && *t->p == '\\') {
            t->p++;
        }
        t->now = *(USTR)t->p++;

        if (!(t->p < t->pend - 1 && *t->p == '-')) {
            return t->now;      /* single character */
        }

        t->p++;                 /* skip the '-' */
        if (t->p < t->pend) {
            if (t->now > *(USTR)t->p) {
                /* reversed range: drop it and read on */
                t->p++;
                continue;
            }
            t->gen = 1;
            t->max = *(USTR)t->p++;
        }
        return t->now;
    }
}
2135
2136 static VALUE rb_str_delete_bang _((int,VALUE*,VALUE));
2137
2138 static VALUE
2139 tr_trans(str, src, repl, sflag)
2140 VALUE str, src, repl;
2141 int sflag;
2142 {
2143 struct tr trsrc, trrepl;
2144 int cflag = 0;
2145 int trans[256];
2146 int i, c, modify = 0;
2147 char *s, *send;
2148
2149 rb_str_modify(str);
2150 StringValue(src);
2151 StringValue(repl);
2152 if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
2153 trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len;
2154 if (RSTRING(src)->len >= 2 && RSTRING(src)->ptr[0] == '^') {
2155 cflag++;
2156 trsrc.p++;
2157 }
2158 if (RSTRING(repl)->len == 0) {
2159 return rb_str_delete_bang(1, &src, str);
2160 }
2161 trrepl.p = RSTRING(repl)->ptr;
2162 trrepl.pend = trrepl.p + RSTRING(repl)->len;
2163 trsrc.gen = trrepl.gen = 0;
2164 trsrc.now = trrepl.now = 0;
2165 trsrc.max = trrepl.max = 0;
2166
2167 if (cflag) {
2168 for (i=0; i<256; i++) {
2169 trans[i] = 1;
2170 }
2171 while ((c = trnext(&trsrc)) >= 0) {
2172 trans[c & 0xff] = -1;
2173 }
2174 while ((c = trnext(&trrepl)) >= 0)
2175 ;
2176 for (i=0; i<256; i++) {
2177 if (trans[i] >= 0) {
2178 trans[i] = trrepl.now;
2179 }
2180 }
2181 }
2182 else {
2183 int r;
2184
2185 for (i=0; i<256; i++) {
2186 trans[i] = -1;
2187 }
2188 while ((c = trnext(&trsrc)) >= 0) {
2189 r = trnext(&trrepl);
2190 if (r == -1) r = trrepl.now;
2191 trans[c & 0xff] = r;
2192 }
2193 }
2194
2195 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2196 if (sflag) {
2197 char *t = s;
2198 int c0, last = -1;
2199
2200 while (s < send) {
2201 c0 = *s++;
2202 if ((c = trans[c0 & 0xff]) >= 0) {
2203 if (last == c) continue;
2204 last = c;
2205 *t++ = c & 0xff;
2206 modify = 1;
2207 }
2208 else {
2209 last = -1;
2210 *t++ = c0;
2211 }
2212 }
2213 if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) {
2214 RSTRING(str)->len = (t - RSTRING(str)->ptr);
2215 modify = 1;
2216 *t = '\0';
2217 }
2218 }
2219 else {
2220 while (s < send) {
2221 if ((c = trans[*s & 0xff]) >= 0) {
2222 *s = c & 0xff;
2223 modify = 1;
2224 }
2225 s++;
2226 }
2227 }
2228
2229 if (modify) return str;
2230 return Qnil;
2231 }
2232
2233 static VALUE
2234 rb_str_tr_bang(str, src, repl)
2235 VALUE str, src, repl;
2236 {
2237 return tr_trans(str, src, repl, 0);
2238 }
2239
2240 static VALUE
2241 rb_str_tr(str, src, repl)
2242 VALUE str, src, repl;
2243 {
2244 str = rb_str_dup(str);
2245 tr_trans(str, src, repl, 0);
2246 return str;
2247 }
2248
2249 static void
2250 tr_setup_table(str, table, init)
2251 VALUE str;
2252 char table[256];
2253 int init;
2254 {
2255 char buf[256];
2256 struct tr tr;
2257 int i, c;
2258 int cflag = 0;
2259
2260 tr.p = RSTRING(str)->ptr; tr.pend = tr.p + RSTRING(str)->len;
2261 tr.gen = tr.now = tr.max = 0;
2262 if (RSTRING(str)->len > 1 && RSTRING(str)->ptr[0] == '^') {
2263 cflag = 1;
2264 tr.p++;
2265 }
2266
2267 if (init) {
2268 for (i=0; i<256; i++) {
2269 table[i] = 1;
2270 }
2271 }
2272 for (i=0; i<256; i++) {
2273 buf[i] = cflag;
2274 }
2275 while ((c = trnext(&tr)) >= 0) {
2276 buf[c & 0xff] = !cflag;
2277 }
2278 for (i=0; i<256; i++) {
2279 table[i] = table[i]&&buf[i];
2280 }
2281 }
2282
2283 static VALUE
2284 rb_str_delete_bang(argc, argv, str)
2285 int argc;
2286 VALUE *argv;
2287 VALUE str;
2288 {
2289 char *s, *send, *t;
2290 char squeez[256];
2291 int modify = 0;
2292 int init = 1;
2293 int i;
2294
2295 if (argc < 1) {
2296 rb_raise(rb_eArgError, "wrong number of arguments");
2297 }
2298 for (i=0; i<argc; i++) {
2299 VALUE s = argv[i];
2300
2301 StringValue(s);
2302 tr_setup_table(s, squeez, init);
2303 init = 0;
2304 }
2305
2306 rb_str_modify(str);
2307 s = t = RSTRING(str)->ptr;
2308 if (!s || RSTRING(str)->len == 0) return Qnil;
2309 send = s + RSTRING(str)->len;
2310 while (s < send) {
2311 if (squeez[*s & 0xff])
2312 modify = 1;
2313 else
2314 *t++ = *s;
2315 s++;
2316 }
2317 *t = '\0';
2318 RSTRING(str)->len = t - RSTRING(str)->ptr;
2319
2320 if (modify) return str;
2321 return Qnil;
2322 }
2323
2324 static VALUE
2325 rb_str_delete(argc, argv, str)
2326 int argc;
2327 VALUE *argv;
2328 VALUE str;
2329 {
2330 str = rb_str_dup(str);
2331 rb_str_delete_bang(argc, argv, str);
2332 return str;
2333 }
2334
2335 static VALUE
2336 rb_str_squeeze_bang(argc, argv, str)
2337 int argc;
2338 VALUE *argv;
2339 VALUE str;
2340 {
2341 char squeez[256];
2342 char *s, *send, *t;
2343 int c, save, modify = 0;
2344 int init = 1;
2345 int i;
2346
2347 if (argc == 0) {
2348 for (i=0; i<256; i++) {
2349 squeez[i] = 1;
2350 }
2351 }
2352 else {
2353 for (i=0; i<argc; i++) {
2354 VALUE s = argv[i];
2355
2356 StringValue(s);
2357 tr_setup_table(s, squeez, init);
2358 init = 0;
2359 }
2360 }
2361
2362 rb_str_modify(str);
2363 s = t = RSTRING(str)->ptr;
2364 if (!s || RSTRING(str)->len == 0) return Qnil;
2365 send = s + RSTRING(str)->len;
2366 save = -1;
2367 while (s < send) {
2368 c = *s++ & 0xff;
2369 if (c != save || !squeez[c]) {
2370 *t++ = save = c;
2371 }
2372 }
2373 *t = '\0';
2374 if (t - RSTRING(str)->ptr != RSTRING(str)->len) {
2375 RSTRING(str)->len = t - RSTRING(str)->ptr;
2376 modify = 1;
2377 }
2378
2379 if (modify) return str;
2380 return Qnil;
2381 }
2382
2383 static VALUE
2384 rb_str_squeeze(argc, argv, str)
2385 int argc;
2386 VALUE *argv;
2387 VALUE str;
2388 {
2389 str = rb_str_dup(str);
2390 rb_str_squeeze_bang(argc, argv, str);
2391 return str;
2392 }
2393
2394 static VALUE
2395 rb_str_tr_s_bang(str, src, repl)
2396 VALUE str, src, repl;
2397 {
2398 return tr_trans(str, src, repl, 1);
2399 }
2400
2401 static VALUE
2402 rb_str_tr_s(str, src, repl)
2403 VALUE str, src, repl;
2404 {
2405 str = rb_str_dup(str);
2406 tr_trans(str, src, repl, 1);
2407 return str;
2408 }
2409
2410 static VALUE
2411 rb_str_count(argc, argv, str)
2412 int argc;
2413 VALUE *argv;
2414 VALUE str;
2415 {
2416 char table[256];
2417 char *s, *send;
2418 int init = 1;
2419 int i;
2420
2421 if (argc < 1) {
2422 rb_raise(rb_eArgError, "wrong number of arguments");
2423 }
2424 for (i=0; i<argc; i++) {
2425 VALUE s = argv[i];
2426
2427 StringValue(s);
2428 tr_setup_table(s, table, init);
2429 init = 0;
2430 }
2431
2432 s = RSTRING(str)->ptr;
2433 if (!s || RSTRING(str)->len == 0) return Qnil;
2434 send = s + RSTRING(str)->len;
2435 i = 0;
2436 while (s < send) {
2437 if (table[*s++ & 0xff]) {
2438 i++;
2439 }
2440 }
2441 return INT2NUM(i);
2442 }
2443
2444 static VALUE
2445 rb_str_split_m(argc, argv, str)
2446 int argc;
2447 VALUE *argv;
2448 VALUE str;
2449 {
2450 VALUE spat;
2451 VALUE limit;
2452 int awk_split = Qfalse;
2453 long beg, end, i = 0;
2454 int lim = 0;
2455 VALUE result, tmp;
2456
2457 if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
2458 lim = NUM2INT(limit);
2459 if (lim <= 0) limit = Qnil;
2460 else if (lim == 1) return rb_ary_new3(1, str);
2461 i = 1;
2462 }
2463
2464 if (NIL_P(spat)) {
2465 if (!NIL_P(rb_fs)) {
2466 spat = rb_fs;
2467 goto fs_set;
2468 }
2469 awk_split = Qtrue;
2470 }
2471 else {
2472 fs_set:
2473 if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) {
2474 if (RSTRING(spat)->ptr[0] == ' ') {
2475 awk_split = Qtrue;
2476 }
2477 else {
2478 spat = rb_reg_regcomp(rb_reg_quote(spat));
2479 }
2480 }
2481 else {
2482 spat = get_pat(spat, 1);
2483 }
2484 }
2485
2486 result = rb_ary_new();
2487 beg = 0;
2488 if (awk_split) {
2489 char *ptr = RSTRING(str)->ptr;
2490 long len = RSTRING(str)->len;
2491 char *eptr = ptr + len;
2492 int skip = 1;
2493
2494 for (end = beg = 0; ptr<eptr; ptr++) {
2495 if (skip) {
2496 if (ISSPACE(*ptr)) {
2497 beg++;
2498 }
2499 else {
2500 end = beg+1;
2501 skip = 0;
2502 }
2503 }
2504 else {
2505 if (ISSPACE(*ptr)) {
2506 rb_ary_push(result, rb_str_substr(str, beg, end-beg));
2507 skip = 1;
2508 beg = end + 1;
2509 if (!NIL_P(limit) && lim <= ++i) break;
2510 }
2511 else {
2512 end++;
2513 }
2514 }
2515 }
2516 }
2517 else {
2518 long start = beg;
2519 long idx;
2520 int last_null = 0;
2521 struct re_registers *regs;
2522
2523 while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
2524 regs = RMATCH(rb_backref_get())->regs;
2525 if (start == end && BEG(0) == END(0)) {
2526 if (last_null == 1) {
2527 rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat)));
2528 beg = start;
2529 }
2530 else {
2531 start += mbclen2(RSTRING(str)->ptr[start],spat);
2532 last_null = 1;
2533 continue;
2534 }
2535 }
2536 else {
2537 rb_ary_push(result, rb_str_substr(str, beg, end-beg));
2538 beg = start = END(0);
2539 }
2540 last_null = 0;
2541
2542 for (idx=1; idx < regs->num_regs; idx++) {
2543 if (BEG(idx) == -1) continue;
2544 if (BEG(idx) == END(idx))
2545 tmp = rb_str_new5(str, 0, 0);
2546 else
2547 tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
2548 rb_ary_push(result, tmp);
2549 }
2550 if (!NIL_P(limit) && lim <= ++i) break;
2551 }
2552 }
2553 if (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0) {
2554 if (RSTRING(str)->len == beg)
2555 tmp = rb_str_new5(str, 0, 0);
2556 else
2557 tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
2558 rb_ary_push(result, tmp);
2559 }
2560 if (NIL_P(limit) && lim == 0) {
2561 while (RARRAY(result)->len > 0 &&
2562 RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0)
2563 rb_ary_pop(result);
2564 }
2565
2566 return result;
2567 }
2568
2569 VALUE
2570 rb_str_split(str, sep0)
2571 VALUE str;
2572 const char *sep0;
2573 {
2574 VALUE sep;
2575
2576 StringValue(str);
2577 sep = rb_str_new2(sep0);
2578 return rb_str_split_m(1, &sep, str);
2579 }
2580
2581 static VALUE
2582 rb_f_split(argc, argv)
2583 int argc;
2584 VALUE *argv;
2585 {
2586 return rb_str_split_m(argc, argv, uscore_get());
2587 }
2588
2589 static VALUE
2590 rb_str_each_line(argc, argv, str)
2591 int argc;
2592 VALUE *argv;
2593 VALUE str;
2594 {
2595 VALUE rs;
2596 int newline;
2597 char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s;
2598 char *ptr = p;
2599 long len = RSTRING(str)->len, rslen;
2600 VALUE line;
2601
2602 if (rb_scan_args(argc, argv, "01", &rs) == 0) {
2603 rs = rb_rs;
2604 }
2605
2606 if (NIL_P(rs)) {
2607 rb_yield(str);
2608 return str;
2609 }
2610 StringValue(rs);
2611 rslen = RSTRING(rs)->len;
2612 if (rslen == 0) {
2613 newline = '\n';
2614 }
2615 else {
2616 newline = RSTRING(rs)->ptr[rslen-1];
2617 }
2618
2619 for (s = p, p += rslen; p < pend; p++) {
2620 if (rslen == 0 && *p == '\n') {
2621 if (*++p != '\n') continue;
2622 while (*p == '\n') p++;
2623 }
2624 if (p[-1] == newline &&
2625 (rslen <= 1 ||
2626 rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) {
2627 line = rb_str_new5(str, s, p - s);
2628 OBJ_INFECT(line, str);
2629 rb_yield(line);
2630 if (RSTRING(str)->ptr != ptr || RSTRING(str)->len != len)
2631 rb_raise(rb_eArgError, "string modified");
2632 s = p;
2633 }
2634 }
2635
2636 if (s != pend) {
2637 if (p > pend) p = pend;
2638 line = rb_str_new5(str, s, p - s);
2639 OBJ_INFECT(line, str);
2640 rb_yield(line);
2641 }
2642
2643 return str;
2644 }
2645
2646 static VALUE
2647 rb_str_each_byte(str)
2648 VALUE str;
2649 {
2650 long i;
2651
2652 for (i=0; i<RSTRING(str)->len; i++) {
2653 rb_yield(INT2FIX(RSTRING(str)->ptr[i] & 0xff));
2654 }
2655 return str;
2656 }
2657
2658 static VALUE
2659 rb_str_chop_bang(str)
2660 VALUE str;
2661 {
2662 if (RSTRING(str)->len > 0) {
2663 rb_str_modify(str);
2664 RSTRING(str)->len--;
2665 if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') {
2666 if (RSTRING(str)->len > 0 &&
2667 RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
2668 RSTRING(str)->len--;
2669 }
2670 }
2671 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2672 return str;
2673 }
2674 return Qnil;
2675 }
2676
2677 static VALUE
2678 rb_str_chop(str)
2679 VALUE str;
2680 {
2681 str = rb_str_dup(str);
2682 rb_str_chop_bang(str);
2683 return str;
2684 }
2685
2686 static VALUE
2687 rb_f_chop_bang(str)
2688 VALUE str;
2689 {
2690 return rb_str_chop_bang(uscore_get());
2691 }
2692
2693 static VALUE
2694 rb_f_chop()
2695 {
2696 VALUE str = uscore_get();
2697
2698 if (RSTRING(str)->len > 0) {
2699 str = rb_str_dup(str);
2700 rb_str_chop_bang(str);
2701 rb_lastline_set(str);
2702 }
2703 return str;
2704 }
2705
2706 static VALUE
2707 rb_str_chomp_bang(argc, argv, str)
2708 int argc;
2709 VALUE *argv;
2710 VALUE str;
2711 {
2712 VALUE rs;
2713 int newline;
2714 char *p = RSTRING(str)->ptr;
2715 long len = RSTRING(str)->len, rslen;
2716
2717 if (rb_scan_args(argc, argv, "01", &rs) == 0) {
2718 if (len == 0) return Qnil;
2719 rs = rb_rs;
2720 if (rs == rb_default_rs) {
2721 smart_chomp:
2722 rb_str_modify(str);
2723 if (RSTRING(str)->ptr[len-1] == '\n') {
2724 RSTRING(str)->len--;
2725 if (RSTRING(str)->len > 0 &&
2726 RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
2727 RSTRING(str)->len--;
2728 }
2729 }
2730 else if (RSTRING(str)->ptr[len-1] == '\r') {
2731 RSTRING(str)->len--;
2732 }
2733 else {
2734 return Qnil;
2735 }
2736 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2737 return str;
2738 }
2739 }
2740 if (NIL_P(rs)) return Qnil;
2741 if (len == 0) return Qnil;
2742
2743 StringValue(rs);
2744 rb_str_modify(str);
2745 rslen = RSTRING(rs)->len;
2746 if (rslen == 0) {
2747 while (len>0 && p[len-1] == '\n') {
2748 len--;
2749 if (len>0 && p[len-1] == '\r')
2750 len--;
2751 }
2752 if (len < RSTRING(str)->len) {
2753 rb_str_modify(str);
2754 RSTRING(str)->len = len;
2755 RSTRING(str)->ptr[len] = '\0';
2756 return str;
2757 }
2758 return Qnil;
2759 }
2760 if (rslen > len) return Qnil;
2761 newline = RSTRING(rs)->ptr[rslen-1];
2762 if (rslen == 1 && newline == '\n')
2763 goto smart_chomp;
2764
2765 if (p[len-1] == newline &&
2766 (rslen <= 1 ||
2767 rb_memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) {
2768 rb_str_modify(str);
2769 RSTRING(str)->len -= rslen;
2770 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2771 return str;
2772 }
2773 return Qnil;
2774 }
2775
2776 static VALUE
2777 rb_str_chomp(argc, argv, str)
2778 int argc;
2779 VALUE *argv;
2780 VALUE str;
2781 {
2782 str = rb_str_dup(str);
2783 rb_str_chomp_bang(argc, argv, str);
2784 return str;
2785 }
2786
2787 static VALUE
2788 rb_f_chomp_bang(argc, argv)
2789 int argc;
2790 VALUE *argv;
2791 {
2792 return rb_str_chomp_bang(argc, argv, uscore_get());
2793 }
2794
2795 static VALUE
2796 rb_f_chomp(argc, argv)
2797 int argc;
2798 VALUE *argv;
2799 {
2800 VALUE str = uscore_get();
2801 VALUE dup = rb_str_dup(str);
2802
2803 if (NIL_P(rb_str_chomp_bang(argc, argv, dup)))
2804 return str;
2805 rb_lastline_set(dup);
2806 return dup;
2807 }
2808
2809 static VALUE
2810 rb_str_lstrip_bang(str)
2811 VALUE str;
2812 {
2813 char *s, *t, *e;
2814
2815 rb_str_modify(str);
2816 s = RSTRING(str)->ptr;
2817 if (!s || RSTRING(str)->len == 0) return Qnil;
2818 e = t = s + RSTRING(str)->len;
2819
2820 while (s < t && ISSPACE(*s)) s++;
2821
2822 RSTRING(str)->len = t-s;
2823 if (s > RSTRING(str)->ptr) {
2824 memmove(RSTRING(str)->ptr, s, RSTRING(str)->len);
2825 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2826 return str;
2827 }
2828 return Qnil;
2829 }
2830
2831 static VALUE
2832 rb_str_lstrip(str)
2833 VALUE str;
2834 {
2835 str = rb_str_dup(str);
2836 rb_str_lstrip_bang(str);
2837 return str;
2838 }
2839
2840 static VALUE
2841 rb_str_rstrip_bang(str)
2842 VALUE str;
2843 {
2844 char *s, *t, *e;
2845
2846 rb_str_modify(str);
2847 s = RSTRING(str)->ptr;
2848 if (!s || RSTRING(str)->len == 0) return Qnil;
2849 e = t = s + RSTRING(str)->len;
2850
2851
2852 while (s < t && ISSPACE(*(t-1))) t--;
2853
2854 RSTRING(str)->len = t-s;
2855 if (t < e) {
2856 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2857 return str;
2858 }
2859 return Qnil;
2860 }
2861
2862 static VALUE
2863 rb_str_rstrip(str)
2864 VALUE str;
2865 {
2866 str = rb_str_dup(str);
2867 rb_str_rstrip_bang(str);
2868 return str;
2869 }
2870
2871 static VALUE
2872 rb_str_strip_bang(str)
2873 VALUE str;
2874 {
2875 VALUE l = rb_str_lstrip_bang(str);
2876 VALUE r = rb_str_rstrip_bang(str);
2877
2878 if (NIL_P(l) && NIL_P(r)) return Qnil;
2879 return str;
2880 }
2881
2882 static VALUE
2883 rb_str_strip(str)
2884 VALUE str;
2885 {
2886 str = rb_str_dup(str);
2887 rb_str_strip_bang(str);
2888 return str;
2889 }
2890
2891 static VALUE
2892 scan_once(str, pat, start)
2893 VALUE str, pat;
2894 long *start;
2895 {
2896 VALUE result, match;
2897 struct re_registers *regs;
2898 long i;
2899
2900 if (rb_reg_search(pat, str, *start, 0) >= 0) {
2901 match = rb_backref_get();
2902 regs = RMATCH(match)->regs;
2903 if (BEG(0) == END(0)) {
2904
2905
2906
2907 *start = END(0)+mbclen2(RSTRING(str)->ptr[END(0)],pat);
2908 }
2909 else {
2910 *start = END(0);
2911 }
2912 if (regs->num_regs == 1) {
2913 return rb_reg_nth_match(0, match);
2914 }
2915 result = rb_ary_new2(regs->num_regs);
2916 for (i=1; i < regs->num_regs; i++) {
2917 rb_ary_push(result, rb_reg_nth_match(i, match));
2918 }
2919
2920 return result;
2921 }
2922 return Qnil;
2923 }
2924
2925 static VALUE
2926 rb_str_scan(str, pat)
2927 VALUE str, pat;
2928 {
2929 VALUE result;
2930 long start = 0;
2931 VALUE match = Qnil;
2932
2933 pat = get_pat(pat, 1);
2934 if (!rb_block_given_p()) {
2935 VALUE ary = rb_ary_new();
2936
2937 while (!NIL_P(result = scan_once(str, pat, &start))) {
2938 match = rb_backref_get();
2939 rb_ary_push(ary, result);
2940 }
2941 rb_backref_set(match);
2942 return ary;
2943 }
2944
2945 while (!NIL_P(result = scan_once(str, pat, &start))) {
2946 match = rb_backref_get();
2947 rb_match_busy(match);
2948 rb_yield(result);
2949 rb_backref_set(match);
2950 }
2951 rb_backref_set(match);
2952 return str;
2953 }
2954
2955 static VALUE
2956 rb_f_scan(self, pat)
2957 VALUE self, pat;
2958 {
2959 return rb_str_scan(uscore_get(), pat);
2960 }
2961
2962 static VALUE
2963 rb_str_hex(str)
2964 VALUE str;
2965 {
2966 return rb_str_to_inum(str, 16, Qfalse);
2967 }
2968
2969 static VALUE
2970 rb_str_oct(str)
2971 VALUE str;
2972 {
2973 return rb_str_to_inum(str, -8, Qfalse);
2974 }
2975
2976 static VALUE
2977 rb_str_crypt(str, salt)
2978 VALUE str, salt;
2979 {
2980 extern char *crypt();
2981 VALUE result;
2982 char *s;
2983
2984 StringValue(salt);
2985 if (RSTRING(salt)->len < 2)
2986 rb_raise(rb_eArgError, "salt too short(need >=2 bytes)");
2987
2988 if (RSTRING(str)->ptr) s = RSTRING(str)->ptr;
2989 else s = "";
2990 result = rb_str_new2(crypt(s, RSTRING(salt)->ptr));
2991 OBJ_INFECT(result, str);
2992 OBJ_INFECT(result, salt);
2993 return result;
2994 }
2995
2996 static VALUE
2997 rb_str_intern(str)
2998 VALUE str;
2999 {
3000 ID id;
3001
3002 if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) {
3003 rb_raise(rb_eArgError, "interning empty string");
3004 }
3005 if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len)
3006 rb_raise(rb_eArgError, "string contains `\\0'");
3007 id = rb_intern(RSTRING(str)->ptr);
3008 return ID2SYM(id);
3009 }
3010
3011 static VALUE
3012 rb_str_sum(argc, argv, str)
3013 int argc;
3014 VALUE *argv;
3015 VALUE str;
3016 {
3017 VALUE vbits;
3018 int bits;
3019 char *p, *pend;
3020
3021 if (rb_scan_args(argc, argv, "01", &vbits) == 0) {
3022 bits = 16;
3023 }
3024 else bits = NUM2INT(vbits);
3025
3026 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
3027 if (bits > sizeof(long)*CHAR_BIT) {
3028 VALUE res = INT2FIX(0);
3029 VALUE mod;
3030
3031 mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
3032 mod = rb_funcall(mod, '-', 1, INT2FIX(1));
3033
3034 while (p < pend) {
3035 res = rb_funcall(res, '+', 1, INT2FIX((unsigned int)*p));
3036 p++;
3037 }
3038 res = rb_funcall(res, '&', 1, mod);
3039 return res;
3040 }
3041 else {
3042 unsigned int res = 0;
3043 unsigned int mod = (1<<bits)-1;
3044
3045 if (mod == 0) {
3046 mod = -1;
3047 }
3048 while (p < pend) {
3049 res += (unsigned int)*p;
3050 p++;
3051 }
3052 res &= mod;
3053 return rb_int2inum(res);
3054 }
3055 }
3056
3057 static VALUE
3058 rb_str_ljust(str, w)
3059 VALUE str;
3060 VALUE w;
3061 {
3062 long width = NUM2LONG(w);
3063 VALUE res;
3064 char *p, *pend;
3065
3066 if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str);
3067 res = rb_str_new5(str, 0, width);
3068 memcpy(RSTRING(res)->ptr, RSTRING(str)->ptr, RSTRING(str)->len);
3069 p = RSTRING(res)->ptr + RSTRING(str)->len; pend = RSTRING(res)->ptr + width;
3070 while (p < pend) {
3071 *p++ = ' ';
3072 }
3073 OBJ_INFECT(res, str);
3074 return res;
3075 }
3076
3077 static VALUE
3078 rb_str_rjust(str, w)
3079 VALUE str;
3080 VALUE w;
3081 {
3082 long width = NUM2LONG(w);
3083 VALUE res;
3084 char *p, *pend;
3085
3086 if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str);
3087 res = rb_str_new5(str, 0, width);
3088 p = RSTRING(res)->ptr; pend = p + width - RSTRING(str)->len;
3089 while (p < pend) {
3090 *p++ = ' ';
3091 }
3092 memcpy(pend, RSTRING(str)->ptr, RSTRING(str)->len);
3093 OBJ_INFECT(res, str);
3094 return res;
3095 }
3096
3097 static VALUE
3098 rb_str_center(str, w)
3099 VALUE str;
3100 VALUE w;
3101 {
3102 long width = NUM2LONG(w);
3103 VALUE res;
3104 char *p, *pend;
3105 long n;
3106
3107 if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str);
3108 res = rb_str_new5(str, 0, width);
3109 n = (width - RSTRING(str)->len)/2;
3110 p = RSTRING(res)->ptr; pend = p + n;
3111 while (p < pend) {
3112 *p++ = ' ';
3113 }
3114 memcpy(pend, RSTRING(str)->ptr, RSTRING(str)->len);
3115 p = pend + RSTRING(str)->len; pend = RSTRING(res)->ptr + width;
3116 while (p < pend) {
3117 *p++ = ' ';
3118 }
3119 OBJ_INFECT(res, str);
3120 return res;
3121 }
3122
3123 void
3124 rb_str_setter(val, id, var)
3125 VALUE val;
3126 ID id;
3127 VALUE *var;
3128 {
3129 if (!NIL_P(val) && TYPE(val) != T_STRING) {
3130 rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
3131 }
3132 *var = val;
3133 }
3134
3135 void
3136 Init_String()
3137 {
3138 rb_cString = rb_define_class("String", rb_cObject);
3139 rb_include_module(rb_cString, rb_mComparable);
3140 rb_include_module(rb_cString, rb_mEnumerable);
3141 rb_define_singleton_method(rb_cString, "allocate", rb_str_s_alloc, 0);
3142 rb_define_method(rb_cString, "initialize", rb_str_init, -1);
3143 rb_define_method(rb_cString, "become", rb_str_replace, 1);
3144 rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
3145 rb_define_method(rb_cString, "==", rb_str_equal, 1);
3146 rb_define_method(rb_cString, "===", rb_str_equal, 1);
3147 rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
3148 rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
3149 rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
3150 rb_define_method(rb_cString, "+", rb_str_plus, 1);
3151 rb_define_method(rb_cString, "*", rb_str_times, 1);
3152 rb_define_method(rb_cString, "%", rb_str_format, 1);
3153 rb_define_method(rb_cString, "[]", rb_str_aref_m, -1);
3154 rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1);
3155 rb_define_method(rb_cString, "insert", rb_str_insert, 2);
3156 rb_define_method(rb_cString, "length", rb_str_length, 0);
3157 rb_define_method(rb_cString, "size", rb_str_length, 0);
3158 rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
3159 rb_define_method(rb_cString, "=~", rb_str_match, 1);
3160 rb_define_method(rb_cString, "~", rb_str_match2, 0);
3161 rb_define_method(rb_cString, "match", rb_str_match_m, 1);
3162 rb_define_method(rb_cString, "succ", rb_str_succ, 0);
3163 rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
3164 rb_define_method(rb_cString, "next", rb_str_succ, 0);
3165 rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
3166 rb_define_method(rb_cString, "upto", rb_str_upto_m, 1);
3167 rb_define_method(rb_cString, "index", rb_str_index_m, -1);
3168 rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
3169 rb_define_method(rb_cString, "replace", rb_str_replace, 1);
3170
3171 rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
3172 rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
3173 rb_define_method(rb_cString, "to_s", rb_str_to_s, 0);
3174 rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
3175 rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
3176 rb_define_method(rb_cString, "dump", rb_str_dump, 0);
3177
3178 rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
3179 rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
3180 rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
3181 rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
3182
3183 rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0);
3184 rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0);
3185 rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0);
3186 rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0);
3187
3188 rb_define_method(rb_cString, "hex", rb_str_hex, 0);
3189 rb_define_method(rb_cString, "oct", rb_str_oct, 0);
3190 rb_define_method(rb_cString, "split", rb_str_split_m, -1);
3191 rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
3192 rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
3193 rb_define_method(rb_cString, "concat", rb_str_concat, 1);
3194 rb_define_method(rb_cString, "<<", rb_str_concat, 1);
3195 rb_define_method(rb_cString, "crypt", rb_str_crypt, 1);
3196 rb_define_method(rb_cString, "intern", rb_str_intern, 0);
3197
3198 rb_define_method(rb_cString, "include?", rb_str_include, 1);
3199
3200 rb_define_method(rb_cString, "scan", rb_str_scan, 1);
3201
3202 rb_define_method(rb_cString, "ljust", rb_str_ljust, 1);
3203 rb_define_method(rb_cString, "rjust", rb_str_rjust, 1);
3204 rb_define_method(rb_cString, "center", rb_str_center, 1);
3205
3206 rb_define_method(rb_cString, "sub", rb_str_sub, -1);
3207 rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
3208 rb_define_method(rb_cString, "chop", rb_str_chop, 0);
3209 rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
3210 rb_define_method(rb_cString, "strip", rb_str_strip, 0);
3211 rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
3212 rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
3213
3214 rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1);
3215 rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
3216 rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
3217 rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
3218 rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
3219 rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
3220 rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
3221
3222 rb_define_method(rb_cString, "tr", rb_str_tr, 2);
3223 rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2);
3224 rb_define_method(rb_cString, "delete", rb_str_delete, -1);
3225 rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
3226 rb_define_method(rb_cString, "count", rb_str_count, -1);
3227
3228 rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2);
3229 rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2);
3230 rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1);
3231 rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1);
3232
3233 rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
3234 rb_define_method(rb_cString, "each", rb_str_each_line, -1);
3235 rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
3236
3237 rb_define_method(rb_cString, "sum", rb_str_sum, -1);
3238
3239 rb_define_global_function("sub", rb_f_sub, -1);
3240 rb_define_global_function("gsub", rb_f_gsub, -1);
3241
3242 rb_define_global_function("sub!", rb_f_sub_bang, -1);
3243 rb_define_global_function("gsub!", rb_f_gsub_bang, -1);
3244
3245 rb_define_global_function("chop", rb_f_chop, 0);
3246 rb_define_global_function("chop!", rb_f_chop_bang, 0);
3247
3248 rb_define_global_function("chomp", rb_f_chomp, -1);
3249 rb_define_global_function("chomp!", rb_f_chomp_bang, -1);
3250
3251 rb_define_global_function("split", rb_f_split, -1);
3252 rb_define_global_function("scan", rb_f_scan, 1);
3253
3254 rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
3255 rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1);
3256
3257 id_to_s = rb_intern("to_s");
3258
3259 rb_fs = Qnil;
3260 rb_define_hooked_variable("$;", &rb_fs, 0, rb_str_setter);
3261 rb_define_hooked_variable("$-F", &rb_fs, 0, rb_str_setter);
3262 }