DEFINITIONS
This source file includes the following functions.
- rb_nkf_putchar
- rb_nkf_kconv
- rb_nkf_guess
- Init_nkf
1 #include "ruby.h"
2
/*
 * Encoding codes.  rb_nkf_guess() returns them as Fixnums and Init_nkf()
 * publishes them as constants of the NKF module.
 * NOTE(review): _BINARY and _NOCONV deliberately share the value 4, and
 * _UNKNOWN is an alias for _AUTO.
 */
#define _AUTO    0
#define _JIS     1
#define _EUC     2
#define _SJIS    3
#define _BINARY  4
#define _NOCONV  4
#define _UNKNOWN _AUTO

/*
 * Replace the stdio character input macros so that code compiled after
 * this point reads from the in-memory buffer `input` instead of a FILE.
 * The stream argument is ignored.  getc() yields -1 once input_ctr has
 * reached i_len; ungetc() simply steps the read index back by one and
 * ignores the character it is given.
 */
#undef getc
#undef ungetc
#define getc(f) (input_ctr < i_len ? input[input_ctr++] : -1)
#define ungetc(c, f) input_ctr--

/* Route character output through rb_nkf_putchar(). */
#undef putchar
#define putchar(c) rb_nkf_putchar(c)

/*
 * INCSIZE is the first growth step of the output buffer; incsize holds
 * the current step (rb_nkf_putchar() doubles it on every growth).
 */
#define INCSIZE 32
static int incsize;

/* Input side: buffer start, next index to read, total length. */
static unsigned char *input;
static int input_ctr;
static int i_len;

/* Output side: buffer start, next index to write, current capacity. */
static unsigned char *output;
static int output_ctr;
static int o_len;
25
26 static VALUE dst;
27
28 static int
29 rb_nkf_putchar(c)
30 unsigned int c;
31 {
32 if (output_ctr >= o_len) {
33 o_len += incsize;
34 rb_str_cat(dst, 0, incsize);
35 output = RSTRING(dst)->ptr;
36 incsize *= 2;
37 }
38 output[output_ctr++] = c;
39
40 return c;
41 }
42
43 #define PERL_XS 1
44 #include "nkf1.7/nkf.c"
45
46 static VALUE
47 rb_nkf_kconv(obj, opt, src)
48 VALUE obj, opt, src;
49 {
50 char *opt_ptr, *opt_end;
51 volatile VALUE v;
52
53 reinit();
54 StringValue(opt);
55 opt_ptr = RSTRING(opt)->ptr;
56 opt_end = opt_ptr + RSTRING(opt)->len;
57 for (; opt_ptr < opt_end; opt_ptr++) {
58 if (*opt_ptr != '-') {
59 continue;
60 }
61 arguments(opt_ptr);
62 }
63
64 incsize = INCSIZE;
65
66 input_ctr = 0;
67 StringValue(src);
68 input = RSTRING(src)->ptr;
69 i_len = RSTRING(src)->len;
70 dst = rb_str_new(0, i_len*3 + 10);
71 v = dst;
72
73 output_ctr = 0;
74 output = RSTRING(dst)->ptr;
75 o_len = RSTRING(dst)->len;
76 *output = '\0';
77
78 if(iso8859_f && (oconv != j_oconv || !x0201_f )) {
79 iso8859_f = FALSE;
80 }
81
82 kanji_convert(NULL);
83 RSTRING(dst)->ptr[output_ctr] = '\0';
84 RSTRING(dst)->len = output_ctr;
85 OBJ_INFECT(dst, src);
86
87 return dst;
88 }
89
90
91
92
93
94
95
96 static VALUE
97 rb_nkf_guess(obj, src)
98 VALUE obj, src;
99 {
100 unsigned char *p;
101 unsigned char *pend;
102 int sequence_counter = 0;
103
104 StringValue(src);
105 p = RSTRING(src)->ptr;
106 pend = p + RSTRING(src)->len;
107 if (p == pend) return INT2FIX(_UNKNOWN);
108
109 #define INCR do {\
110 p++;\
111 if (p==pend) return INT2FIX(_UNKNOWN);\
112 sequence_counter++;\
113 if (sequence_counter % 2 == 1 && *p != 0xa4)\
114 sequence_counter = 0;\
115 if (6 <= sequence_counter) {\
116 sequence_counter = 0;\
117 return INT2FIX(_EUC);\
118 }\
119 } while (0)
120
121 if (*p == 0xa4)
122 sequence_counter = 1;
123
124 while (p<pend) {
125 if (*p == '\033') {
126 return INT2FIX(_JIS);
127 }
128 if (*p < '\006' || *p == 0x7f || *p == 0xff) {
129 return INT2FIX(_BINARY);
130 }
131 if (0x81 <= *p && *p <= 0x8d) {
132 return INT2FIX(_SJIS);
133 }
134 if (0x8f <= *p && *p <= 0x9f) {
135 return INT2FIX(_SJIS);
136 }
137 if (*p == 0x8e) {
138 INCR;
139 if ((0x40 <= *p && *p <= 0x7e) ||
140 (0x80 <= *p && *p <= 0xa0) ||
141 (0xe0 <= *p && *p <= 0xfc))
142 return INT2FIX(_SJIS);
143 }
144 else if (0xa1 <= *p && *p <= 0xdf) {
145 INCR;
146 if (0xf0 <= *p && *p <= 0xfe)
147 return INT2FIX(_EUC);
148 if (0xe0 <= *p && *p <= 0xef) {
149 while (p < pend && *p >= 0x40) {
150 if (*p >= 0x81) {
151 if (*p <= 0x8d || (0x8f <= *p && *p <= 0x9f)) {
152 return INT2FIX(_SJIS);
153 }
154 else if (0xfd <= *p && *p <= 0xfe) {
155 return INT2FIX(_EUC);
156 }
157 }
158 INCR;
159 }
160 }
161 else if (*p <= 0x9f) {
162 return INT2FIX(_SJIS);
163 }
164 }
165 else if (0xf0 <= *p && *p <= 0xfe) {
166 return INT2FIX(_EUC);
167 }
168 else if (0xe0 <= *p && *p <= 0xef) {
169 INCR;
170 if ((0x40 <= *p && *p <= 0x7e) ||
171 (0x80 <= *p && *p <= 0xa0)) {
172 return INT2FIX(_SJIS);
173 }
174 if (0xfd <= *p && *p <= 0xfe) {
175 return INT2FIX(_EUC);
176 }
177 }
178 INCR;
179 }
180 return INT2FIX(_UNKNOWN);
181 }
182
183 void
184 Init_nkf()
185 {
186 VALUE mKconv = rb_define_module("NKF");
187
188 rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
189 rb_define_module_function(mKconv, "guess", rb_nkf_guess, 1);
190
191 rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO));
192 rb_define_const(mKconv, "JIS", INT2FIX(_JIS));
193 rb_define_const(mKconv, "EUC", INT2FIX(_EUC));
194 rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS));
195 rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY));
196 rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV));
197 rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN));
198 }