lib/jcode.rb
DEFINITIONS
This source file includes the following functions.
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS) strings

# When running verbosely, tell the user that no multibyte encoding is selected.
STDERR.puts "Warning: $KCODE is NONE." if $VERBOSE && $KCODE == "NONE"

# Keep the current verbosity in $vsave and switch warnings off; the last line
# of this file assigns $vsave back to $VERBOSE.
$vsave = $VERBOSE
$VERBOSE = false
8 class String
# Prints a notice on standard error when running verbosely.
# NOTE(review): $VERBOSE is assigned false immediately before `class String`,
# so as the file stands this notice is not printed - confirm the intent.
if $VERBOSE
  STDERR.printf("feel free for some warnings:\n")
end
10
# Normalizes a tr-style character specification so that it can be
# interpolated inside a regexp character class (/[...]/).
#
# * A backslash followed by `]`, `[`, `-` or `\` is kept as written.
# * A backslash followed by any other character matched by `.` is reduced
#   to that character (the needless escape is dropped).
# * A bare `]`, `[` or `\` gets a backslash put in front of it.
#
# str:: the character specification.
#
# Returns a new String; +str+ itself is not modified.
def _regex_quote(str)
  # The brackets inside the character classes are escaped explicitly.  An
  # unescaped leading `]` followed by a bare `[` is read by newer regexp
  # engines as the start of a nested class, which makes the literal invalid.
  str.gsub(/(\\[\]\[\-\\])|\\(.)|([\]\[\\])/) do
    $1 || $2 || '\\' + $3
  end
end
private :_regex_quote
17
# Regexp source text describing one multibyte character as a byte sequence:
# a lead byte class followed by one (or, for the second UTF-8 alternative,
# two) trailing byte classes.
PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'

# Compiled forms of the patterns above, with the `n` flag so the byte ranges
# are matched byte by byte.  The flag is given on the literal because the
# second argument of Regexp.new is the *options* argument: passing 'n' there
# turns on case-insensitive matching instead of selecting the "none" encoding.
RE_SJIS = /#{PATTERN_SJIS}/n
RE_EUC = /#{PATTERN_EUC}/n
RE_UTF8 = /#{PATTERN_UTF8}/n
25
# SUCC maps the lower-cased first letter of $KCODE ('s', 'e' or 'u') to a
# table of increments that succ! adds to the last byte of the string.
# Each table is keyed by a one-byte String and answers 1 for any key that
# has no explicit entry.
SUCC = {}

# 's': bytes 0x00-0x3f step up to 0x40; 0x7e steps to 0x80; 0xfd-0xff step
# to 0x100.
sjis_steps = SUCC['s'] = Hash.new(1)
0.upto(0x3f) { |byte| sjis_steps[byte.chr] = 0x40 - byte }
sjis_steps["\x7e"] = 0x80 - 0x7e
sjis_steps["\xfd"] = 0x100 - 0xfd
sjis_steps["\xfe"] = 0x100 - 0xfe
sjis_steps["\xff"] = 0x100 - 0xff

# 'e': bytes 0x00-0xa0 step up to 0xa1; 0xfe steps by 2.
euc_steps = SUCC['e'] = Hash.new(1)
0.upto(0xa0) { |byte| euc_steps[byte.chr] = 0xa1 - byte }
euc_steps["\xfe"] = 2

# 'u': bytes 0x00-0x7f step up to 0x80; 0xbf steps to 0x100.
utf8_steps = SUCC['u'] = Hash.new(1)
0.upto(0x7f) { |byte| utf8_steps[byte.chr] = 0x80 - byte }
utf8_steps["\xbf"] = 0x100 - 0xbf
45
# Searches the string for a multibyte character, using the regexp that
# corresponds to the first character of $KCODE (s/S -> RE_SJIS,
# e/E -> RE_EUC, u/U -> RE_UTF8).
#
# Returns the result of the `=~` match (the offset of the first match or
# nil); returns nil when $KCODE starts with any other character.
def mbchar?
  code = $KCODE[0]
  if code == ?s || code == ?S
    self =~ RE_SJIS
  elsif code == ?e || code == ?E
    self =~ RE_EUC
  elsif code == ?u || code == ?U
    self =~ RE_UTF8
  end
end
58
# Returns the regexp succ! uses to test the end of the string, chosen by the
# first character of $KCODE: the SJIS, EUC or UTF-8 multibyte pattern, or a
# single character for any other setting.
#
# The patterns are anchored with \z rather than $.  `$` also matches in front
# of a newline, so a string with a trailing or interior newline could be
# reported as ending in a multibyte character when its last bytes are
# something else.
def end_regexp
  case $KCODE[0]
  when ?s, ?S
    /#{PATTERN_SJIS}\z/o
  when ?e, ?E
    /#{PATTERN_EUC}\z/o
  when ?u, ?U
    /#{PATTERN_UTF8}\z/o
  else
    /.\z/o
  end
end
71
# Keep the existing succ!/succ implementations reachable under private
# names before the methods are redefined below; succ! falls back to
# original_succ!.
alias original_succ! succ!
alias original_succ succ
private :original_succ!, :original_succ
77
# Replaces the string with its successor, in place.
#
# When the string ends in a multibyte character (per end_regexp) and SUCC has
# a step table for the current $KCODE, the last byte is advanced until the
# string again ends in a multibyte character; a last byte that wraps to 0
# carries into the byte before it.  In every other case the aliased
# original_succ! does the work.
#
# Returns self on the multibyte path, otherwise whatever original_succ!
# returns.
#
# NOTE(review): SUCC's tables are keyed by one-byte Strings while the lookup
# below uses self[-1]; where String#[] returns an Integer the table default
# of 1 is what gets added - confirm whether the larger steps were intended.
# NOTE(review): the carry only reaches self[-2]; three-byte UTF-8 sequences
# look unsupported - confirm.
def succ!
  reg = end_regexp
  succ_table = SUCC[$KCODE[0,1].downcase]
  # SUCC only has 's', 'e' and 'u' entries.  For any other $KCODE the table
  # is nil while end_regexp still matches ordinary strings, so guard here
  # instead of indexing nil.
  return original_succ! unless succ_table && self =~ reg

  begin
    self[-1] += succ_table[self[-1]]
    self[-2] += 1 if self[-1] == 0
  end while self !~ reg
  self
end
91
# Non-destructive form of succ!: works on a copy and returns it, leaving the
# receiver untouched.
def succ
  copy = dup
  copy.succ! || copy
end
95
# The helpers that follow are private.
private

# Expands a tr-style character specification into an Array of characters.
#
# * `X-Y` (either side may be written with a leading backslash) expands to
#   every value from X up to Y; a pair whose two sides differ in length is
#   skipped.
# * Any other character, or a backslash followed by a character, contributes
#   that one character.
#
# str:: the specification, e.g. "a-cX".
#
# Returns the expanded characters in specification order.
def _expand_ch(str)
  expanded = []
  spec = /(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m
  str.scan(spec) do |lo_escaped, lo_plain, hi_escaped, hi_plain, one_escaped, one_plain|
    single = one_escaped || one_plain
    if single
      expanded.push single
      next
    end

    lower = lo_escaped || lo_plain
    upper = hi_escaped || hi_plain
    next if lower.length != upper.length

    if lower.length == 1
      lower[0].upto(upper[0]) { |c| expanded.push c.chr }
    else
      lower.upto(upper) { |c| expanded.push c }
    end
  end
  expanded
end
116
# Builds the translation table used by tr! and tr_s!.
#
# from:: specification of the characters to translate.
# to::   specification of the replacement characters.
#
# Both specifications are expanded with _expand_ch.  The i-th source
# character maps to the i-th replacement; when there are fewer replacements
# than sources, the last replacement is used for the remainder.
#
# Returns a Hash from source character to replacement character.
def expand_ch_hash(from, to)
  sources = _expand_ch(from)
  targets = _expand_ch(to)
  fallback = targets[-1]
  table = {}
  sources.each_with_index { |ch, idx| table[ch] = targets[idx] || fallback }
  table
end
124
# Memoization tables for the methods below:
# * HashCache           - translation hashes, keyed by from + "1-0" + to
# * TrPatternCache      - tr! patterns, keyed by the `from` specification
# * DeletePatternCache  - delete! patterns, keyed by the `del` specification
# * SqueezePatternCache - squeeze!/tr_s! patterns, keyed by the specification
HashCache           = Hash.new
TrPatternCache      = Hash.new
DeletePatternCache  = Hash.new
SqueezePatternCache = Hash.new

# The methods defined after this point are public again.
public
131
# Translates characters in place.
#
# from:: specification of the characters to replace; a leading `^` negates
#        the set.
# to::   specification of the replacement characters.  When empty, the call
#        is delegated to delete!(from).
#
# With a negated +from+, every character outside the set is replaced by the
# last character of +to+.  Otherwise each matched character is replaced
# through the table built by expand_ch_hash.
#
# Returns the result of the underlying gsub! (or delete!): self when
# something was replaced, nil otherwise.
def tr!(from, to)
  return self.delete!(from) if to.length == 0

  pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
  if from[0] == ?^
    # \z with /m takes the real last character of `to`.  `/.$/` skips a
    # trailing newline and finds no match at all when `to` is "\n", which
    # made the following [0] fail on nil.
    last = /.\z/m.match(to)[0]
    self.gsub!(pattern, last)
  else
    h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
    self.gsub!(pattern) do |c| h[c] end
  end
end
144
# Non-destructive form of tr!: translates a copy and returns it, leaving the
# receiver untouched.
def tr(from, to)
  copy = dup
  copy.tr!(from, to) || copy
end
148
# Removes, in place, every run of characters that belong to +del+.
#
# del:: tr-style specification of the characters to remove.  The compiled
#       pattern is memoized in DeletePatternCache under +del+.
#
# Returns the result of gsub!: self when something was removed, nil
# otherwise.
def delete!(del)
  pattern = (DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/)
  gsub!(pattern, '')
end
152
# Non-destructive form of delete!: removes the characters from a copy and
# returns it, leaving the receiver untouched.
def delete(del)
  copy = dup
  copy.delete!(del) || copy
end
156
# Collapses, in place, each run of a repeated character into one character.
#
# del:: optional tr-style specification; when given, only characters in the
#       set are collapsed and the compiled pattern is memoized in
#       SqueezePatternCache.  When omitted, any repeated character is
#       collapsed.
#
# Returns the result of gsub!: self when something was collapsed, nil
# otherwise.
def squeeze!(del=nil)
  pattern = /(.|\n)\1+/
  pattern = (SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/) if del
  gsub!(pattern, '\1')
end
166
# Non-destructive form of squeeze!: collapses runs in a copy and returns it,
# leaving the receiver untouched.
def squeeze(del=nil)
  copy = dup
  copy.squeeze!(del) || copy
end
170
# Translates characters in place and squeezes the translated regions: a run
# of characters from +from+ is translated, then adjacent identical
# replacement characters inside that run are collapsed into one.
#
# from:: specification of the characters to replace; a leading `^` negates
#        the set.
# to::   specification of the replacement characters.  When empty, the call
#        is delegated to delete!(from).
#
# With a negated +from+, each run of characters outside the set becomes a
# single copy of the last character of +to+.
#
# Returns the result of the underlying gsub! (or delete!): self when a run
# was matched, nil otherwise.
def tr_s!(from, to)
  return self.delete!(from) if to.length == 0

  # The pattern matches whole runs (one or more characters).  The previous
  # `(x)\1+` form needed at least two equal characters, so single
  # occurrences were never translated.  The cache key is an Array so these
  # entries cannot be confused with the String-keyed ones squeeze! stores
  # in the same table.
  pattern = SqueezePatternCache[[:tr_s, from.dup]] ||= /[#{_regex_quote(from)}]+/
  if from[0] == ?^
    # \z with /m takes the real last character of `to`; `/.$/` finds no
    # match when `to` is "\n".
    last = /.\z/m.match(to)[0]
    self.gsub!(pattern, last)
  else
    h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
    self.gsub!(pattern) do |run|
      squeezed = []
      run.scan(/./m) do |c|
        # to_s: a character with no table entry contributes nothing.
        mapped = h[c].to_s
        squeezed.push(mapped) unless mapped == squeezed[-1]
      end
      squeezed.join
    end
  end
end
183
# Non-destructive form of tr_s!: translates and squeezes a copy and returns
# it, leaving the receiver untouched.
def tr_s(from, to)
  copy = dup
  copy.tr_s!(from, to) || copy
end
187
# Removes the last character in place; a trailing "\n" or "\r\n" is removed
# as one unit.
#
# Returns the result of gsub!: self when a character was removed, nil when
# the string was empty.
def chop!
  final_char = /(?:.|\r?\n)\z/
  gsub!(final_char, '')
end
191
# Non-destructive form of chop!: chops a copy and returns it, leaving the
# receiver untouched.
def chop
  copy = dup
  copy.chop! || copy
end
195
# Returns the length of a copy of the string in which every match of
# /[^\Wa-zA-Z_\d]/ (a word character other than an ASCII letter, digit or
# underscore) has been replaced by one space.
# NOTE(review): presumably each such match is one multibyte character under
# the active $KCODE, which makes this a character count - confirm.
def jlength
  collapsed = gsub(/[^\Wa-zA-Z_\d]/, ' ')
  collapsed.length
end
alias jsize jlength
200
# Counts the characters of the string that belong to +str+.
#
# str:: tr-style specification of the characters to count.
#
# Everything outside the set is deleted from a copy (by negating the
# specification with `^`), and the jlength of what remains is returned.
def jcount(str)
  kept = delete("^#{str}")
  kept.jlength
end
204
# Walks the string one character at a time (newlines included).
#
# With a block, yields each character and returns what scan returns.
# Without a block, returns the characters as an Array.
def each_char
  return scan(/./m) unless block_given?

  scan(/./m) { |ch| yield ch }
end
214
215 end
# Put back the verbosity level that was saved in $vsave near the top of the
# file, before String was reopened.
$VERBOSE = $vsave