lib/irb/ruby-lex.rb
DEFINITIONS
This source file includes the following functions.
1 #
2 # irb/ruby-lex.rb - ruby lexical analyzer
3 # $Release Version: 0.9$
4 # $Revision: 1.11 $
5 # $Date: 2002/07/29 06:14:08 $
6 # by Keiju ISHITSUKA(keiju@ishitsuka.com)
7 #
8 # --
9 #
10 #
11 #
12
13 require "e2mmap"
14 require "irb/slex"
15 require "irb/ruby-token"
16
17 class RubyLex
# RCS identification string, stored as a class-level instance variable.
@RCS_ID = '-$Id: ruby-lex.rb,v 1.11 2002/07/29 06:14:08 matz Exp $-'

# Exception classes are declared through Exception2MessageMapper; the
# second argument is the message format used when the error is raised.
extend Exception2MessageMapper
def_exception :AlreadyDefinedToken, "Already defined token(%s)"
def_exception :TkReading2TokenNoKey, "key nothing(key='%s')"
def_exception :TkSymbol2TokenNoKey, "key nothing(key='%s')"
def_exception :TkReading2TokenDuplicateError, "key duplicate(token_n='%s', key='%s')"
def_exception :SyntaxError, "%s"

def_exception :TerminateLineInput, "Terminate Line Input"

# Token classes and the Token() factory come from RubyToken.
include RubyToken
31
# Class-level debug switch.  Debug output is enabled while debug_level is
# greater than zero; it starts out disabled.
def self.debug_level
  @debug_level
end

def self.debug_level=(level)
  @debug_level = level
end

def self.debug?
  @debug_level > 0
end

@debug_level = 0
39
# Builds the operator rule table, attaches STDIN as the default input and
# resets every position counter, character buffer and option flag.
def initialize
  lex_init
  set_input(STDIN)

  # position bookkeeping
  @seek = 0
  @line_no = 1
  @exp_line_no = @line_no
  @base_char_no = 0
  @char_no = 0

  # character buffers: pending input, consumed input, here-document header
  @rests = []
  @readed = []
  @here_readed = []

  # nesting depth and the tokens that opened each level
  @indent = 0
  @indent_stack = []

  # option flags
  @skip_space = false
  @readed_auto_clean_up = false
  @exception_on_syntax_error = true
end
59
# Lexer options that callers may change.
attr_accessor :skip_space, :readed_auto_clean_up, :exception_on_syntax_error

# Read-only view of the current position and nesting depth.
attr_reader :seek, :char_no, :line_no, :indent
68
69 # io functions
# Selects where input lines come from.
#
# io::    stored in @io; used by eof? and by the default line reader
# p::     optional Proc called to obtain each line (takes precedence)
# block:: used as the line reader when +p+ is not a Proc
#
# Without a Proc or a block, lines are read with io.gets.  The block is
# captured explicitly with &block: a bare +proc+ call without a block no
# longer captures the method's block on current Ruby versions.
def set_input(io, p = nil, &block)
  @io = io
  @input =
    if p.kind_of?(Proc)
      p
    elsif block
      block
    else
      proc { @io.gets }
    end
end
80
# Returns everything consumed since the previous call as one string and
# empties the consumed-character buffer.  @base_char_no is updated first:
# it becomes the number of characters after the last newline in the buffer,
# or grows by the buffer size when the buffer holds no newline.
def get_readed
  chars_after_newline = @readed.reverse.index("\n")
  if chars_after_newline
    @base_char_no = chars_after_newline
  else
    @base_char_no += @readed.size
  end

  text = @readed.join("")
  @readed = []
  text
end
92
# Reads one character, refilling the pending buffer through buf_input when
# it is empty.  Returns nil once no more input is available.  The character
# is recorded in the here-document buffer while @here_header is set,
# otherwise in the ordinary consumed buffer, and the seek/line/column
# counters are advanced.
def getc
  while @rests.size.zero?
    return nil unless buf_input
  end

  char = @rests.shift
  (@here_header ? @here_readed : @readed).push char
  @seek += 1
  if char == "\n"
    @line_no += 1
    @char_no = 0
  else
    @char_no += 1
  end
  char
end
112
# Reads characters through getc up to and including the next newline and
# returns them as a string.  Returns whatever was collected (possibly "")
# when input runs out first.
def gets
  line = ""
  ch = getc
  while ch
    line << ch
    break if ch == "\n"
    ch = getc
  end
  line
end
121
# Reports whether the underlying io object (the one given to set_input)
# is at end of file.
def eof?
  @io.eof?
end
125
# Like getc, but only takes a character that is already buffered: returns
# nil instead of asking for more input when the pending buffer is empty.
def getc_of_rests
  return nil if @rests.empty?
  getc
end
133
# Pushes a character back onto the front of the pending buffer.
#
# The most recently consumed character is always removed from the
# here-document buffer (when it is non-empty) or else from the ordinary
# consumed buffer.  The character pushed back is +c+ when given, otherwise
# the one just removed.  Seek, line and column counters are rewound; after
# un-reading a newline the column is recomputed from the consumed buffer.
def ungetc(c = nil)
  last_read = @here_readed.empty? ? @readed.pop : @here_readed.pop
  c ||= last_read
  @rests.unshift c
  @seek -= 1
  if c != "\n"
    @char_no -= 1
  else
    @line_no -= 1
    chars_after_newline = @readed.reverse.index("\n")
    @char_no =
      if chars_after_newline
        @readed.size - chars_after_newline
      else
        @base_char_no + @readed.size
      end
  end
end
154
# Returns true when the upcoming input starts with +str+, without consuming
# anything.  More input is requested through buf_input until enough
# characters are buffered; returns false if input ends first.
def peek_equal?(str)
  # Split into single characters so the comparison lines up with @rests,
  # which holds one character per element.
  chrs = str.split(//)
  until @rests.size >= chrs.size
    return false unless buf_input
  end
  @rests[0, chrs.size] == chrs
end
162
# Matches +regexp+ against all currently buffered, not yet consumed input.
# Input is requested first when nothing is buffered; returns false if none
# is available, otherwise the result of the match (position or nil).
def peek_match?(regexp)
  while @rests.empty?
    buf_input or return false
  end
  pending = @rests.join("")
  regexp =~ pending
end
169
# Returns the pending character at offset +i+ (0 is the next character to
# be read) without consuming it, requesting more input as needed.  Returns
# nil when input ends before that offset is reached.
def peek(i = 0)
  until @rests.size > i
    return nil unless buf_input
  end
  @rests[i]
end
176
# Shows the prompt, fetches one more line from the input proc and appends
# its characters to the pending buffer.  Returns true on success and nil
# when the input proc has no more lines.
def buf_input
  prompt
  line = @input.call
  return nil unless line
  # @rests holds one character per element.
  @rests.concat line.split(//)
  true
end
private :buf_input
185
# Installs the prompt.
#
# p::     a Proc to call for each prompt, or any other object to be
#         printed as the prompt text
# block:: used as the prompt Proc when +p+ is omitted
#
# The prompt Proc is called with (ltype, indent, continue, line_no).  The
# block is captured explicitly with &block: a bare +proc+ default no
# longer captures the method's block on current Ruby versions.
def set_prompt(p = nil, &block)
  p = block if p.nil? && block
  if p.kind_of?(Proc)
    @prompt = p
  else
    @prompt = proc { print p }
  end
end
193
# Invokes the installed prompt proc, if any, with the current literal
# type, indent level, continuation flag and line number.
def prompt
  @prompt.call(@ltype, @indent, @continue, @line_no) if @prompt
end
199
# Resets the per-statement lexer state (literal type, nesting, lex state,
# flags), shows the prompt, then starts a fresh statement buffer whose
# first line is the current line number.
def initialize_input
  @ltype = @quoted = nil
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG
  @space_seen = false
  @here_header = false

  # the prompt reads @continue, so clear it before prompting
  @continue = false
  prompt

  @line = ""
  @exp_line_no = @line_no
end
215
# Reads input and yields each complete top-level statement together with
# the line number on which it started.
#
# Chunks returned by lex are accumulated in @line while a literal is open,
# a continuation is pending or the nesting depth is positive.  A statement
# consisting of a single newline is not yielded.  The loop ends when lex
# returns nil.  TerminateLineInput discards the statement in progress and
# restarts with fresh state.
def each_top_level_statement
  initialize_input
  catch(:TERM_INPUT) do
    loop do
      begin
        @continue = false
        prompt
        chunk = lex
        if chunk
          @line.concat chunk
          # statement still incomplete: keep reading
          next if @ltype || @continue || @indent > 0
        elsif @line == ''
          throw :TERM_INPUT
        end

        yield @line, @exp_line_no unless @line == "\n"
        break unless chunk

        @line = ''
        @exp_line_no = @line_no

        @indent = 0
        @indent_stack = []
        prompt
      rescue TerminateLineInput
        initialize_input
        prompt
        get_readed
      end
    end
  end
end
250
# Consumes tokens up to the end of the current logical line: a newline or
# end-of-script token seen while no continuation is pending, or a nil
# token.  Returns the text consumed, or nil when nothing was consumed and
# the input is finished.
def lex
  tk = nil
  while true
    tk = token
    line_ended = tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)
    break if (line_ended && !@continue) || tk.nil?
  end

  text = get_readed
  return text unless text == ""
  finished = tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
  finished ? nil : text
end
267
# Reads and returns the next token.
#
# The position before the token is remembered in @prev_seek,
# @prev_line_no and @prev_char_no.  Space tokens are skipped while
# @skip_space is set.  On a SyntaxError from the rule table the error is
# re-raised when @exception_on_syntax_error is set; otherwise a TkError
# token is returned in its place.  When @readed_auto_clean_up is set the
# consumed-text buffer is emptied after each token.
def token
  @prev_seek = @seek
  @prev_line_no = @line_no
  @prev_char_no = @char_no
  begin
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError
      # Propagate to the caller rather than terminating the whole process.
      raise if @exception_on_syntax_error
      tk = TkError.new(@seek, @line_no, @char_no)
    end
  end while @skip_space and tk.kind_of?(TkSPACE)
  get_readed if @readed_auto_clean_up
  tk
end
289
# Reserved words that open a nesting level ("when" deliberately omitted).
ENINDENT_CLAUSE = %w[
  case class def do for if
  module unless until while begin
]

# Reserved words that close a nesting level ("when" deliberately omitted).
DEINDENT_CLAUSE = %w[end]

# %-literal type letter => literal type marker.
PERCENT_LTYPE = {
  "q" => "'",
  "Q" => '"',
  "x" => "`",
  "r" => "/",
  "w" => "]"
}

# Opening bracket of a %-literal => its closing bracket.
PERCENT_PAREN = {
  "{" => "}",
  "[" => "]",
  "<" => ">",
  "(" => ")"
}

# Literal type marker => token class for a plain literal.
Ltype2Token = {
  "'" => TkSTRING,
  '"' => TkSTRING,
  "`" => TkXSTRING,
  "/" => TkREGEXP,
  "]" => TkDSTRING
}

# Literal type marker => token class used when the literal contains "#".
DLtype2Token = {
  '"' => TkDSTRING,
  "`" => TkDXSTRING,
  "/" => TkDREGEXP,
}
324
# Creates the SLex rule table (@OP) and registers the first half of the
# operator rules; lex_int2 registers the rest.  Each rule's block runs
# when its text is matched and returns the resulting token, updating
# @lex_state and related state on the way.
def lex_init()
  @OP = SLex.new

  # NUL, ^D and ^Z end the script.
  @OP.def_rules("\0", "\004", "\032") do
    Token(TkEND_OF_SCRIPT)
  end

  # A run of blanks becomes a single space token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do
    @space_seen = true
    while getc =~ /[ \t\f\r\13]/; end
    ungetc
    Token(TkSPACE)
  end

  @OP.def_rule("#") do
    |op, io|
    identify_comment
  end

  # Embedded document: "=begin" at the start of a line up to the "=end"
  # line.  Reading stops at end of input as well, so an unterminated
  # document cannot loop forever.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
    |op, io|
    @ltype = "="
    # Consumes through the next newline; yields nil when input ran out.
    skip_line = proc do
      c = nil
      while (c = getc)
        break if c == "\n"
      end
      c
    end
    at_eof = skip_line.call.nil?
    until at_eof || (peek_equal?("=end") && peek(4) =~ /\s/)
      at_eof = skip_line.call.nil?
    end
    gets unless at_eof
    @ltype = nil
    Token(TkRD_COMMENT)
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # the expression cannot end here, so the line continues
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
      # drop keyword entries; stop at the innermost open bracket
      until (@indent_stack.empty? ||
             [TkLPAREN, TkLBRACK, TkLBRACE,
              TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
        @indent_stack.pop
      end
    end
    @here_header = false
    @here_readed = []
    Token(TkNL)
  end

  @OP.def_rules("*", "**",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do
    |op, io|
    case @lex_state
    when EXPR_FNAME, EXPR_DOT
      @lex_state = EXPR_ARG
    else
      @lex_state = EXPR_BEG
    end
    Token(op)
  end

  @OP.def_rules("!", "!=", "!~") do
    |op, io|
    #@lex_state = EXPR_BEG
    Token(op)
  end

  # "<<" is either a here-document introducer or the shift operator.
  @OP.def_rules("<<") do
    |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
        (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      if /\S/ =~ c && (/["'`]/ =~ c || /[\w_]/ =~ c || c == "-")
        tk = identify_here_document
      end
    end
    unless tk
      tk = Token(op)
      case @lex_state
      when EXPR_FNAME, EXPR_DOT
        @lex_state = EXPR_ARG
      else
        @lex_state = EXPR_BEG
      end
    end
    tk
  end

  @OP.def_rules("'", '"') do
    |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do
    |op, io|
    if @lex_state == EXPR_FNAME
      Token(op)
    else
      identify_string(op)
    end
  end

  # "?" is either the ternary operator or a character literal.
  @OP.def_rules('?') do
    |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch =~ /\s/
        ungetc
        @lex_state = EXPR_BEG;
        Token(TkQUESTION)
      else
        if (ch == '\\')
          read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # Operator assignment: the token carries the operator without "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
    |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1)
  end

  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do
    |op, io|
    Token(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do
    |op, io|
    Token(op)
  end

  # "+"/"-" is the sign of a numeric literal or an operator.
  @OP.def_rules("+", "-") do
    |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number
      else
        @lex_state = EXPR_BEG
      end
      Token(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number
    else
      # for "obj.if" etc.
      @lex_state = EXPR_DOT
      Token(TkDOT)
    end
  end

  @OP.def_rules("..", "...") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  lex_int2
end
514
# Registers the second half of the operator rules on @OP: brackets,
# colons, "/" and "%" (operator or literal), variables sigils and the
# catch-all rule for numbers and identifiers.
def lex_int2
  @OP.def_rules("]", "}", ")") do
    |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    @indent_stack.pop
    Token(op)
  end

  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME;
      Token(TkSYMBEG)
    end
  end

  @OP.def_rule("::") do
    # p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      Token(TkCOLON2)
    end
  end

  # "/" starts a regexp where an expression may begin, else it divides.
  @OP.def_rule("/") do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "/") #/)
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/") #/)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^")
  end

  @OP.def_rules(",") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  @OP.def_rules(";") do
    |op, io|
    @lex_state = EXPR_BEG
    # drop keyword entries; stop at the innermost open bracket
    until (@indent_stack.empty? ||
           [TkLPAREN, TkLBRACK, TkLBRACE,
            TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
      @indent_stack.pop
    end
    Token(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      tk_c = TkfLPAREN
    else
      tk_c = TkLPAREN
    end
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      tk_c = TkfLBRACK
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        tk_c = TkLBRACK
      elsif @lex_state == EXPR_ARG && @space_seen
        tk_c = TkLBRACK
      else
        tk_c = TkfLBRACK
      end
      @lex_state = EXPR_BEG
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      tk_c = TkLBRACE
    else
      tk_c = TkfLBRACE
    end
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  # Backslash-newline joins lines; any other backslash is its own token.
  @OP.def_rule('\\') do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE)
    else
      ungetc
      Token("\\")
    end
  end

  # "%" starts a %-literal where an expression may begin, else it is the
  # modulo operator.
  @OP.def_rule('%') do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation
    elsif peek(0) == '='
      getc
      # an expression follows "%=", same as after "/="
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :%)
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation
    else
      @lex_state = EXPR_BEG
      Token("%") #))
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[\w_@]/
      ungetc
      identify_identifier
    else
      Token("@")
    end
  end

  # Catch-all: numbers and identifiers.  Returns nil for anything else.
  @OP.def_rule("") do
    |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end
712
# Lexes what follows a "$": a punctuation global, a "$-x" option
# variable, a back reference, a numbered group reference or a named
# global (handed to identify_identifier).  A "$" followed by anything
# else becomes a bare "$" token.
def identify_gvar
  @lex_state = EXPR_END

  ch = getc
  case ch
  when /[~_*$?!@\/\\;,=:<>".]/ #"
    Token(TkGVAR, "$" + ch)
  when "-"
    Token(TkGVAR, "$-" + getc)
  when "&", "`", "'", "+"
    Token(TkBACK_REF, "$" + ch)
  when /[1-9]/
    # swallow the remaining digits, then give back the first non-digit
    begin
      digit = getc
    end while digit =~ /[0-9]/
    ungetc
    Token(TkNTH_REF)
  when /\w/
    # push back both the letter and the "$" so the name is read whole
    2.times { ungetc }
    identify_identifier
  else
    ungetc
    Token("$")
  end
end
736
737 def identify_identifier
738 token = ""
739 if peek(0) =~ /[$@]/
740 token.concat(c = getc)
741 if c == "@" and peek(0) == "@"
742 token.concat getc
743 end
744 end
745
746 while (ch = getc) =~ /\w|_/
747 print ":", ch, ":" if RubyLex.debug?
748 token.concat ch
749 end
750 ungetc
751
752 if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
753 token.concat getc
754 end
755
756 # almost fix token
757
758 case token
759 when /^\$/
760 return Token(TkGVAR, token)
761 when /^\@\@/
762 @lex_state = EXPR_END
763 # p Token(TkCVAR, token)
764 return Token(TkCVAR, token)
765 when /^\@/
766 @lex_state = EXPR_END
767 return Token(TkIVAR, token)
768 end
769
770 if @lex_state != EXPR_DOT
771 print token, "\n" if RubyLex.debug?
772
773 token_c, *trans = TkReading2Token[token]
774 if token_c
775 # reserved word?
776
777 if (@lex_state != EXPR_BEG &&
778 @lex_state != EXPR_FNAME &&
779 trans[1])
780 # modifiers
781 token_c = TkSymbol2Token[trans[1]]
782 @lex_state = trans[0]
783 else
784 if @lex_state != EXPR_FNAME
785 if ENINDENT_CLAUSE.include?(token)
786 # check for ``class = val''.
787 valid = true
788 case token
789 when "class"
790 valid = false unless peek_match?(/^\s*(<<|\w)/)
791
792 when "def"
793 valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)?=|\&\&|\|\|)/)
794 when "do"
795 valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
796 when *ENINDENT_CLAUSE
797 valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
798 else
799 # no nothing
800 end
801 if valid
802 if token == "do"
803 if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
804 @indent += 1
805 @indent_stack.push token_c
806 end
807 else
808 @indent += 1
809 @indent_stack.push token_c
810 end
811 # p @indent_stack
812 end
813
814 elsif DEINDENT_CLAUSE.include?(token)
815 @indent -= 1
816 @indent_stack.pop
817 end
818 @lex_state = trans[0]
819 else
820 @lex_state = EXPR_END
821 end
822 end
823 return Token(token_c, token)
824 end
825 end
826
827 if @lex_state == EXPR_FNAME
828 @lex_state = EXPR_END
829 if peek(0) == '='
830 token.concat getc
831 end
832 elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
833 @lex_state = EXPR_ARG
834 else
835 @lex_state = EXPR_END
836 end
837
838 if token[0, 1] =~ /[A-Z]/
839 return Token(TkCONSTANT, token)
840 elsif token[token.size - 1, 1] =~ /[!?]/
841 return Token(TkFID, token)
842 else
843 return Token(TkIDENTIFIER, token)
844 end
845 end
846
# Lexes a here-document; called after "<<" has been consumed.
#
# Reads the terminator (optionally preceded by "-" and optionally quoted),
# sets aside the rest of the introducing line, skips the body up to the
# terminator line, and finally pushes the set-aside remainder back so that
# lexing continues on the introducing line.  Returns the literal's token.
def identify_here_document
  indent = false
  ch = getc
  if ch == "-"
    # "<<-" allows the terminator line to be indented
    ch = getc
    indent = true
  end
  if /['"`]/ =~ ch
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  # remainder of the introducing line, through its newline
  reserve = []
  while ch = getc
    reserve.push ch
    if ch == "\\"
      reserve.push ch = getc
    elsif ch == "\n"
      break
    end
  end

  # Skip body lines until the terminator.  gets yields an empty string
  # only when input is exhausted (a blank line still carries its "\n"),
  # so stop there rather than searching forever.
  @here_header = false
  while (l = gets) && !l.empty?
    l = l.chomp
    break if (indent ? l.strip : l) == quoted
  end

  @here_header = true
  @here_readed.concat reserve
  while ch = reserve.pop
    ungetc ch
  end

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt])
end
894
# Lexes a %-literal; called after "%" has been consumed.  A type letter
# (q, Q, x, r, w) selects the literal type; a non-word character directly
# after "%" means a double-quoted string.  The closing delimiter is the
# matching bracket for an opening bracket, otherwise the delimiter itself.
def identify_quotation
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    RubyLex.fail SyntaxError, "unknown type of %string"
  end
  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted)
end
912
# Lexes a numeric literal starting at the next character and returns a
# TkINTEGER or TkFLOAT token.
#
# A leading "0" that is not followed by ".", "e" or "E" introduces a
# hexadecimal ("0x"), binary ("0b") or octal literal.  Everything else is
# decimal: digits and "_", at most one "." (only when a digit follows it)
# and at most one exponent with an optional sign.
def identify_number
  @lex_state = EXPR_END

  if peek(0) == "0" && peek(1) !~ /[.eE]/
    getc
    if /[xX]/ =~ peek(0)
      getc
      match = /[0-9a-fA-F_]/
    elsif /[bB]/ =~ peek(0)
      getc
      match = /[01_]/
    else
      match = /[0-7_]/
    end
    while ch = getc
      if ch !~ match
        ungetc
        break
      end
    end
    return Token(TkINTEGER)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while ch = getc
    case ch
    when /[0-9_]/
    when allow_point && "."
      # "1.foo" is a method call on 1: give the "." back, stay an integer
      if peek(0) !~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      allow_point = false
    when allow_e && "e", allow_e && "E"
      type = TkFLOAT
      if peek(0) =~ /[+-]/
        getc
      end
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type)
end
962
# Lexes the body of a string-like literal up to its closing delimiter.
#
# ltype::  literal type marker (', ", `, / or ] for %w)
# quoted:: closing delimiter; defaults to +ltype+
#
# Bracket delimiters nest.  A "#" inside any literal other than ' and %w
# selects the "D" token class from DLtype2Token.  For regexps all trailing
# option letters are consumed.  @ltype and @quoted are cleared and
# @lex_state is set to EXPR_END however the method is left.
def identify_string(ltype, quoted = ltype)
  @ltype = ltype
  @quoted = quoted
  subtype = nil
  begin
    nest = 0
    # only bracket-style delimiters can nest
    bracketed = PERCENT_PAREN.values.include?(@quoted)
    while ch = getc
      if @quoted == ch and nest == 0
        break
      elsif @ltype != "'" && @ltype != "]" and ch == "#"
        subtype = true
      elsif ch == '\\' #'
        read_escape
      end
      if bracketed
        if PERCENT_PAREN[ch] == @quoted
          nest += 1
        elsif ch == @quoted
          nest -= 1
        end
      end
    end
    if @ltype == "/"
      # a regexp may carry several option letters, e.g. /x/mi
      while peek(0) =~ /[imxoesun]/
        getc
      end
    end
    if subtype
      Token(DLtype2Token[ltype])
    else
      Token(Ltype2Token[ltype])
    end
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
1001
# Lexes a "#" comment: consumes up to, but not including, the newline that
# ends it and returns a comment token.  @ltype is "#" while reading and is
# cleared only when the terminating newline is found.
def identify_comment
  @ltype = "#"

  ch = getc
  ch = getc while ch && ch != "\n"
  if ch
    # stopped on the newline: leave it for the newline rule
    @ltype = nil
    ungetc
  end
  Token(TkCOMMENT)
end
1017
# Consumes the remainder of a backslash escape; called after the "\" has
# been read.  Handles single-character escapes, up to three octal digits,
# "x" with up to two hex digits, and the \M-, \C-, \c and \^ prefixes,
# which may themselves be followed by another escape.
def read_escape
  ch = getc
  case ch
  when "\n", "\r", "\f", "\\", "n", "t", "r", "f", "v", "a", "e", "b" #"
    # single character: nothing more to read
  when /[0-7]/
    ungetc ch
    skip_escape_digits(3, /[0-7]/)

  when "x"
    skip_escape_digits(2, /[0-9a-fA-F]/)

  when "M"
    if getc == '-'
      read_escape if getc == "\\" #"
    else
      ungetc
    end

  when "C", "c", "^"
    # only the "C" form takes a "-" before the controlled character
    if ch == "C" && getc != "-"
      ungetc
    elsif getc == "\\" #"
      read_escape
    end
  else
    # other characters
  end
end

# Consumes at most +limit+ characters matching +digit+, giving back the
# first character that does not match.
def skip_escape_digits(limit, digit)
  limit.times do
    ch = getc
    break if ch.nil?
    unless ch =~ digit
      ungetc
      break
    end
  end
end
private :skip_escape_digits
1066 end