lib/irb/ruby-lex.rb


DEFINITIONS

This source file includes the following functions.


   1  #
   2  #   irb/ruby-lex.rb - ruby lexical analyzer
   3  #       $Release Version: 0.9$
   4  #       $Revision: 1.11 $
   5  #       $Date: 2002/07/29 06:14:08 $
   6  #       by Keiju ISHITSUKA(keiju@ishitsuka.com)
   7  #
   8  # --
   9  #
  10  #   
  11  #
  12  
  13  require "e2mmap"
  14  require "irb/slex"
  15  require "irb/ruby-token"
  16  
  17  class RubyLex
  18    @RCS_ID='-$Id: ruby-lex.rb,v 1.11 2002/07/29 06:14:08 matz Exp $-'
  19  
  20    extend Exception2MessageMapper
  21    def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
  22    def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
  23    def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
  24    def_exception(:TkReading2TokenDuplicateError, 
  25                  "key duplicate(token_n='%s', key='%s')")
  26    def_exception(:SyntaxError, "%s")
  27  
  28    def_exception(:TerminateLineInput, "Terminate Line Input")
  29    
  30    include RubyToken
  31  
  class << self
    # Tracing verbosity of the lexer. Values above zero switch the debug
    # output on.
    attr_accessor :debug_level

    # Returns true when the current debug level is greater than zero.
    def debug?
      level = @debug_level
      level > 0
    end
  end
  38    @debug_level = 0
  39  
  # Builds the rule table, selects STDIN as the input and resets every
  # position counter, buffer and option to its starting value.
  def initialize
    lex_init
    set_input(STDIN)

    # Position of the next character: absolute offset, line and column.
    @seek = 0
    @line_no = 1
    @exp_line_no = @line_no
    @base_char_no = 0
    @char_no = 0

    # @rests holds characters fetched but not yet consumed; @readed and
    # @here_readed record the characters that getc has handed out.
    @rests = []
    @readed = []
    @here_readed = []

    @indent = 0
    @indent_stack = []

    @skip_space = false
    @readed_auto_clean_up = false
    @exception_on_syntax_error = true
  end
  59  
  60    attr_accessor :skip_space
  61    attr_accessor :readed_auto_clean_up
  62    attr_accessor :exception_on_syntax_error
  63  
  64    attr_reader :seek
  65    attr_reader :char_no
  66    attr_reader :line_no
  67    attr_reader :indent
  68  
  69    # io functions
  # Selects where source text is read from.
  #
  # io    - object stored in @io; used by #eof? and by the default reader.
  # p     - optional callable that returns the next chunk of text, or nil
  #         when the input is exhausted.
  # block - used as the reader when +p+ is not callable.
  #
  # When neither a callable nor a block is given, text is read with io.gets.
  # The block is captured explicitly because a bare `proc` can no longer
  # pick up the caller's block on current Ruby versions.
  def set_input(io, p = nil, &block)
    @io = io
    @input =
      if p.respond_to?(:call)
        p
      elsif block
        block
      else
        proc { @io.gets }
      end
  end
  80  
  # Returns everything consumed since the previous call as one string and
  # empties the record.
  #
  # @base_char_no is updated first: when the record contains a newline it
  # becomes the number of characters that follow the last newline, otherwise
  # it grows by the number of recorded characters.
  def get_readed
    after_newline = @readed.reverse.index("\n")
    if after_newline
      @base_char_no = after_newline
    else
      @base_char_no += @readed.size
    end

    consumed = @readed.join("")
    @readed = []
    consumed
  end
  92  
  # Consumes and returns the next character, or nil when no more input can
  # be fetched.
  #
  # The character is recorded in @here_readed while @here_header is set and
  # in @readed otherwise; @seek, @line_no and @char_no are advanced.
  def getc
    while @rests.empty?
      buf_input or return nil
    end

    ch = @rests.shift
    (@here_header ? @here_readed : @readed).push ch
    @seek += 1
    if ch == "\n"
      @line_no += 1
      @char_no = 0
    else
      @char_no += 1
    end
    ch
  end
 112  
 113    def gets
 114      l = ""
 115      while c = getc
 116        l.concat c
 117        break if c == "\n"
 118      end
 119      l
 120    end
 121  
 122    def eof?
 123      @io.eof?
 124    end
 125  
 126    def getc_of_rests
 127      if @rests.empty?
 128        nil
 129      else
 130        getc
 131      end
 132    end
 133  
 134    def ungetc(c = nil)
 135      if @here_readed.empty?
 136        c2 = @readed.pop
 137      else
 138        c2 = @here_readed.pop
 139      end
 140      c = c2 unless c
 141      @rests.unshift c #c = 
 142        @seek -= 1
 143      if c == "\n"
 144        @line_no -= 1 
 145        if idx = @readed.reverse.index("\n")
 146          @char_no = @readed.size - idx
 147        else
 148          @char_no = @base_char_no + @readed.size
 149        end
 150      else
 151        @char_no -= 1
 152      end
 153    end
 154  
 155    def peek_equal?(str)
 156      chrs = str.split(//)
 157      until @rests.size >= chrs.size
 158        return false unless buf_input
 159      end
 160      @rests[0, chrs.size] == chrs
 161    end
 162  
 163    def peek_match?(regexp)
 164      while @rests.empty?
 165        return false unless buf_input
 166      end
 167      regexp =~ @rests.join("")
 168    end
 169  
 170    def peek(i = 0)
 171      while @rests.size <= i
 172        return nil unless buf_input
 173      end
 174      @rests[i]
 175    end
 176  
 177    def buf_input
 178      prompt
 179      line = @input.call
 180      return nil unless line
 181      @rests.concat line.split(//)
 182      true
 183    end
 184    private :buf_input
 185  
 186    def set_prompt(p = proc)
 187      if p.kind_of?(Proc)
 188        @prompt = p
 189      else
 190        @prompt = proc{print p}
 191      end
 192    end
 193  
 194    def prompt
 195      if @prompt
 196        @prompt.call(@ltype, @indent, @continue, @line_no)
 197      end
 198    end
 199  
 200    def initialize_input
 201      @ltype = nil
 202      @quoted = nil
 203      @indent = 0
 204      @indent_stack = []
 205      @lex_state = EXPR_BEG
 206      @space_seen = false
 207      @here_header = false
 208      
 209      @continue = false
 210      prompt
 211  
 212      @line = ""
 213      @exp_line_no = @line_no
 214    end
 215    
 216    def each_top_level_statement
 217      initialize_input
 218      catch(:TERM_INPUT) do
 219        loop do
 220          begin
 221            @continue = false
 222            prompt
 223            unless l = lex
 224              throw :TERM_INPUT if @line == ''
 225            else
 226              #p l
 227              @line.concat l
 228              if @ltype or @continue or @indent > 0
 229                next
 230              end
 231            end
 232            if @line != "\n"
 233              yield @line, @exp_line_no
 234            end
 235            break unless l
 236            @line = ''
 237            @exp_line_no = @line_no
 238  
 239            @indent = 0
 240            @indent_stack = []
 241            prompt
 242          rescue TerminateLineInput
 243            initialize_input
 244            prompt
 245            get_readed
 246          end
 247        end
 248      end
 249    end
 250  
 251    def lex
 252      until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
 253               !@continue or
 254               tk.nil?)
 255        #p tk
 256        #p @lex_state
 257        #p self
 258      end
 259      line = get_readed
 260      #      print self.inspect
 261      if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
 262        nil
 263      else
 264        line
 265      end
 266    end
 267  
 268    def token
 269      #      require "tracer"
 270      #      Tracer.on
 271      @prev_seek = @seek
 272      @prev_line_no = @line_no
 273      @prev_char_no = @char_no
 274      begin
 275        begin
 276          tk = @OP.match(self)
 277          @space_seen = tk.kind_of?(TkSPACE)
 278        rescue SyntaxError
 279          abort if @exception_on_syntax_error
 280          tk = TkError.new(@seek, @line_no, @char_no)
 281        end
 282      end while @skip_space and tk.kind_of?(TkSPACE)
 283      if @readed_auto_clean_up
 284        get_readed
 285      end
 286      #      Tracer.off
 287      tk
 288    end
 289    
 290    ENINDENT_CLAUSE = [
 291      "case", "class", "def", "do", "for", "if",
 292      "module", "unless", "until", "while", "begin" #, "when"
 293    ]
 294    DEINDENT_CLAUSE = ["end" #, "when"
 295    ]
 296  
 297    PERCENT_LTYPE = {
 298      "q" => "\'",
 299      "Q" => "\"",
 300      "x" => "\`",
 301      "r" => "\/",
 302      "w" => "]"
 303    }
 304    
 305    PERCENT_PAREN = {
 306      "{" => "}",
 307      "[" => "]",
 308      "<" => ">",
 309      "(" => ")"
 310    }
 311  
 312    Ltype2Token = {
 313      "\'" => TkSTRING,
 314      "\"" => TkSTRING,
 315      "\`" => TkXSTRING,
 316      "\/" => TkREGEXP,
 317      "]" => TkDSTRING
 318    }
 319    DLtype2Token = {
 320      "\"" => TkDSTRING,
 321      "\`" => TkDXSTRING,
 322      "\/" => TkDREGEXP,
 323    }
 324  
 325    def lex_init()
 326      @OP = SLex.new
 327      @OP.def_rules("\0", "\004", "\032") do
 328        Token(TkEND_OF_SCRIPT)
 329      end
 330  
 331      @OP.def_rules(" ", "\t", "\f", "\r", "\13") do
 332        @space_seen = true
 333        while getc =~ /[ \t\f\r\13]/; end
 334        ungetc
 335        Token(TkSPACE)
 336      end
 337  
 338      @OP.def_rule("#") do
 339        |op, io|
 340        identify_comment
 341      end
 342  
 343      @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
 344        |op, io|
 345        @ltype = "="
 346        until getc == "\n"; end
 347        until peek_equal?("=end") && peek(4) =~ /\s/
 348          until getc == "\n"; end
 349        end
 350        gets
 351        @ltype = nil
 352        Token(TkRD_COMMENT)
 353      end
 354  
 355      @OP.def_rule("\n") do
 356        print "\\n\n" if RubyLex.debug?
 357        case @lex_state
 358        when EXPR_BEG, EXPR_FNAME, EXPR_DOT
 359          @continue = true
 360        else
 361          @continue = false
 362          @lex_state = EXPR_BEG
 363          until (@indent_stack.empty? || 
 364                 [TkLPAREN, TkLBRACK, TkLBRACE, 
 365                   TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
 366            @indent_stack.pop
 367          end
 368        end
 369        @here_header = false
 370        @here_readed = []
 371        Token(TkNL)
 372      end
 373  
 374      @OP.def_rules("*", "**",    
 375                    "=", "==", "===", 
 376                    "=~", "<=>",  
 377                    "<", "<=",
 378                    ">", ">=", ">>") do
 379        |op, io|
 380        case @lex_state
 381        when EXPR_FNAME, EXPR_DOT
 382          @lex_state = EXPR_ARG
 383        else
 384          @lex_state = EXPR_BEG
 385        end
 386        Token(op)
 387      end
 388  
 389      @OP.def_rules("!", "!=", "!~") do
 390        |op, io|
 391        #@lex_state = EXPR_BEG
 392        Token(op)
 393      end
 394  
 395      @OP.def_rules("<<") do
 396        |op, io|
 397        tk = nil
 398        if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
 399            (@lex_state != EXPR_ARG || @space_seen)
 400          c = peek(0)
 401          if /\S/ =~ c && (/["'`]/ =~ c || /[\w_]/ =~ c || c == "-")
 402            tk = identify_here_document
 403          end
 404        end
 405        unless tk
 406          tk = Token(op)
 407          case @lex_state
 408          when EXPR_FNAME, EXPR_DOT
 409            @lex_state = EXPR_ARG
 410          else
 411            @lex_state = EXPR_BEG
 412          end
 413        end
 414        tk
 415      end
 416  
 417      @OP.def_rules("'", '"') do
 418        |op, io|
 419        identify_string(op)
 420      end
 421  
 422      @OP.def_rules("`") do
 423        |op, io|
 424        if @lex_state == EXPR_FNAME
 425          Token(op)
 426        else
 427          identify_string(op)
 428        end
 429      end
 430  
 431      @OP.def_rules('?') do
 432        |op, io|
 433        if @lex_state == EXPR_END
 434          @lex_state = EXPR_BEG
 435          Token(TkQUESTION)
 436        else
 437          ch = getc
 438          if @lex_state == EXPR_ARG && ch =~ /\s/
 439            ungetc
 440            @lex_state = EXPR_BEG;
 441            Token(TkQUESTION)
 442          else
 443            if (ch == '\\') 
 444              read_escape
 445            end
 446            @lex_state = EXPR_END
 447            Token(TkINTEGER)
 448          end
 449        end
 450      end
 451  
 452      @OP.def_rules("&", "&&", "|", "||") do
 453        |op, io|
 454        @lex_state = EXPR_BEG
 455        Token(op)
 456      end
 457      
 458      @OP.def_rules("+=", "-=", "*=", "**=", 
 459                    "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
 460        |op, io|
 461        @lex_state = EXPR_BEG
 462        op =~ /^(.*)=$/
 463        Token(TkOPASGN, $1)
 464      end
 465  
 466      @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do
 467        |op, io|
 468        Token(op)
 469      end
 470  
 471      @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do
 472        |op, io|
 473        Token(op)
 474      end
 475  
 476      @OP.def_rules("+", "-") do
 477        |op, io|
 478        catch(:RET) do
 479          if @lex_state == EXPR_ARG
 480            if @space_seen and peek(0) =~ /[0-9]/
 481              throw :RET, identify_number
 482            else
 483              @lex_state = EXPR_BEG
 484            end
 485          elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
 486            throw :RET, identify_number
 487          else
 488            @lex_state = EXPR_BEG
 489          end
 490          Token(op)
 491        end
 492      end
 493  
 494      @OP.def_rule(".") do
 495        @lex_state = EXPR_BEG
 496        if peek(0) =~ /[0-9]/
 497          ungetc
 498          identify_number
 499        else
 500          # for "obj.if" etc.
 501          @lex_state = EXPR_DOT
 502          Token(TkDOT)
 503        end
 504      end
 505  
 506      @OP.def_rules("..", "...") do
 507        |op, io|
 508        @lex_state = EXPR_BEG
 509        Token(op)
 510      end
 511  
 512      lex_int2
 513    end
 514    
 515    def lex_int2
 516      @OP.def_rules("]", "}", ")") do
 517        |op, io|
 518        @lex_state = EXPR_END
 519        @indent -= 1
 520        @indent_stack.pop
 521        Token(op)
 522      end
 523  
 524      @OP.def_rule(":") do
 525        if @lex_state == EXPR_END || peek(0) =~ /\s/
 526          @lex_state = EXPR_BEG
 527          Token(TkCOLON)
 528        else
 529          @lex_state = EXPR_FNAME;
 530          Token(TkSYMBEG)
 531        end
 532      end
 533  
 534      @OP.def_rule("::") do
 535  #      p @lex_state.id2name, @space_seen
 536        if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
 537          @lex_state = EXPR_BEG
 538          Token(TkCOLON3)
 539        else
 540          @lex_state = EXPR_DOT
 541          Token(TkCOLON2)
 542        end
 543      end
 544  
 545      @OP.def_rule("/") do
 546        |op, io|
 547        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
 548          identify_string(op)
 549        elsif peek(0) == '='
 550          getc
 551          @lex_state = EXPR_BEG
 552          Token(TkOPASGN, "/") #/)
 553        elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
 554          identify_string(op)
 555        else 
 556          @lex_state = EXPR_BEG
 557          Token("/") #/)
 558        end
 559      end
 560  
 561      @OP.def_rules("^") do
 562        @lex_state = EXPR_BEG
 563        Token("^")
 564      end
 565  
 566      #       @OP.def_rules("^=") do
 567      #   @lex_state = EXPR_BEG
 568      #   Token(OP_ASGN, :^)
 569      #       end
 570      
 571      @OP.def_rules(",") do
 572        |op, io|
 573        @lex_state = EXPR_BEG
 574        Token(op)
 575      end
 576  
 577      @OP.def_rules(";") do
 578        |op, io|
 579        @lex_state = EXPR_BEG
 580        until (@indent_stack.empty? || 
 581               [TkLPAREN, TkLBRACK, TkLBRACE, 
 582                 TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
 583          @indent_stack.pop
 584        end
 585        Token(op)
 586      end
 587  
 588      @OP.def_rule("~") do
 589        @lex_state = EXPR_BEG
 590        Token("~")
 591      end
 592  
 593      @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
 594        @lex_state = EXPR_BEG
 595        Token("~")
 596      end
 597      
 598      @OP.def_rule("(") do
 599        @indent += 1
 600        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
 601          @lex_state = EXPR_BEG
 602          tk_c = TkfLPAREN
 603        else
 604          @lex_state = EXPR_BEG
 605          tk_c = TkLPAREN
 606        end
 607        @indent_stack.push tk_c
 608        tk = Token(tk_c)
 609      end
 610  
 611      @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
 612        Token("[]")
 613      end
 614  
 615      @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
 616        Token("[]=")
 617      end
 618  
 619      @OP.def_rule("[") do
 620        @indent += 1
 621        if @lex_state == EXPR_FNAME
 622          tk_c = TkfLBRACK
 623        else
 624          if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
 625            tk_c = TkLBRACK
 626          elsif @lex_state == EXPR_ARG && @space_seen
 627            tk_c = TkLBRACK
 628          else
 629            tk_c = TkfLBRACK
 630          end
 631          @lex_state = EXPR_BEG
 632        end
 633        @indent_stack.push tk_c
 634        Token(tk_c)
 635      end
 636  
 637      @OP.def_rule("{") do
 638        @indent += 1
 639        if @lex_state != EXPR_END && @lex_state != EXPR_ARG
 640          tk_c = TkLBRACE
 641        else
 642          tk_c = TkfLBRACE
 643        end
 644        @lex_state = EXPR_BEG
 645        @indent_stack.push tk_c
 646        Token(tk_c)
 647      end
 648  
 649      @OP.def_rule('\\') do
 650        if getc == "\n"
 651          @space_seen = true
 652          @continue = true
 653          Token(TkSPACE)
 654        else
 655          ungetc
 656          Token("\\")
 657        end
 658      end
 659  
 660      @OP.def_rule('%') do
 661        |op, io|
 662        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
 663          identify_quotation
 664        elsif peek(0) == '='
 665          getc
 666          Token(TkOPASGN, :%)
 667        elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
 668          identify_quotation
 669        else
 670          @lex_state = EXPR_BEG
 671          Token("%") #))
 672        end
 673      end
 674  
 675      @OP.def_rule('$') do
 676        identify_gvar
 677      end
 678  
 679      @OP.def_rule('@') do
 680        if peek(0) =~ /[\w_@]/
 681          ungetc
 682          identify_identifier
 683        else
 684          Token("@")
 685        end
 686      end
 687  
 688      #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do 
 689      #   |op, io|
 690      #   @indent += 1
 691      #   @lex_state = EXPR_FNAME
 692      # # @lex_state = EXPR_END
 693      # # until @rests[0] == "\n" or @rests[0] == ";"
 694      # #   rests.shift
 695      # # end
 696      #       end
 697  
 698      @OP.def_rule("") do
 699        |op, io|
 700        printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
 701        if peek(0) =~ /[0-9]/
 702          t = identify_number
 703        elsif peek(0) =~ /[\w_]/
 704          t = identify_identifier
 705        end
 706        printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
 707        t
 708      end
 709      
 710      p @OP if RubyLex.debug?
 711    end
 712    
 713    def identify_gvar
 714      @lex_state = EXPR_END
 715      
 716      case ch = getc
 717      when /[~_*$?!@\/\\;,=:<>".]/   #"
 718        Token(TkGVAR, "$" + ch)
 719      when "-"
 720        Token(TkGVAR, "$-" + getc)
 721      when "&", "`", "'", "+"
 722        Token(TkBACK_REF, "$"+ch)
 723      when /[1-9]/
 724        while getc =~ /[0-9]/; end
 725        ungetc
 726        Token(TkNTH_REF)
 727      when /\w/
 728        ungetc
 729        ungetc
 730        identify_identifier
 731      else 
 732        ungetc
 733        Token("$")
 734      end
 735    end
 736    
 737    def identify_identifier
 738      token = ""
 739      if peek(0) =~ /[$@]/
 740        token.concat(c = getc)
 741        if c == "@" and peek(0) == "@"
 742          token.concat getc
 743        end
 744      end
 745  
 746      while (ch = getc) =~ /\w|_/
 747        print ":", ch, ":" if RubyLex.debug?
 748        token.concat ch
 749      end
 750      ungetc
 751      
 752      if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
 753        token.concat getc
 754      end
 755  
 756      # almost fix token
 757  
 758      case token
 759      when /^\$/
 760        return Token(TkGVAR, token)
 761      when /^\@\@/
 762        @lex_state = EXPR_END
 763        # p Token(TkCVAR, token)
 764        return Token(TkCVAR, token)
 765      when /^\@/
 766        @lex_state = EXPR_END
 767        return Token(TkIVAR, token)
 768      end
 769      
 770      if @lex_state != EXPR_DOT
 771        print token, "\n" if RubyLex.debug?
 772  
 773        token_c, *trans = TkReading2Token[token]
 774        if token_c
 775          # reserved word?
 776  
 777          if (@lex_state != EXPR_BEG &&
 778              @lex_state != EXPR_FNAME &&
 779              trans[1])
 780            # modifiers
 781            token_c = TkSymbol2Token[trans[1]]
 782            @lex_state = trans[0]
 783          else
 784            if @lex_state != EXPR_FNAME
 785              if ENINDENT_CLAUSE.include?(token)
 786                # check for ``class = val''.
 787                valid = true
 788                case token
 789                when "class"
 790                  valid = false unless peek_match?(/^\s*(<<|\w)/)
 791  
 792                when "def"
 793                  valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)?=|\&\&|\|\|)/)
 794                when "do"
 795                  valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
 796                when *ENINDENT_CLAUSE
 797                  valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
 798                else
 799                  # no nothing
 800                end
 801                if valid
 802                  if token == "do"
 803                    if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
 804                      @indent += 1
 805                      @indent_stack.push token_c
 806                    end
 807                  else
 808                    @indent += 1
 809                    @indent_stack.push token_c
 810                  end
 811  #               p @indent_stack
 812                end
 813  
 814              elsif DEINDENT_CLAUSE.include?(token)
 815                @indent -= 1
 816                @indent_stack.pop
 817              end
 818              @lex_state = trans[0]
 819            else
 820              @lex_state = EXPR_END
 821            end
 822          end
 823          return Token(token_c, token)
 824        end
 825      end
 826  
 827      if @lex_state == EXPR_FNAME
 828        @lex_state = EXPR_END
 829        if peek(0) == '='
 830          token.concat getc
 831        end
 832      elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
 833        @lex_state = EXPR_ARG
 834      else
 835        @lex_state = EXPR_END
 836      end
 837  
 838      if token[0, 1] =~ /[A-Z]/
 839        return Token(TkCONSTANT, token)
 840      elsif token[token.size - 1, 1] =~ /[!?]/
 841        return Token(TkFID, token)
 842      else
 843        return Token(TkIDENTIFIER, token)
 844      end
 845    end
 846  
 847    def identify_here_document
 848      ch = getc
 849  #    if lt = PERCENT_LTYPE[ch]
 850      if ch == "-"
 851        ch = getc
 852        indent = true
 853      end
 854      if /['"`]/ =~ ch
 855        lt = ch
 856        quoted = ""
 857        while (c = getc) && c != lt
 858          quoted.concat c
 859        end
 860      else
 861        lt = '"'
 862        quoted = ch.dup
 863        while (c = getc) && c =~ /\w/
 864          quoted.concat c
 865        end
 866        ungetc
 867      end
 868  
 869      ltback, @ltype = @ltype, lt
 870      reserve = []
 871      while ch = getc
 872        reserve.push ch
 873        if ch == "\\"
 874          reserve.push ch = getc
 875        elsif ch == "\n"
 876          break
 877        end
 878      end
 879  
 880      @here_header = false
 881      while (l = gets.chomp) && (indent ? l.strip : l) != quoted
 882      end
 883  
 884      @here_header = true
 885      @here_readed.concat reserve
 886      while ch = reserve.pop
 887        ungetc ch
 888      end
 889  
 890      @ltype = ltback
 891      @lex_state = EXPR_END
 892      Token(Ltype2Token[lt])
 893    end
 894    
 895    def identify_quotation
 896      ch = getc
 897      if lt = PERCENT_LTYPE[ch]
 898        ch = getc
 899      elsif ch =~ /\W/
 900        lt = "\""
 901      else
 902        RubyLex.fail SyntaxError, "unknown type of %string"
 903      end
 904  #     if ch !~ /\W/
 905  #       ungetc
 906  #       next
 907  #     end
 908      #@ltype = lt
 909      @quoted = ch unless @quoted = PERCENT_PAREN[ch]
 910      identify_string(lt, @quoted)
 911    end
 912  
 913    def identify_number
 914      @lex_state = EXPR_END
 915  
 916      if ch = getc
 917        if /[xX]/ =~ peek(0)
 918          ch = getc
 919          match = /[0-9a-fA-F_]/
 920        elsif /[bB]/ =~ peek(0)
 921          ch = getc
 922          match = /[01_]/
 923        else
 924          match = /[0-7_]/
 925        end
 926        while ch = getc
 927          if ch !~ match
 928            ungetc
 929            break
 930          end
 931        end
 932        return Token(TkINTEGER)
 933      end
 934      
 935      type = TkINTEGER
 936      allow_point = true
 937      allow_e = true
 938      while ch = getc
 939        case ch
 940        when /[0-9_]/
 941        when allow_point && "."
 942          type = TkFLOAT
 943          if peek(0) !~ /[0-9]/
 944            ungetc
 945            break
 946          end
 947          allow_point = false
 948        when allow_e && "e", allow_e && "E"
 949          type = TkFLOAT
 950          if peek(0) =~ /[+-]/
 951            getc
 952          end
 953          allow_e = false
 954          allow_point = false
 955        else
 956          ungetc
 957          break
 958        end
 959      end
 960      Token(type)
 961    end
 962    
 963    def identify_string(ltype, quoted = ltype)
 964      @ltype = ltype
 965      @quoted = quoted
 966      subtype = nil
 967      begin
 968        nest = 0
 969        while ch = getc
 970          if @quoted == ch and nest == 0
 971            break
 972          elsif @ltype != "'" && @ltype != "]" and ch == "#"
 973            subtype = true
 974          elsif ch == '\\' #'
 975            read_escape
 976          end
 977          if PERCENT_PAREN.values.include?(@quoted) 
 978            if PERCENT_PAREN[ch] == @quoted
 979              nest += 1
 980            elsif ch == @quoted
 981              nest -= 1
 982            end
 983          end
 984        end
 985        if @ltype == "/"
 986          if peek(0) =~ /i|m|x|o|e|s|u|n/
 987            getc
 988          end
 989        end
 990        if subtype
 991          Token(DLtype2Token[ltype])
 992        else
 993          Token(Ltype2Token[ltype])
 994        end
 995      ensure
 996        @ltype = nil
 997        @quoted = nil
 998        @lex_state = EXPR_END
 999      end
1000    end
1001    
1002    def identify_comment
1003      @ltype = "#"
1004  
1005      while ch = getc
1006  #      if ch == "\\" #"
1007  #       read_escape
1008  #      end
1009        if ch == "\n"
1010          @ltype = nil
1011          ungetc
1012          break
1013        end
1014      end
1015      return Token(TkCOMMENT)
1016    end
1017    
1018    def read_escape
1019      case ch = getc
1020      when "\n", "\r", "\f"
1021      when "\\", "n", "t", "r", "f", "v", "a", "e", "b" #"
1022      when /[0-7]/
1023        ungetc ch
1024        3.times do
1025          case ch = getc
1026          when /[0-7]/
1027          when nil
1028            break
1029          else
1030            ungetc
1031            break
1032          end
1033        end
1034        
1035      when "x"
1036        2.times do
1037          case ch = getc
1038          when /[0-9a-fA-F]/
1039          when nil
1040            break
1041          else
1042            ungetc
1043            break
1044          end
1045        end
1046  
1047      when "M"
1048        if (ch = getc) != '-'
1049          ungetc
1050        else
1051          if (ch = getc) == "\\" #"
1052            read_escape
1053          end
1054        end
1055  
1056      when "C", "c", "^"
1057        if ch == "C" and (ch = getc) != "-"
1058          ungetc
1059        elsif (ch = getc) == "\\" #"
1060          read_escape
1061        end
1062      else
1063        # other characters 
1064      end
1065    end
1066  end