Parent

RubyLexer

Attributes

cmdarg[RW]
command_start[RW]
cond[RW]
lex_state[R]

Additional context surrounding tokens that both the lexer and grammar use.

lex_strterm[RW]
lineno[W]
nest[RW]
parser[RW]
src[R]

Stream of data that yylex examines.

string_buffer[RW]
token[RW]

Last token read via yylex.

warnings[RW]

What handles warnings

yacc_value[RW]

Value of last token which had a value associated with it.

Public Class Methods

new() click to toggle source
# File lib/ruby_lexer.rb, line 219
# Prepares a new lexer: an empty comment buffer, zero nesting depth and a
# fresh RubyParser::StackState for each of +cond+ and +cmdarg+, followed by a
# call to #reset for the remaining per-source state.
def initialize
  @comments   = []
  self.nest   = 0
  self.cond   = RubyParser::StackState.new :cond
  self.cmdarg = RubyParser::StackState.new :cmdarg

  reset
end

Public Instance Methods

advance() click to toggle source

How the parser advances to the next token.

@return true if not at end of file (EOF).

# File lib/ruby_lexer.rb, line 67
# Fetches the next token from yylex and stores it in #token.
#
# Raises RuntimeError when yylex hands back nil (or false).
# Returns true unless the token just read is RubyLexer::EOF.
def advance
  self.token = next_token = yylex

  raise "yylex returned nil" unless next_token

  RubyLexer::EOF != next_token
end
arg_ambiguous() click to toggle source
# File lib/ruby_lexer.rb, line 76
# Reports, through #warning, that the first argument of a call is ambiguous.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
comments() click to toggle source
# File lib/ruby_lexer.rb, line 80
# Returns every comment collected so far joined into one string, and empties
# the collection so that the next call starts fresh.
def comments
  @comments.join.tap { @comments.clear }
end
expr_beg_push(val) click to toggle source
# File lib/ruby_lexer.rb, line 86
# Pushes +false+ onto both the +cond+ and +cmdarg+ stacks, switches the lexer
# to :expr_beg and records +val+ as the current yacc_value.
def expr_beg_push(val)
  [cond, cmdarg].each { |stack| stack.push false }

  self.lex_state  = :expr_beg
  self.yacc_value = val
end
fix_arg_lex_state() click to toggle source
# File lib/ruby_lexer.rb, line 93
# Moves the lexer to :expr_arg when it is currently in :expr_fname or
# :expr_dot, and to :expr_beg from any other state.
def fix_arg_lex_state
  self.lex_state =
    case lex_state
    when :expr_fname, :expr_dot then :expr_arg
    else :expr_beg
    end
end
heredoc(here) click to toggle source
# File lib/ruby_lexer.rb, line 101
# Lexes the next chunk of a here document whose opening identifier has
# already been consumed.
#
# here:: four-element array destructured as [_, eos, func, last_line]: +eos+
#        is the terminator text, +func+ a bitmask tested against
#        STR_FUNC_INDENT and STR_FUNC_EXPAND, and +last_line+ the text that is
#        pushed back onto src once the terminator has been reached.
#
# Returns :tSTRING_END when positioned at the terminator line, :tSTRING_DVAR
# or :tSTRING_DBEG when an interpolation marker starts the chunk, and
# :tSTRING_CONTENT otherwise; yacc_value is set alongside every result.
# Calls rb_compile_error when the input ends before the terminator is found.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # With the indent flag the terminator may be preceded by spaces or tabs.
  eos_re  = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if
    src.eos?

  # Terminator line reached: push back the saved text and finish the heredoc.
  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    # An interpolation marker at the start of the chunk is returned as its own
    # token; a lone '#' is ordinary content.
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    # Accumulate content until the terminator is consumed.
    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        # tokadd_string stopped before the end of the line: hand back what
        # has been collected so far (carriage returns stripped).
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if
        src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-expanding heredoc: take whole lines verbatim. The terminator is only
    # peeked at (check), so it is still in src when the loop ends.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if
        src.eos?
    end
  end

  # Stay in heredoc mode; the terminator is handled on a later call.
  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
heredoc_identifier() click to toggle source
# File lib/ruby_lexer.rb, line 166
# Lexes the identifier that follows a heredoc opener.
#
# Three identifier forms are recognised, each optionally preceded by '-'
# (which adds STR_FUNC_INDENT to the string flags):
#
# * quoted with ', " or ` -- the quote selects STR_SQUOTE, STR_DQUOTE or
#   STR_XQUOTE and the closing quote must be the same character as the
#   opening one (backreference \2);
# * an opening quote with no usable closing quote -- reported through
#   rb_compile_error;
# * a bare word -- treated like a double-quoted identifier.
#
# On success the remainder of the current line is cut out of src (replaced by
# a single newline) and kept in lex_strterm, which becomes
# [:heredoc, identifier, func, line].
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for the other
# forms, and nil when no identifier is present.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then
    term = src[2]
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    # Cut the rest of the line out of the source text, leaving only its
    # newline, and step over that newline.
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.extra_lines_added += 1
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
int_with_base(base) click to toggle source
# File lib/ruby_lexer.rb, line 228
# Converts the text most recently matched by src into an Integer in the given
# +base+ and stores it in yacc_value. A doubled underscore in the literal is
# reported through rb_compile_error.
#
# Returns :tINTEGER.
def int_with_base(base)
  digits = src.matched

  rb_compile_error "Invalid numeric format" if digits =~ /__/

  self.yacc_value = digits.to_i(base)
  :tINTEGER
end
lex_state=(o) click to toggle source
# File lib/ruby_lexer.rb, line 234
# Sets the lexer state. Only Symbols are accepted; anything else raises a
# RuntimeError.
def lex_state=(o)
  if Symbol === o
    @lex_state = o
  else
    raise "wtf\?"
  end
end
lineno() click to toggle source
# File lib/ruby_lexer.rb, line 240
# Returns the remembered line number, asking src for its current line number
# (and remembering the answer) when none is stored.
def lineno
  return @lineno if @lineno

  @lineno = src.lineno
end
parse_number() click to toggle source
Parse a number from the input stream.

The number is read from src; the method itself takes no arguments. @return The token type for the number that was read, either :tINTEGER or :tFLOAT.

# File lib/ruby_lexer.rb, line 250
# Lexes a numeric literal at the current position of src and leaves the lexer
# in :expr_end.
#
# Integer literals are delegated to int_with_base with the radix implied by
# their prefix (0x, 0b, 0d, 0/0o); floats are converted here with String#to_f.
# Radix prefixes, hexadecimal digits and the float exponent marker are matched
# case-insensitively, so 0XFF, 0B101 and 1E3 lex the same way as their
# lower-case spellings.
#
# Returns :tINTEGER (via int_with_base) or :tFLOAT, with the numeric value in
# yacc_value. Malformed literals are reported through rb_compile_error.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i) then
    # A radix prefix with no digits after it.
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/i) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
parse_quote() click to toggle source
# File lib/ruby_lexer.rb, line 284
# Lexes the opener of a percent literal; the leading '%' has already been
# consumed by the caller.
#
# The long-hand form names the literal type with a letter before the
# delimiter (%Q{, %w[, %r!, ...); the type letter is matched
# case-insensitively so that the upper-case types Q and W are recognised. The
# short-hand form (%{, %!, ...) has no type letter and is treated as type Q.
#
# Bracket delimiters ( [ { < close with their mirror character and record the
# opener so that nesting can be tracked; any other delimiter closes with
# itself and records "\0" as the opener.
#
# Sets yacc_value and lex_strterm ([:strterm, string_type, closer, opener])
# and returns the opening token type. Unknown types and input that ends
# right after the opener are reported through rb_compile_error.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
parse_string(quote) click to toggle source
# File lib/ruby_lexer.rb, line 333
# Lexes the next piece of a string-like literal (string, word list, regexp,
# ...) described by +quote+.
#
# quote:: four-element array destructured as [_, string_type, term, open]:
#         +string_type+ is a bitmask tested against STR_FUNC_AWORDS,
#         STR_FUNC_REGEXP and STR_FUNC_EXPAND, +term+ the closing delimiter
#         and +open+ the opening delimiter handed to tokadd_string.
#
# Returns one of :tSTRING_END, :tREGEXP_END, :tSPACE, :tSTRING_DVAR,
# :tSTRING_DBEG or :tSTRING_CONTENT. yacc_value is set for the end tokens and
# for :tSTRING_CONTENT. Calls rb_compile_error when the input ends before the
# closing delimiter.
def parse_string(quote) # 65 lines
  _, string_type, term, open = quote

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # NOTE(review): quote[1] is set to nil further down when a word list hits
  # its terminator, so this branch is presumably reached on the following
  # call with the same array -- confirm against the caller.
  unless func then # FIX: impossible, prolly needs == 0
    self.lineno = nil
    return :tSTRING_END
  end

  # In a word list, whitespace separates elements.
  space = true if awords and src.scan(/\s+/)

  # Closing delimiter at nesting depth zero: the literal is finished.
  if self.nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      # Emit a separator first and mark the literal as finished by clearing
      # its string type.
      quote[1] = nil
      return :tSPACE
    elsif regexp then
      self.yacc_value = self.regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  if space then
    return :tSPACE
  end

  self.string_buffer = []

  if expand
    # An interpolation marker at the start of the chunk is its own token; a
    # lone '#' is ordinary content.
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
process_token(command_state) click to toggle source
# File lib/ruby_lexer.rb, line 1211
# Classifies the text currently held in +token+ (a variable, identifier,
# constant or keyword), updates lex_state to match and returns the token
# type.
#
# command_state:: when truthy, an identifier seen where an argument list may
#                 follow moves the lexer to :expr_cmdarg instead of :expr_arg.
#
# Returns :tGVAR, :tCVAR, :tIVAR, :tFID, :tIDENTIFIER or :tCONSTANT, or for a
# reserved word one of the ids supplied by RubyParser::Keyword (:kDO is
# further refined into :kDO_COND / :kDO_BLOCK). yacc_value is the token text,
# except on the keyword paths, where it is [token, src.lineno] or
# keyword.name.
def process_token(command_state)

  # Absorb a trailing '!' or '?' into the name unless '=' follows it.
  token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)

  result = nil
  last_state = lex_state


  case token
  when /^\$/ then
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/ then
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/ then
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/ then
      result = :tFID
    else
      if lex_state == :expr_fname then
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        if src.scan(/=(?:(?![~>=])|(?==>))/) then
          result = :tIDENTIFIER
          token << src.matched
        end
      end

      # A leading capital letter makes a constant, anything else an
      # identifier.
      result ||= if token =~ /^[A-Z]/ then
                   :tCONSTANT
                 else
                   :tIDENTIFIER
                 end
    end

    # After a '.', names are never looked up as reserved words.
    unless lex_state == :expr_dot then
      # See if it is a reserved word.
      keyword = RubyParser::Keyword.keyword token

      if keyword then
        state           = lex_state
        self.lex_state  = keyword.state
        self.yacc_value = [token, src.lineno]

        # In :expr_fname the keyword is reported with its name as the value.
        if state == :expr_fname then
          self.yacc_value = keyword.name
          return keyword.id0
        end

        # 'do' yields a different token depending on the cond/cmdarg stacks
        # and the state the lexer was in.
        if keyword.id0 == :kDO then
          self.command_start = true
          return :kDO_COND  if cond.is_in_state
          return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
          return :kDO_BLOCK if state == :expr_endarg
          return :kDO
        end

        return keyword.id0 if state == :expr_beg or state == :expr_value

        # id1 is returned outside :expr_beg / :expr_value; when it differs
        # from id0 the lexer goes back to :expr_beg.
        self.lex_state = :expr_beg if keyword.id0 != keyword.id1

        return keyword.id1
      end
    end

    # Not a keyword: choose the state that follows a plain name.
    if (lex_state == :expr_beg || lex_state == :expr_mid ||
        lex_state == :expr_dot || lex_state == :expr_arg ||
        lex_state == :expr_cmdarg) then
      if command_state then
        self.lex_state = :expr_cmdarg
      else
        self.lex_state = :expr_arg
      end
    else
      self.lex_state = :expr_end
    end
  end

  self.yacc_value = token


  # A name the parser's environment records as :lvar ends an expression,
  # unless it directly followed a '.'.
  self.lex_state = :expr_end if
    last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar

  return result
end
rb_compile_error(msg) click to toggle source
# File lib/ruby_lexer.rb, line 393
# Raises SyntaxError with +msg+ extended by the current line number and an
# inspected copy of the rest of the current source line.
def rb_compile_error(msg)
  line_no = self.lineno
  snippet = src.rest[/^.*/].inspect

  raise SyntaxError, msg + ". near line #{line_no}: #{snippet}"
end
read_escape() click to toggle source
# File lib/ruby_lexer.rb, line 398
# Reads one escape sequence from src -- the introducing backslash has already
# been consumed -- and returns the character(s) it stands for.
#
# Handles the single-letter escapes, octal (\101) and hexadecimal (\x41)
# constants, and the meta (\M-x) and control (\C-x, \cx) forms, including
# prefixes that are themselves followed by another escape. Any other
# character is returned unchanged.
#
# Calls rb_compile_error for an incomplete M/C/c/x/digit escape and when the
# input ends right after the backslash.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    "\\"
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\v"
  when src.scan(/a/) then                   # alarm(bell)
    "\a"
  when src.scan(/e/) then                   # escape
    "\e"
  when src.scan(/b/) then                   # backspace
    "\b"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then        # meta applied to another escape
    src.scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then               # meta: set the high bit
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then    # control applied to another escape
    src.scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then            # control-? is DEL
    127.chr
  when src.scan(/(C-|c)(.)/) then           # control: mask with 0x9f
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
regx_options() click to toggle source
# File lib/ruby_lexer.rb, line 451
# Reads the option letters that follow a regexp's closing delimiter.
#
# Returns the recognised letters (any of i x m o n e s u) as a string, which
# is empty when no lower-case letters follow. Unrecognised letters are
# reported through rb_compile_error.
def regx_options # 15 lines
  known   = []
  unknown = []

  if src.scan(/[a-z]+/)
    known, unknown = src.matched.split(//).partition { |letter| letter =~ /^[ixmonesu]$/ }
  end

  if !unknown.empty?
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, unknown.join.inspect])
  end

  known.join
end
reset() click to toggle source
# File lib/ruby_lexer.rb, line 466
# Returns the lexer to its start-of-input condition: no pending string
# terminator, token or value, command_start set, and no source or lex state.
def reset
  self.yacc_value    = nil
  self.token         = nil
  self.lex_strterm   = nil
  self.command_start = true

  @src = @lex_state = nil
end
src=(src) click to toggle source
# File lib/ruby_lexer.rb, line 476
# Installs the text to be lexed, wrapped in an RPStringScanner.
# Anything other than a String raises a RuntimeError.
def src=(src)
  unless String === src
    raise "bad src: #{src.inspect}"
  end

  @src = RPStringScanner.new src
end
tokadd_escape(term) click to toggle source
# File lib/ruby_lexer.rb, line 481
# Copies one backslash escape from src into string_buffer exactly as written
# (backslash included), without translating it.
#
# A backslash-newline pair is dropped. A meta/control prefix that is followed
# by another backslash is copied and the escape after it is handled by a
# recursive call. Incomplete M/C/c/x escapes, and anything that is not an
# escape at all, are reported through rb_compile_error.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/)
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/)
    string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/)
    string_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/)
    string_buffer << src.matched
  elsif src.scan(/\\[McCx]/)
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/)
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
tokadd_string(func, term, paren) click to toggle source
# File lib/ruby_lexer.rb, line 501
# Appends string content from src to string_buffer until the closing
# delimiter, the start of an interpolation, a word separator (word lists
# only) or the end of input is reached.
#
# func::  bitmask of STR_FUNC_* flags selecting word-list, escape,
#         interpolation, regexp and symbol handling.
# term::  closing delimiter.
# paren:: opening delimiter, used to track nesting through #nest; nil when
#         the literal has no opener.
#
# A closing delimiter only ends the scan at nesting depth zero; it is left in
# src for the caller. Returns RubyLexer::EOF when the input is exhausted,
# otherwise the last piece of text that was looked at.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  # Matcher for a run of ordinary characters: everything except the
  # delimiters, '#', NUL and backslash (plus newline and space in a word
  # list). It depends only on the arguments, so it is built once.
  t = Regexp.escape term
  x = Regexp.escape(paren) if paren && paren != "\000"
  plain_re = if awords then
               /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
             else
               /[^#{t}#{x}\#\0\\]+|./
             end

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      # Closing delimiter at depth zero: leave it for the caller.
      src.pos -= 1
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      # Start of an interpolation: leave the '#' for the caller.
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << "\\" if escape
        c = "\\"
      when src.scan(/\\/) then
        # The backslash is kept unless it escapes one of the delimiters.
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      src.scan plain_re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?


  return c
end
unescape(s) click to toggle source
# File lib/ruby_lexer.rb, line 587
# Translates the body of an escape sequence (the text after the backslash)
# into the character it denotes.
#
# s:: escape body such as "n", "101", "x41", "M-a" or "C-a".
#
# Single-letter escapes, an escaped backslash, an escaped newline (which
# yields the empty string) and control-? come from a lookup table; octal,
# hexadecimal, meta and control forms are computed. Anything unrecognised is
# returned unchanged, and an incomplete M/C/c/x/digit escape is reported
# through rb_compile_error.
def unescape s

  r = {
    "a"    => "\a",
    "b"    => "\b",
    "e"    => "\e",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\v",
    "\\"   => "\\",
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
warning(s) click to toggle source
# File lib/ruby_lexer.rb, line 623
# Warning hook. The message +s+ is currently discarded and nil is returned.
def warning(s)
  # intentionally a no-op for now
end
yylex() click to toggle source

Returns the next token. Also sets yacc_value if needed.

@return The type of the next token (for example :tNL), or RubyLexer::EOF at the end of the input.

# File lib/ruby_lexer.rb, line 632
def yylex # 826 lines

  c = ''
  space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
      space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.pos -= 1

          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          if src.eos? then
            return RubyLexer::EOF
          end
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        if [:expr_beg, :expr_fname,
            :expr_dot, :expr_class].include? lex_state then
          next
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        cond.lexpop
        cmdarg.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.scan(/\.\.\.?|,|![=~]?/) then
        self.lex_state = :expr_beg
        tok = self.yacc_value = src.matched
        return TOKENS[tok]
      elsif src.check(/\./) then
        if src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\(/) then
        result = :tLPAREN2
        self.command_start = true

        if lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLPAREN
        elsif space_seen then
          if lex_state == :expr_cmdarg then
            result = :tLPAREN_ARG
          elsif lex_state == :expr_arg then
            warning("don't put space before argument parentheses")
            result = :tLPAREN2
          end
        end

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
          self.fix_arg_lex_state
          tok = self.yacc_value = src.matched
          return TOKENS[tok]
        elsif src.scan(/\=begin(?=\s)/) then
          # @comments << '=' << src.matched
          @comments << src.matched

          unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/) then
            @comments.clear
            rb_compile_error("embedded document meets end of file")
          end

          @comments << src.matched

          next
        else
          raise "you shouldn't be able to get here"
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?\w*/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if (lex_state == :expr_end || lex_state == :expr_endarg||
            src.check(/\s/)) then
          self.lex_state = :expr_beg
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        result = src.matched

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLBRACK
        elsif lex_state.is_argument && space_seen then
          result = :tLBRACK
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        result = if lex_state.is_argument || lex_state == :expr_end then
                   :tLCURLY      #  block (primary)
                 elsif lex_state == :expr_endarg then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"
        self.command_start = true unless result == :tLBRACE

        return result
      elsif src.scan(/[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (lex_state == :expr_beg || lex_state == :expr_mid ||
            (lex_state.is_argument && space_seen && !src.check(/\s/))) then
          if lex_state.is_argument then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          if (! [:expr_end,    :expr_dot,
                 :expr_endarg, :expr_class].include?(lex_state) &&
              (!lex_state.is_argument || space_seen)) then
            tok = self.heredoc_identifier
            if tok then
              return tok
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then
        if lex_state == :expr_end || lex_state == :expr_endarg then
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless lex_state.is_argument then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end
        self.yacc_value = c[0].ord & 0xff
        return :tINTEGER
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if lex_state.is_argument && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if lex_state == :expr_fname || lex_state == :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        if src.scan(/\n/) then
          self.lineno = nil
          space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen && ! src.check(/\s/) then
          return parse_quote
        end

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\0004|\0032|\0000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      if src.scan(/\W/) then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    self.token = src.matched if self.src.scan(/\w+/)

    return process_token(command_state)
  end
end
yylex_string() click to toggle source
# File lib/ruby_lexer.rb, line 1298
# Produces the next token while the lexer is inside a string-like literal.
#
# The current lex_strterm decides who does the work: a term whose first
# element is :heredoc is handed to #heredoc, anything else to #parse_string.
#
# When the delegate reports the closing token (:tSTRING_END or
# :tREGEXP_END) the string state is torn down: lineno and lex_strterm are
# set to nil and lex_state becomes :expr_end.
#
# Returns whatever token the delegate produced.
def yylex_string
  term   = lex_strterm
  result = term[0] == :heredoc ? heredoc(term) : parse_string(term)

  # Only a terminator token ends the literal; everything else leaves the
  # lexer state untouched so the next call continues the same string.
  if [:tSTRING_END, :tREGEXP_END].include?(result)
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  result
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.