/**
 * Copyright (c) 2010, Benjamin Joffe
 * http://www.benjoffe.com/
 *
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted for NON-COMMERCIAL PURPOSES provided that the
 * following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Splits JavaScript source text into a flat array of tokens.
 *
 * Tokens are emitted in source order and include whitespace, comments and
 * line terminators, so joining the token values is intended to give back the
 * input text. Each token is an object `{type, value}`; the ':' that closes a
 * `a ? b : c` expression additionally carries `mode: MODES.TERTIARY`.
 *
 * The function relies on tables defined outside of it:
 *   - WHITESPACE, LINETERMINATOR, STRING_DELIMITER, PUNCTUATOR, NUMBER,
 *     HEX_NUMBER, REG_EXP_FLAG: objects tested with `character in TABLE`
 *   - COMMAND, KEYWORD: objects whose own keys are reserved words
 *   - TYPES: token type constants (WHITESPACE, STRING, COMMENT, REGEXP,
 *     PUNCTUATOR, BLOCK, NUMBER, NEWLINE, COMMAND, KEYWORD, IDENTIFIER)
 *
 * Invalid input is tolerated where possible: unterminated strings and regular
 * expressions end at the next line terminator (which is then emitted as its
 * own token) or at the end of the input.
 *
 * @param {string} script source text to tokenize
 * @returns {Array} the tokens, or an empty array when a multi line comment is
 *     never closed (an alert is shown first if `alert` exists)
 */
function lexer(script)
{
  var tokens = [];
  var buffer = '';
  var c = '';   // character at script[i]; undefined once the input is exhausted
  var k = '';   // scratch: string delimiter, brace mode or shift operator
  var closed = false;   // whether the current string found its closing quote
  var inClass = false;  // whether the regexp scanner is inside [...]
  var hasOwn = Object.prototype.hasOwnProperty;
  
  // stack of the constructs that are currently open, innermost last
  var modeStack = [];
  var MODES = {
    BLOCK: 1,    // { statements; }
    OBJECT: 2,   // { a: 1, b: 2 }
    TERTIARY: 3, // a ? b : c;
    PARENS: 4,   // (statement)
    ARRAY: 5,    // [1,2,4]
    VARS: 6      // var a,b,c;
  };
  
  var i = 0;
  var j = 0;
  
  // last significant token; whitespace, comments and newlines do not update it
  var lastToken = null;
  
  c = script[0];
  
  while (c)
  {
    if (c in WHITESPACE)
    {
      buffer = '';
      
      do {
        buffer += c;
        c = script[++i];
      }
      while (c in WHITESPACE);
      
      tokens[tokens.length] = {type:TYPES.WHITESPACE,value:buffer};
      continue;
    }
    
    if (c in STRING_DELIMITER)
    {
      buffer = k = c;
      closed = false;
      c = script[++i];
      while (c && !(c in LINETERMINATOR)) // check for newline ensures invalid js is not unreadable
      {
        buffer += c;
        if (c==k)
        {
          closed = true;
          break;
        }
        if (c=='\\')
        {
          c = script[++i];
          if (!c)
          {
            // backslash is the last character of the input
            break;
          }
          buffer += c;
          if (c=='\r')
          {
            c = script[++i];
            if (c=='\n') // backslash escapes two characters in the case of \r\n
            {
              buffer += c;
              c = script[++i];
            }
            // c is already the next unread character
            continue;
          }
        }
        c = script[++i];
      }
      lastToken = tokens[tokens.length] = {type:TYPES.STRING,value:buffer};
      if (closed)
      {
        // step past the closing delimiter; an unterminated string leaves c on
        // the line terminator so that it still becomes a token of its own
        c = script[++i];
      }
      continue;
    }
    
    if (c in PUNCTUATOR)
    {
      buffer = c;
      c = script[++i];
      
      if (buffer=='/')
      {
        if (c=='/') // single line comment
        {
          do {
            buffer += c;
            c = script[++i];
          }
          while (c && !(c in LINETERMINATOR));
          tokens[tokens.length] = {type:TYPES.COMMENT,value:buffer};
          continue;
        }
        
        if (c=='*') // multi line comment
        {
          // search from after the '*' so that '/*/' does not count as closed
          j = script.indexOf('*/', i+1);
          if (j<0)
          {
            // alert only exists in browsers
            if (typeof alert == 'function')
            {
              alert('Error, comment does not end.');
            }
            return [];
          }
          tokens[tokens.length] = {type:TYPES.COMMENT,value:script.substring(i-1,j+2)};
          
          c = script[i=j+2];
          continue;
        }
        
        // regexp: a '/' starts a regular expression when no operand precedes it
        if (!lastToken ||
            lastToken.type == TYPES.COMMAND ||
            lastToken.type == TYPES.BLOCK ||
            (lastToken.type == TYPES.PUNCTUATOR &&
              lastToken.value != '++' &&
              lastToken.value != '--' &&
              lastToken.value != ')' &&
              lastToken.value != ']' &&
              lastToken.value != '}'))
        {
          inClass = false;
          
          while (c && !(c in LINETERMINATOR)) // check for newline ensures invalid js is not unreadable
          {
            buffer += c;
            
            if (c=='\\')
            {
              // the escaped character is taken literally, inside or outside [...]
              c = script[++i];
              if (!c)
              {
                // backslash is the last character of the input
                break;
              }
              buffer += c;
            }
            else if (inClass)
            {
              // a '/' inside a character class does not end the regexp
              if (c==']')
              {
                inClass = false;
              }
            }
            else if (c=='[')
            {
              inClass = true;
            }
            else if (c=='/')
            {
              // closing slash, followed by optional flags
              c = script[++i];
              while (c in REG_EXP_FLAG)
              {
                buffer += c;
                c = script[++i];
              }
              break;
            }
            c = script[++i];
          }
          lastToken = tokens[tokens.length] = {type:TYPES.REGEXP, value: buffer};
          continue;
        }
      }
      
      if (buffer == '?')
      {
        // tertiary statement
        modeStack.push(MODES.TERTIARY);
        lastToken = tokens[tokens.length] = {type:TYPES.PUNCTUATOR,value: buffer};
        continue;
      }
      if (buffer == ':' && modeStack[modeStack.length-1] == MODES.TERTIARY)
      {
        modeStack.pop();
        lastToken = tokens[tokens.length] = {type:TYPES.PUNCTUATOR,value: buffer,
          mode: MODES.TERTIARY // todo: find cleaner way
        };
        continue;
      }

      if (buffer == '.' && (c in NUMBER))
      {
        // we are in the wrong place: rewind so the number branch below reads
        // the '.' as the start of a decimal literal
        c = '.';
        i--;
      }
      else
      {
        // actual punctuator and not a funky '/' or number starting with '.'
        
        // distinguish between block levels and object literals
        if (buffer=='}' || buffer=='{')
        {
          if (buffer=='{')
          {
            // a '{' that follows an operator-like punctuator opens an object
            // literal; after a ':' only when that ':' belongs to a tertiary
            // statement or we are already inside an object literal
            if (lastToken && (
              lastToken.type == TYPES.PUNCTUATOR && (
                lastToken.value != '}' &&
                lastToken.value != ')' &&
                lastToken.value != ']' &&
                lastToken.value != '++' &&
                lastToken.value != '--' &&
                lastToken.value != '{' &&
                lastToken.value != ';' &&
                (lastToken.value != ':' ||
                 lastToken.mode == MODES.TERTIARY ||
                 modeStack[modeStack.length-1] == MODES.OBJECT
                )
              )
            ))
            {
              k = MODES.OBJECT;
            }
            else
            {
              k = MODES.BLOCK;
            }
            modeStack[modeStack.length] = k;
          }
          else
          {
            // a '}' takes the kind of whatever is innermost on the stack
            k = modeStack.pop() || MODES.BLOCK;
          }
          lastToken = tokens[tokens.length] = {
            type: k==MODES.BLOCK ? TYPES.BLOCK : TYPES.PUNCTUATOR,
            value: buffer
          };
          continue;
        }
        
        switch (buffer)
        {
          case '(' : {
            modeStack.push(MODES.PARENS);
            break;
          }
          case ')' : {
            if (modeStack[modeStack.length-1] == MODES.PARENS)
            {
              modeStack.pop();
            }
            break;
          }
          case '[' : {
            modeStack.push(MODES.ARRAY);
            break;
          }
          case ']' : {
            if (modeStack[modeStack.length-1] == MODES.ARRAY)
            {
              modeStack.pop();
            }
            break;
          }
        }
        
        // check for multi character punctuators
        if (c in PUNCTUATOR)
        {
          if (c=='=')
          {
            if (buffer=='<' || buffer=='>' ||
                buffer=='+' || buffer=='-' ||
                buffer=='*' || buffer=='/' ||
                buffer=='&' || buffer=='|' ||
                buffer=='%' || buffer=='^')
            {
              buffer += c;
              c = script[++i];
            }
            else
            {
              if (buffer=='=' || buffer=='!')
              {
                // '!=' and '==' tokens
                buffer += c;
                c = script[++i];
                if (c=='=')
                {
                  // '!==' and '===' tokens
                  buffer += c;
                  c = script[++i];
                }
              }
            }
          }
          else
          {
            if ((buffer+c) in {'++':1,'--':1,'&&':1,'||':1})
            {
              buffer += c;
              c = script[++i];
            }
            else
            {
              if ((buffer=='<' || buffer=='>') && c==buffer)
              {
                k = c;
                buffer += c;
                c = script[++i];
                if (k=='>' && c=='>')
                {
                  // can have >>> and >>>= but not <<<
                  buffer += '>';
                  c = script[++i];
                }
                if (c=='=')
                {
                  buffer += '=';
                  c = script[++i];
                }
              }
            }
          }
        }
        
        lastToken = tokens[tokens.length] = {type:TYPES.PUNCTUATOR,value: buffer};
        continue;
      }
    }
    
    if (c in NUMBER || c=='.' ) // if c is a dot by here it is not a punctuator
    {
      buffer = c;
      c = script[++i];
      
      if (buffer == '0')
      {
        
        if (c == 'x' || c=='X')
        {
          // HEX INTEGER
          
          do {
            buffer += c;
            c = script[++i];
          }
          while (c in HEX_NUMBER);
          
          lastToken = tokens[tokens.length] = {type:TYPES.NUMBER,value:buffer};
          continue;
        }
        
        if (c in NUMBER)
        {
          // OCTAL INTEGER
          
          do {
            buffer += c;
            c = script[++i];
          }
          while (c in NUMBER);
          
          lastToken = tokens[tokens.length] = {type:TYPES.NUMBER,value:buffer};
          continue;
        }
      }
      
      // DEC NUMBER
      
      if (buffer=='.') // starts with decimal
      {
        // digits after decimal
        while (c in NUMBER)
        {
          buffer += c;
          c = script[++i];
        }
      }
      else
      {
        // normal number starting with a digit
        while (c in NUMBER)
        {
          buffer += c;
          c = script[++i];
        }
        
        // digits after decimal
        if (c=='.')
        {
          do
          {
            buffer += c;
            c = script[++i];
          }
          while (c in NUMBER);
        }
      }
      
      // exponent part
      if (c=='e' || c=='E')
      {
        buffer += c;
        c = script[++i];
        if (c=='+' || c=='-')
        {
          buffer += c;
          c = script[++i];
        }
        while (c in NUMBER)
        {
          buffer += c;
          c = script[++i];
        }
      }
      
      lastToken = tokens[tokens.length] = {type:TYPES.NUMBER,value:buffer};
      continue;
    }
    
    if (c in LINETERMINATOR)
    {
      buffer = c;
      c = script[++i];
      if (buffer == '\r' && c=='\n')
      {
        // \r\n counts as a single line terminator
        buffer += c;
        c = script[++i];
      }
      tokens[tokens.length] = {type:TYPES.NEWLINE,value:buffer};
      continue;
    }
    
    // else a word: read up to the next whitespace, line terminator,
    // punctuator or string delimiter
    buffer = c;
    c = script[++i];
    while (c && !(c in WHITESPACE) &&
           !(c in LINETERMINATOR) &&
           !(c in PUNCTUATOR) &&
           !(c in STRING_DELIMITER)
    )
    {
      buffer += c;
      c = script[++i];
    }
    
    // Own-property lookups: `in` would also match inherited keys, turning
    // identifiers such as 'constructor' or 'toString' into commands.
    if (hasOwn.call(COMMAND, buffer)) // eg: 'if', 'for', 'function'
    {
      lastToken = tokens[tokens.length] = { type: TYPES.COMMAND, value: buffer };
      continue;
    }
    
    if (hasOwn.call(KEYWORD, buffer)) // eg: 'this', 'true', 'null'
    {
      lastToken = tokens[tokens.length] = { type: TYPES.KEYWORD, value: buffer };
      continue;
    }
    
    // else identifier
    lastToken = tokens[tokens.length] = { type: TYPES.IDENTIFIER, value: buffer };
  }
  
  return tokens;
}
