# Module metadata: the author string and the list of credited contributors.
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro, Raymond Hettinger, Trent Nelson, Michael Foord'
from builtins import open as _builtin_open
from codecs import lookup, BOM_UTF8
import collections
from io import TextIOWrapper
from itertools import chain
import itertools as _itertools
import re
import sys
from token import *
# Matches a comment line carrying an encoding declaration ("coding:" or
# "coding=") and captures the declared encoding name in group 1.
cookie_re = re.compile('^[ \\t\\f]*#.*?coding[:=][ \\t]*([-\\w.]+)', re.ASCII)
# Bytes pattern: optional leading whitespace followed by a comment start, a
# line ending, or the end of the input.
blank_re = re.compile(b'^[ \\t\\f]*(?:[#\\r\\n]|$)', re.ASCII)
# The token module is imported by name only to extend its __all__; the name
# is deleted again right after use.
import token
__all__ = token.__all__ + ['tokenize', 'detect_encoding', 'untokenize', 'TokenInfo']
del token
# Maps an operator's source text to its specific token type constant; read by
# TokenInfo.exact_type for tokens whose generic type is OP.
EXACT_TOKEN_TYPES = {'(': LPAR, ')': RPAR, '[': LSQB, ']': RSQB, ':': COLON, ',': COMMA, ';': SEMI, '+': PLUS, '-': MINUS, '*': STAR, '/': SLASH, '|': VBAR, '&': AMPER, '<': LESS, '>': GREATER, '=': EQUAL, '.': DOT, '%': PERCENT, '{': LBRACE, '}': RBRACE, '==': EQEQUAL, '!=': NOTEQUAL, '<=': LESSEQUAL, '>=': GREATEREQUAL, '~': TILDE, '^': CIRCUMFLEX, '<<': LEFTSHIFT, '>>': RIGHTSHIFT, '**': DOUBLESTAR, '+=': PLUSEQUAL, '-=': MINEQUAL, '*=': STAREQUAL, '/=': SLASHEQUAL, '%=': PERCENTEQUAL, '&=': AMPEREQUAL, '|=': VBAREQUAL, '^=': CIRCUMFLEXEQUAL, '<<=': LEFTSHIFTEQUAL, '>>=': RIGHTSHIFTEQUAL, '**=': DOUBLESTAREQUAL, '//': DOUBLESLASH, '//=': DOUBLESLASHEQUAL, '...': ELLIPSIS, '->': RARROW, '@': AT, '@=': ATEQUAL}

class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    """A single token: (type, string, start, end, line) as a named tuple."""

    def __repr__(self):
        """Return the tuple repr with the type shown as 'number (NAME)'."""
        label = '%d (%s)' % (self.type, tok_name[self.type])
        shown = self._replace(type=label)
        return 'TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' % shown

    @property
    def exact_type(self):
        """Return the specific operator type for OP tokens, else the plain type."""
        if self.type != OP:
            return self.type
        # Operators missing from the table fall back to the generic type.
        return EXACT_TOKEN_TYPES.get(self.string, self.type)


def group(*choices):
    """Return a regex group that matches any one of *choices*."""
    alternatives = '|'.join(choices)
    return '(' + alternatives + ')'


def any(*choices):
    """Return a regex matching zero or more repetitions of the grouped *choices*."""
    grouped = group(*choices)
    return grouped + '*'


def maybe(*choices):
    """Return a regex matching the grouped *choices* optionally (zero or one time)."""
    grouped = group(*choices)
    return grouped + '?'

# Regex source fragments, combined further down into the token patterns.
# Optional run of spaces, form feeds and tabs.
Whitespace = '[ \\f\\t]*'
# A '#' and everything up to (not including) the line ending.
Comment = '#[^\\r\\n]*'
# Whitespace, any number of backslash-newline continuations, optional comment.
Ignore = Whitespace + any('\\\\\\r?\\n' + Whitespace) + maybe(Comment)
Name = '\\w+'
# Integer literals; each digit may be preceded by a single underscore.
Hexnumber = '0[xX](?:_?[0-9a-fA-F])+'
Binnumber = '0[bB](?:_?[01])+'
Octnumber = '0[oO](?:_?[0-7])+'
Decnumber = '(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Float literals: digits with a decimal point and/or an exponent part.
Exponent = '[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group('[0-9](?:_?[0-9])*\\.(?:[0-9](?:_?[0-9])*)?', '\\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = '[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
# Imaginary literals: digits or a float followed by 'j' or 'J'.
Imagnumber = group('[0-9](?:_?[0-9])*[jJ]', Floatnumber + '[jJ]')
# Alternatives are tried in this order: imaginary, float, then integer.
Number = group(Imagnumber, Floatnumber, Intnumber)

def _all_string_prefixes():
    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
    result = {''}
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            for u in _itertools.product(*[(c, c.upper()) for c in t]):
                result.add(''.join(u))
    return result


def _compile(expr):
    return re.compile(expr, re.UNICODE)

# Alternation of every accepted string prefix (including the empty prefix).
StringPrefix = group(*_all_string_prefixes())
# Tails of string literals: everything after the opening quote(s) up to and
# including the closing quote(s), allowing backslash escapes.
Single = "[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"
Double = '[^"\\\\]*(?:\\\\.[^"\\\\]*)*"'
Single3 = "[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''"
Double3 = '[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""'
# Opening of a triple-quoted string (prefix plus three quotes).
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# A complete single-quoted string contained on one line.
String = group(StringPrefix + "'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*'", StringPrefix + '"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*"')
# Longer operators are listed before the single-character class so they win.
Operator = group('\\*\\*=?', '>>=?', '<<=?', '!=', '//=?', '->', '[+\\-*/%&@|^=<>]=?', '~')
Bracket = '[][(){}]'
Special = group('\\r?\\n', '\\.\\.\\.', '[:;.,@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# A single-quoted string that either closes on this line or ends with a
# backslash-newline continuation.
ContStr = group(StringPrefix + "'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*" + group("'", '\\\\\\r?\\n'), StringPrefix + '"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*' + group('"', '\\\\\\r?\\n'))
# Backslash continuation, end of input, a comment, or a triple-quote opener.
PseudoExtras = group('\\\\\\r?\\n|\\Z', Comment, Triple)
# The pattern _tokenize matches at each position; group 1 spans the token.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
# Maps each string opener (prefix + quote characters) to the regex source
# that matches the remainder of that string.
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = Single
    endpats[_prefix + '"'] = Double
    endpats[_prefix + "'''"] = Single3
    endpats[_prefix + '"""'] = Double3
# Sets of every opener for single-quoted and triple-quoted strings; the loop
# variables t and u remain defined at module level afterwards.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for u in (t + '"', t + "'"):
        single_quoted.add(u)
    for u in (t + '"""', t + "'''"):
        triple_quoted.add(u)
# Tab stop width used by _tokenize when measuring leading whitespace.
tabsize = 8

class TokenError(Exception):
    """Raised by the tokenizer when input ends inside a multi-line construct."""


class StopTokenizing(Exception):
    """Exception type exported by this module; not raised in the code shown here."""


class Untokenizer:
    """Rebuild source text from a stream of tokens.

    Full 5-tuples (type, string, start, end, line) are placed at their
    recorded positions. As soon as a 2-tuple (type, string) is seen, the
    rest of the stream is handled by compat(), which can only approximate
    the original spacing.
    """

    def __init__(self):
        self.tokens = []      # output fragments, joined by untokenize()
        self.prev_row = 1     # row just past the last emitted token
        self.prev_col = 0     # column just past the last emitted token
        self.encoding = None  # string of the ENCODING token, if one was seen

    def add_whitespace(self, start):
        """Emit the filler needed to move from the previous end to *start*.

        Row gaps are filled with backslash-newline continuations and column
        gaps with spaces.

        Raises:
            ValueError: if *start* lies before the end of the previous token.
        """
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError('start ({},{}) precedes previous end ({},{})'.format(row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append('\\\n' * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(' ' * col_offset)

    def untokenize(self, iterable):
        """Return the source text described by the tokens in *iterable*.

        Processing stops at the first ENDMARKER token. An ENCODING token is
        recorded in self.encoding and produces no output.
        """
        it = iter(iterable)
        indents = []
        startline = False
        for t in it:
            if len(t) == 2:
                # Positions are unavailable from here on; compat() consumes
                # this token and the remainder of the iterator.
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            if tok_type == ENDMARKER:
                break
            if tok_type == INDENT:
                # Only remember the indent; it is written out in front of
                # the first token of each following line.
                indents.append(token)
                continue
            elif tok_type == DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = end
                continue
            elif tok_type in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                # First token on a new line inside an indented block.
                indent = indents[-1]
                if start[1] >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return ''.join(self.tokens)

    def compat(self, token, iterable):
        """Append text for position-less tokens to self.tokens.

        *token* is the first 2-tuple encountered and *iterable* supplies the
        remaining tokens. Names and numbers get a trailing space, and
        adjacent strings are separated by a space.
        """
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False
        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue
            if toknum in (NAME, NUMBER):
                tokval += ' '
            # Keep two consecutive string literals from running together.
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False
            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                # Write the current indent once, before the line's first token.
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)


def untokenize(iterable):
    """Convert the tokens in *iterable* back into source text.

    Returns a str, or bytes encoded with the recorded encoding when the
    Untokenizer saw an ENCODING token.
    """
    worker = Untokenizer()
    text = worker.untokenize(iterable)
    if worker.encoding is None:
        return text
    return text.encode(worker.encoding)


def _get_normal_name(orig_enc):
    enc = orig_enc[:12].lower().replace('_', '-')
    if enc == 'utf-8' or enc.startswith('utf-8-'):
        return 'utf-8'
    elif enc in ('latin-1', 'iso-8859-1', 'iso-latin-1') or enc.startswith(('latin-1-', 'iso-8859-1-', 'iso-latin-1-')):
        return 'iso-8859-1'
    return orig_enc


def detect_encoding(readline):
    """Work out the encoding of a source file from its first lines.

    *readline* is called at most twice and must return bytes. A UTF-8 BOM
    and/or an encoding declaration comment on the first or second line are
    honoured; the declaration on the second line is only looked for when the
    first line matches blank_re.

    Returns:
        (encoding, lines): the encoding name and the list of raw lines read
        (BOM removed from the first line). Without BOM or declaration the
        encoding is 'utf-8'; with only a BOM it is 'utf-8-sig'.

    Raises:
        SyntaxError: if a candidate line is not valid UTF-8, the declared
            encoding is unknown, or a BOM is combined with a non-UTF-8
            declaration.
    """
    # When readline is a bound method, use its owner's name in error messages.
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    default = 'utf-8'

    def read_or_stop():
        # An exhausted source is treated like an empty line.
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        # Return the declared encoding found in *line*, or None.
        try:
            text = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = 'invalid or missing encoding declaration'
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)
        found = cookie_re.match(text)
        if not found:
            return None
        name = _get_normal_name(found.group(1))
        # The lookup is performed only to validate the name.
        try:
            lookup(name)
        except LookupError:
            if filename is None:
                msg = 'unknown encoding: ' + name
            else:
                msg = 'unknown encoding for {!r}: {}'.format(filename, name)
            raise SyntaxError(msg)
        if not bom_found:
            return name
        if name != 'utf-8':
            if filename is None:
                msg = 'encoding problem: utf-8'
            else:
                msg = 'encoding problem for {!r}: utf-8'.format(filename)
            raise SyntaxError(msg)
        return name + '-sig'

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return (default, [])

    declared = find_cookie(first)
    if declared:
        return (declared, [first])
    if not blank_re.match(first):
        return (default, [first])

    second = read_or_stop()
    if not second:
        return (default, [first])

    declared = find_cookie(second)
    chosen = declared if declared else default
    return (chosen, [first, second])


def open(filename):
    """Open *filename* for reading as text, using the encoding detect_encoding() reports.

    The underlying binary file is closed again if anything fails before the
    text wrapper is returned.
    """
    raw = _builtin_open(filename, 'rb')
    try:
        encoding, _consumed = detect_encoding(raw.readline)
        # Rewind so the wrapper also sees the lines read during detection.
        raw.seek(0)
        wrapper = TextIOWrapper(raw, encoding, line_buffering=True)
        wrapper.mode = 'r'
    except BaseException:
        raw.close()
        raise
    return wrapper


def tokenize(readline):
    """Return a token generator for the bytes lines produced by *readline*.

    The encoding is determined with detect_encoding(); the lines it consumed
    are replayed first, then the remaining lines, then an endless supply of
    b'' so the tokenizer always sees end of input.
    """
    from itertools import chain, repeat
    encoding, consumed = detect_encoding(readline)
    remaining = iter(readline, b'')
    padding = repeat(b'')
    line_source = chain(consumed, remaining, padding)
    return _tokenize(line_source.__next__, encoding)


def _tokenize(readline, encoding):
    """Generate TokenInfo tuples for the lines returned by *readline*.

    Args:
        readline: callable returning the next line on each call. Lines are
            decoded with *encoding* when it is not None and used as returned
            otherwise. An empty line, or a StopIteration, ends the input.
        encoding: name of the source encoding, or None. When not None, an
            ENCODING token is yielded first; 'utf-8-sig' is reported and
            decoded as 'utf-8'.

    Yields:
        TokenInfo(type, string, (srow, scol), (erow, ecol), line). After the
        last line, one DEDENT per still-open indentation level and a final
        ENDMARKER.

    Raises:
        TokenError: if input ends inside a multi-line string or inside a
            statement continued by an open bracket or a trailing backslash.
        IndentationError: if a dedent does not return to an enclosing
            indentation level.
    """
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]
    # The pattern never changes, so compile it once instead of per match.
    pseudo_prog = _compile(PseudoToken)

    if encoding is not None:
        if encoding == 'utf-8-sig':
            # Only the "-sig" variant is normalised to plain UTF-8.
            encoding = 'utf-8'
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')

    while True:  # one iteration per input line
        try:
            line = readline()
        except StopIteration:
            line = b''
        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, line_len = 0, len(line)

        if contstr:
            # Inside a string that started on an earlier line.
            if not line:
                raise TokenError('EOF in multi-line string', strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end], strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # A single-quoted string must be continued with a backslash.
                yield TokenInfo(ERRORTOKEN, contstr + line, strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:
            # Start of a new logical line: measure its indentation.
            if not line:
                break
            column = 0
            while pos < line_len:
                char = line[pos]
                if char == ' ':
                    column += 1
                elif char == '\t':
                    column = (column // tabsize + 1) * tabsize
                elif char == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == line_len:
                # Only whitespace and no line ending: treated as end of input.
                break

            if line[pos] in '#\r\n':
                # Comment-only and blank lines do not affect indentation.
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    yield TokenInfo(COMMENT, comment_token, (lnum, pos), (lnum, pos + len(comment_token)), line)
                    pos += len(comment_token)
                yield TokenInfo(NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError('unindent does not match any outer indentation level', ('<tokenize>', lnum, pos, line))
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:
            # Continuation of a statement (open bracket or trailing backslash).
            if not line:
                raise TokenError('EOF in multi-line statement', (lnum, 0))
            continued = 0

        while pos < line_len:
            pseudomatch = pseudo_prog.match(line, pos)
            if not pseudomatch:
                yield TokenInfo(ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
                pos += 1
                continue

            start, end = pseudomatch.span(1)
            spos, epos, pos = (lnum, start), (lnum, end), end
            if start == end:
                # Only whitespace was consumed (or end of line was reached).
                continue
            token, initial = line[start:end], line[start]

            if initial in numchars or (initial == '.' and token != '.' and token != '...'):
                yield TokenInfo(NUMBER, token, spos, epos, line)
            elif initial in '\r\n':
                # Inside brackets a line ending does not end the statement.
                if parenlev > 0:
                    yield TokenInfo(NL, token, spos, epos, line)
                else:
                    yield TokenInfo(NEWLINE, token, spos, epos, line)
            elif initial == '#':
                yield TokenInfo(COMMENT, token, spos, epos, line)
            elif token in triple_quoted:
                endprog = _compile(endpats[token])
                endmatch = endprog.match(line, pos)
                if endmatch:
                    # The whole string fits on this line.
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                else:
                    # The string continues on following lines.
                    strstart = (lnum, start)
                    contstr = line[start:]
                    contline = line
                    break
            elif initial in single_quoted or token[:2] in single_quoted or token[:3] in single_quoted:
                # Up to the first three characters may be prefix plus quote.
                if token[-1] == '\n':
                    # Backslash-continued single-quoted string. Find the end
                    # pattern for its quote character by skipping any prefix
                    # characters.
                    strstart = (lnum, start)
                    endprog = _compile(endpats.get(initial) or endpats.get(token[1]) or endpats.get(token[2]))
                    contstr, needcont = line[start:], 1
                    contline = line
                    break
                else:
                    yield TokenInfo(STRING, token, spos, epos, line)
            elif initial.isidentifier():
                yield TokenInfo(NAME, token, spos, epos, line)
            elif initial == '\\':
                # Backslash-newline: the next line continues this statement.
                continued = 1
            else:
                if initial in '([{':
                    parenlev += 1
                elif initial in ')]}':
                    parenlev -= 1
                yield TokenInfo(OP, token, spos, epos, line)

    # Close every indentation level that is still open, then end the stream.
    for indent in indents[1:]:
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')


def generate_tokens(readline):
    """Return the _tokenize() generator for *readline* with None as the encoding."""
    no_encoding = None
    return _tokenize(readline, no_encoding)


def main():
    """Command-line entry point: print the tokens of a file or of stdin.

    Tokenizer and I/O errors are reported on stderr and end the process with
    exit status 1; any other exception is reported and re-raised.
    """
    import argparse

    def perror(message):
        # Write a diagnostic line to stderr.
        print(message, file=sys.stderr)

    def error(message, filename=None, location=None):
        # Report a fatal error with as much context as is available, then exit.
        if location:
            text = '%s:%d:%d: error: %s' % ((filename,) + location + (message,))
        elif filename:
            text = '%s: error: %s' % (filename, message)
        else:
            text = 'error: %s' % message
        perror(text)
        sys.exit(1)

    parser = argparse.ArgumentParser(prog='python -m tokenize')
    parser.add_argument(
        dest='filename',
        nargs='?',
        metavar='filename.py',
        help='the file to tokenize; defaults to stdin',
    )
    parser.add_argument(
        '-e',
        '--exact',
        dest='exact',
        action='store_true',
        help='display token names using the exact type',
    )
    args = parser.parse_args()

    try:
        if args.filename:
            filename = args.filename
            # Materialise the tokens while the file is still open.
            with _builtin_open(filename, 'rb') as source:
                tokens = list(tokenize(source.readline))
        else:
            filename = '<stdin>'
            tokens = _tokenize(sys.stdin.readline, None)

        for token in tokens:
            token_type = token.exact_type if args.exact else token.type
            token_range = '%d,%d-%d,%d:' % (token.start + token.end)
            row = (token_range, tok_name[token_type], token.string)
            print('%-20s%-15s%-15r' % row)
    except IndentationError as err:
        # args[1] is (filename, lineno, offset, text); keep lineno and offset.
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))
    except TokenError as err:
        line, column = err.args[1]
        error(err.args[0], filename, (line, column))
    except SyntaxError as err:
        error(err, filename)
    except OSError as err:
        error(err)
    except KeyboardInterrupt:
        print('interrupted\n')
    except Exception as err:
        perror('unexpected error: %s' % err)
        raise

# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
