import re
import sys

def bind_vars(formal_args, values):
    return {varname: value for varname, value in zip(formal_args, values)}
def make_new_env(old_env, binding):
    return (binding, old_env)
def lookup(varname, env):
    while env:
        binding = env[0]
        if varname in binding:
            return binding[varname]
        env = env[1]
    raise Exception(f'not found: {varname}')
def insert_var(var, value, env):
    if env:
        env[0][var] = value

def ast_type(x):
    return x[0]

def funcall_get_actual_args(term):
    return term[2]
def funcall_get_function(term):
    return term[1]
def function_get_formal_args(term):
    return term[1]
def function_get_body(term):
    return term[2]
def function_get_assoc_env(term):
    return term[3]
def var_get_name(term):
    return term[1]
def int_get_value(term):
    return term[1]
def pair_get_value(term):
    return term[1:]
def str_get_value(term):
    return term[1]
def def_get_varname(term):
    return term[1]
def def_get_value(term):
    return term[2]
def if_get_cond(term):
    return term[1]
def if_get_then(term):
    return term[2]
def if_get_else(term):
    return term[3]
def cond_get_clause_list(term):
    return term[1:]
def let_get_binding_list(term):
    return term[1:-1]
def let_get_body(term):
    return term[-1]
def letstar_get_binding_list(term):
    return term[1:-1]
def letstar_get_body(term):
    return term[-1]
def letrec_get_binding_list(term):
    return term[1:-1]
def letrec_get_body(term):
    return term[-1]
def binding_get_varname(binding):
    return binding[0]
def binding_get_value(binding):
    return binding[1]
def and_get_args(term):
    return term[1:]
def or_get_args(term):
    return term[1:]
def setbang_get_varname(term):
    return term[1]
def setbang_get_value(term):
    return term[2]
def begin_get_body(term):
    return term[1:]

def is_primitive_function(term):
    return type(term) == str

def my_eval(term, env):
    if ast_type(term) == 'funcall':
        actual_args = funcall_get_actual_args(term)
        function = funcall_get_function(term)
        evaluated_function = function if is_primitive_function(function) else my_eval(funcall_get_function(term), env)
        evaluated_actual_args = [my_eval(arg, env) for arg in actual_args]
        res = my_apply(evaluated_function, evaluated_actual_args, env)
        return res 
    elif ast_type(term) == 'lambda':
        if len(term) <= 3:
            return (*term, env)
        return term
    elif ast_type(term) == 'var':
        # notice that we use the tag "var" to represent variables.
        varname = var_get_name(term)
        lookup_res = lookup(var_get_name(term), env)
        return lookup_res
    elif ast_type(term) == 'int':
        return int_get_value(term)
    elif ast_type(term) == 'pair':
        return pair_get_value(term)
    elif ast_type(term) == 'str':
        return str_get_value(term)
    elif ast_type(term) == 'def':
        var = def_get_varname(term)
        body = def_get_value(term)
        insert_var(var, my_eval(body, env), env)
        return None
    elif ast_type(term) == 'begin':
        body = begin_get_body(term)
        result = None
        for expr in body:
            result = my_eval(expr, env)
        return result
    elif ast_type(term) == 'if':
        evaluated_cond = my_eval(if_get_cond(term), env)
        return my_eval(if_get_then(term) if evaluated_cond else if_get_else(term), env)
    elif ast_type(term) == 'cond':
        clause_list = cond_get_clause_list(term)
        for cond, body in clause_list:
            if my_eval(cond, env):
                return my_eval(body, env)
        return None
    elif ast_type(term) == 'let':
        binding_list = let_get_binding_list(term)
        evaled_binding = {}
        for var, val in binding_list:
            evaled_binding[var] = my_eval(val, env)
        new_env = (evaled_binding, env)
        return my_eval(let_get_body(term), new_env)
    elif ast_type(term) == 'let*':
        binding_list = let_get_binding_list(term)
        body_env = env
        for var, val in binding_list:
            binding = {}
            binding[var] = my_eval(val, body_env)
            body_env = (binding, body_env)
        return my_eval(let_get_body(term), body_env)
    elif ast_type(term) == 'letrec':
        binding_list = letrec_get_binding_list(term)
        mapping = {}
        let_env = (mapping, env)
        for binding in binding_list:
            varname = binding_get_varname(binding)
            value = binding_get_value(binding)
            mapping[varname] = my_eval(value, let_env)
        let_body = letrec_get_body(term)
        return my_eval(let_body, let_env)
    elif ast_type(term) == 'and':
        result = True
        arglist = and_get_args(term)
        for arg in arglist:
            result = my_eval(arg, env)
            if not result:
                return False
        # We also want to have the behaviour where `and` returns the value of the last arguments
        # when it's all true value. If you want to make `and` only return boolean values,
        # change `result` to `True`.
        return result
    elif ast_type(term) == 'or':
        result = False
        arglist = or_get_args(term)
        for arg in arglist:
            result = my_eval(arg, env)
            # If you want `or` to return just the boolean value instead, change `return result`
            # to `return True`.
            if result:
                return result
        return False
    elif ast_type(term) == 'set!':
        var = setbang_get_varname(term)
        body = setbang_get_value(term)
        insert_var(var, my_eval(body, env), env)
        return None
    else:
        return term

# assumes that `arglist` only contains evaluated values.
def my_apply(function, arglist, env):
    if function == '+':
        if not arglist:
            return 0
        else:
            result = arglist[0]
            for arg in arglist[1:]:
                result += arg
            return result
    elif function == '-':
        if not arglist:
            return 0
        return arglist[0] - arglist[1]
    elif function == '*':
        if not arglist:
            return 0
        else:
            result = 1
            for arg in arglist:
                result *= arg
            return result
    elif function == '/':
        if not arglist:
            return 0
        return arglist[0] / arglist[1]
    elif function == 'cons':
        if not arglist:
            raise Exception('no value to cons')
        elif len(arglist) != 2:
            raise Exception('it has to be exactly 2 values when cons-ing')
        else:
            return (arglist[0], arglist[1])

    elif function == 'car':
        if not arglist:
            raise Exception('no value to car')
        return arglist[0][0]
    elif function == 'cdr':
        if not arglist:
            raise Exception('no value to car')
        return arglist[0][1]
    elif function == 'not':
        return not arglist[0]

    elif function == 'py-import':
        # This is for demonstration only so I left out some necessary guarding here - you
        # might want to add the guardings to your own version.
        return __import__(arglist[0])
    elif function == 'py-getattr':
        return getattr(arglist[0], arglist[1])
    elif function == 'py-index':
        return arglist[0].__getitem__(arglist[1])
    elif function == 'py-call':
        return (arglist[0])(*arglist[1:])
    elif function == 'apply':
        return my_apply(arglist[0], arglist[1:], env)
    elif function == 'eval':
        return my_eval(arglist[0], env)
    elif function == 'apply-with-env':
        # (apply-with-env ENV FUNCTION ARGLIST)
        return my_apply(arglist[1], arglist[2:], arglist[0])
    elif function == 'eval-with-env':
        # (eval-with-env ENV TERM)
        return my_eval(arglist[1], arglist[0])
    elif function == 'print':
        for arg in arglist:
            print(arg)
        return None
    elif function == 'input':
        for arg in arglist:
            print(arg)
        return input()
    elif function == '>':
        # Again this is for demonstration only so I left out some necessary guarding here.
        # You might want to add the guardings to your own version.
        return arglist[0] > arglist[1]
    elif function == '<':
        return arglist[0] < arglist[1]
    elif function == '=':
        # We will use another identifier for  the assignment operator - the equal symbol is
        # reserved for testing equality here.
        return arglist[0] == arglist[1]
    elif function == 'int?':
        return type(arglist[0]) == int
    elif function == 'str?':
        return type(arglist[0]) == str
    elif function == 'pair?':
        return type(arglist[0]) == tuple and len(arglist[0]) == 2
            
    else:
        body = function_get_body(function)
        formal_args = function_get_formal_args(function)
        fun_env = function_get_assoc_env(function)
        new_env = make_new_env(fun_env, bind_vars(formal_args, arglist))
        return my_eval(body, new_env)

# at this point we need to have an evaluator that evaluates a *sequence* of
# expressions as well.
def my_eval_s(termlist, env):
    value = None
    for term in termlist:
        value = my_eval(term, env)
    return value

# `REGEX_FLOAT` must be checked first because it overlaps with `REGEX_INT`
REGEX_FLOAT = re.compile(r"[0-9]+\.[0-9]+")
REGEX_INT = re.compile(r"[0-9]+")
# I deliberately make it recognizes a subset of Python string literal so that I can directly use `eval`.
REGEX_STRING = re.compile("\"(?:\\\\n|\\\\t|\\\\r|\\\\\"|\\\\x[\\da-fA-F]{2}|[^\"\\t\\r\\n\\\\])*\"")
# You might have to change it to include any characters that primitives have to prevent errors.
REGEX_IDENTIFIER = re.compile(r"[\w+*/%?<>=:,.-]+")

def parse_atom(string):
    # It's actually not very good to return stuff like this. You might want to have them all stated more clearly.
    # Notice that I put `parse_float` before `parse_int`; it has to be in this order because what these two recognizes
    # overlap.
    return parse_float(string) or parse_int(string) or parse_string(string) or parse_identifier(string)

def parse_float(string):
    preres = REGEX_FLOAT.match(string)
    if preres:
        taken_string = preres.group()
        rest = string[len(taken_string):]
        return (('float', float(taken_string)), rest)
    return None

def parse_int(string):
    preres = REGEX_INT.match(string)
    if preres:
        taken_string = preres.group()
        rest = string[len(taken_string):]
        return (('int', int(taken_string)), rest)
    return None

def parse_string(string):
    preres = REGEX_STRING.match(string)
    if preres:
        taken_string = preres.group()
        rest = string[len(taken_string):]
        # Note that `eval` is used here instead of `str` because `str` does not do anything on string.
        return (('str', eval(taken_string)), rest)
    return None

def parse_identifier(string):
    preres = REGEX_IDENTIFIER.match(string)
    if preres:
        taken_string = preres.group()
        rest = string[len(taken_string):]
        # Note that `eval` is used here instead of `str` because `str` does not do anything on string.
        return (taken_string, rest)
    return None

def skip_whitespace(string):
    # ...yep. We ignore all whitespaces.
    return string.lstrip()

def parse_list(string):
    # We do it in some kind of a "two-phase" fashion.
    if string.startswith("("):
        # This is phase 1: we process the input string into a "raw" syntax tree
        string = skip_whitespace(string[1:])
        result = []
        while string:
            buf = parse_atom(string)
            if not buf:
                buf = parse_list(string)
            if not buf:
                break
            result.append(buf[0])
            string = skip_whitespace(buf[1])
        if not string.startswith(")"):
            # if the code reaches here, the parenthesis is not balanced.
            raise Exception(f'syntax error at {string[:10] if len(string) > 10 else string}')
        return (result, string[1:])
    else:
        return None
def parse_list_phase2(result):
    # This is phase 2: we reconstruct the syntax tree into proper AST.
    # Normally you would want to add more guarding here - a lot of guarding.
    if type(result) == list:
        if len(result) >= 1:
            if result[0] == 'lambda':
                return ('lambda', result[1], parse_list_phase2(result[2]))
            elif result[0] == 'def':
                return ('def', result[1], parse_list_phase2(result[2]))
            elif result[0] == 'if':
                return ('if', parse_list_phase2(result[1]), parse_list_phase2(result[2]), parse_list_phase2(result[3]))
            elif result[0] == 'cond':
                return ('cond', *(((x[0], parse_list_phase2(x[1]))) for x in result[1]))
            elif result[0] in ['let', 'let*', 'letrec']:
                return (result[0], *(((x[0], parse_list_phase2(x[1]))) for x in result[1:-1]), parse_list_phase2(result[-1]))
            elif result[0] in ['and', 'or']:
                return (result[0], *(parse_list_phase2(x) for x in result[1:]))
            elif result[0] in ['set!']:
                return ('set!', result[1], parse_list_phase2(result[2]))
            elif result[0] == 'begin':
                return ('begin', *(parse_list_phase2(x) for x in result[1:]))
            else:
                return ('funcall', parse_list_phase2(result[0]), [parse_list_phase2(x) for x in result[1:]])
    elif type(result) == str:
        if result in [
            '+', '-', '*', '/',
            'cons', 'car', 'cdr',
            'not',
            'py-import', 'py-getattr', 'py-index', 'py-call',
            'apply', 'eval', 'apply-with-env', 'eval-with-env',
            'print', 'input',
            '>', '<', '=',
            'int?', 'str?', 'pair?'
        ]:
            return result
        else:
            return ('var', result)
    else:
        return result

def parse_sexpr(string):
    res = parse_atom(string)
    if res:
        return res
    res = parse_list(string)
    if res:
        return (parse_list_phase2(res[0]), res[1])
    return None

def parse_source(string):
    result = []
    while string:
        buf = parse_sexpr(string)
        if not buf and string:
            raise Exception(f'grammar error at {string[:10] if len(string) > 10 else string}')
        result.append(buf[0])
        string = skip_whitespace(buf[1])
    return result


_GLOBAL_ENV = ({}, None)

def print_usage():
    print('''
usage:
    minilisp [input-source-file]          - run the file as a script.
    minilisp --repl [input-source-file]   - start the REPL.
    minilisp --help                       - display this help message.
''')

def repl():
    buffer = []
    while True:
        try:
            user_input = input('> ')
            paren_balance = calc_paren_balance(user_input)
            if paren_balance > 0:
                buffer.append(user_input)
                continue
            elif paren_balance < 0:
                print('syntax error', file=sys.stderr)
                buffer = []
                continue
            else:
                buffer.append(user_input)
                input_string = '\n'.join(buffer)
                parsed_input = parse_sexpr(input_string)[0]
                print('pinput ', parsed_input)
                result = my_eval(parsed_input, _GLOBAL_ENV)
                print(result)
                buffer = []
        except EOFError:
            break
        except Exception as e:
            print(f'Internal eror {e.args}', file=sys.stderr)
            buffer = []
            continue
_PAREN_BALANCE = 0
def calc_paren_balance(input_str):
    global _PAREN_BALANCE
    for char in input_str:
        if char == '(':
            _PAREN_BALANCE += 1
        elif char == ')':
            _PAREN_BALANCE -= 1
            if _PAREN_BALANCE < 0:
                print('syntax error', file=sys.stderr)
                raise Exception()
    return _PAREN_BALANCE

if __name__ == '__main__':
    if sys.argv[1] == '--help':
        print_usage()
    elif sys.argv[1] == '--repl':
        # Of course you need to add guarding for corner cases like the provided
        # file name does not point to any existing file - this piece of code
        # is for demonstration only.
        loading_file_name = sys.argv[2] if len(sys.argv) >= 3 else None
        if loading_file_name:
            with open(loading_file_name, 'r') as f:
                loading_source = f.read()
            my_eval_s(parse_source(loading_source), _GLOBAL_ENV)
        repl()
    else:
        loading_file_name = sys.argv[1] if len(sys.argv) >= 2 else None
        if loading_file_name:
            with open(loading_file_name, 'r') as f:
                loading_source = f.read()
            my_eval_s(parse_source(loading_source), _GLOBAL_ENV)
        else:
            print_usage()