import re import sys def bind_vars(formal_args, values): return {varname: value for varname, value in zip(formal_args, values)} def make_new_env(old_env, binding): return (binding, old_env) def lookup(varname, env): while env: binding = env[0] if varname in binding: return binding[varname] env = env[1] raise Exception(f'not found: {varname}') def insert_var(var, value, env): if env: env[0][var] = value def ast_type(x): return x[0] def funcall_get_actual_args(term): return term[2] def funcall_get_function(term): return term[1] def function_get_formal_args(term): return term[1] def function_get_body(term): return term[2] def function_get_assoc_env(term): return term[3] def var_get_name(term): return term[1] def int_get_value(term): return term[1] def pair_get_value(term): return term[1:] def str_get_value(term): return term[1] def def_get_varname(term): return term[1] def def_get_value(term): return term[2] def if_get_cond(term): return term[1] def if_get_then(term): return term[2] def if_get_else(term): return term[3] def cond_get_clause_list(term): return term[1:] def let_get_binding_list(term): return term[1:-1] def let_get_body(term): return term[-1] def letstar_get_binding_list(term): return term[1:-1] def letstar_get_body(term): return term[-1] def letrec_get_binding_list(term): return term[1:-1] def letrec_get_body(term): return term[-1] def binding_get_varname(binding): return binding[0] def binding_get_value(binding): return binding[1] def and_get_args(term): return term[1:] def or_get_args(term): return term[1:] def setbang_get_varname(term): return term[1] def setbang_get_value(term): return term[2] def begin_get_body(term): return term[1:] def is_primitive_function(term): return type(term) == str def my_eval(term, env): if ast_type(term) == 'funcall': actual_args = funcall_get_actual_args(term) function = funcall_get_function(term) evaluated_function = function if is_primitive_function(function) else my_eval(funcall_get_function(term), env) evaluated_actual_args = [my_eval(arg, env) for arg in actual_args] res = my_apply(evaluated_function, evaluated_actual_args, env) return res elif ast_type(term) == 'lambda': if len(term) <= 3: return (*term, env) return term elif ast_type(term) == 'var': # notice that we use the tag "var" to represent variables. varname = var_get_name(term) lookup_res = lookup(var_get_name(term), env) return lookup_res elif ast_type(term) == 'int': return int_get_value(term) elif ast_type(term) == 'pair': return pair_get_value(term) elif ast_type(term) == 'str': return str_get_value(term) elif ast_type(term) == 'def': var = def_get_varname(term) body = def_get_value(term) insert_var(var, my_eval(body, env), env) return None elif ast_type(term) == 'begin': body = begin_get_body(term) result = None for expr in body: result = my_eval(expr, env) return result elif ast_type(term) == 'if': evaluated_cond = my_eval(if_get_cond(term), env) return my_eval(if_get_then(term) if evaluated_cond else if_get_else(term), env) elif ast_type(term) == 'cond': clause_list = cond_get_clause_list(term) for cond, body in clause_list: if my_eval(cond, env): return my_eval(body, env) return None elif ast_type(term) == 'let': binding_list = let_get_binding_list(term) evaled_binding = {} for var, val in binding_list: evaled_binding[var] = my_eval(val, env) new_env = (evaled_binding, env) return my_eval(let_get_body(term), new_env) elif ast_type(term) == 'let*': binding_list = let_get_binding_list(term) body_env = env for var, val in binding_list: binding = {} binding[var] = my_eval(val, body_env) body_env = (binding, body_env) return my_eval(let_get_body(term), body_env) elif ast_type(term) == 'letrec': binding_list = letrec_get_binding_list(term) mapping = {} let_env = (mapping, env) for binding in binding_list: varname = binding_get_varname(binding) value = binding_get_value(binding) mapping[varname] = my_eval(value, let_env) let_body = letrec_get_body(term) return my_eval(let_body, let_env) elif ast_type(term) == 'and': result = True arglist = and_get_args(term) for arg in arglist: result = my_eval(arg, env) if not result: return False # We also want to have the behaviour where `and` returns the value of the last arguments # when it's all true value. If you want to make `and` only return boolean values, # change `result` to `True`. return result elif ast_type(term) == 'or': result = False arglist = or_get_args(term) for arg in arglist: result = my_eval(arg, env) # If you want `or` to return just the boolean value instead, change `return result` # to `return True`. if result: return result return False elif ast_type(term) == 'set!': var = setbang_get_varname(term) body = setbang_get_value(term) insert_var(var, my_eval(body, env), env) return None else: return term # assumes that `arglist` only contains evaluated values. def my_apply(function, arglist, env): if function == '+': if not arglist: return 0 else: result = arglist[0] for arg in arglist[1:]: result += arg return result elif function == '-': if not arglist: return 0 return arglist[0] - arglist[1] elif function == '*': if not arglist: return 0 else: result = 1 for arg in arglist: result *= arg return result elif function == '/': if not arglist: return 0 return arglist[0] / arglist[1] elif function == 'cons': if not arglist: raise Exception('no value to cons') elif len(arglist) != 2: raise Exception('it has to be exactly 2 values when cons-ing') else: return (arglist[0], arglist[1]) elif function == 'car': if not arglist: raise Exception('no value to car') return arglist[0][0] elif function == 'cdr': if not arglist: raise Exception('no value to car') return arglist[0][1] elif function == 'not': return not arglist[0] elif function == 'py-import': # This is for demonstration only so I left out some necessary guarding here - you # might want to add the guardings to your own version. return __import__(arglist[0]) elif function == 'py-getattr': return getattr(arglist[0], arglist[1]) elif function == 'py-index': return arglist[0].__getitem__(arglist[1]) elif function == 'py-call': return (arglist[0])(*arglist[1:]) elif function == 'apply': return my_apply(arglist[0], arglist[1:], env) elif function == 'eval': return my_eval(arglist[0], env) elif function == 'apply-with-env': # (apply-with-env ENV FUNCTION ARGLIST) return my_apply(arglist[1], arglist[2:], arglist[0]) elif function == 'eval-with-env': # (eval-with-env ENV TERM) return my_eval(arglist[1], arglist[0]) elif function == 'print': for arg in arglist: print(arg) return None elif function == 'input': for arg in arglist: print(arg) return input() elif function == '>': # Again this is for demonstration only so I left out some necessary guarding here. # You might want to add the guardings to your own version. return arglist[0] > arglist[1] elif function == '<': return arglist[0] < arglist[1] elif function == '=': # We will use another identifier for the assignment operator - the equal symbol is # reserved for testing equality here. return arglist[0] == arglist[1] elif function == 'int?': return type(arglist[0]) == int elif function == 'str?': return type(arglist[0]) == str elif function == 'pair?': return type(arglist[0]) == tuple and len(arglist[0]) == 2 else: body = function_get_body(function) formal_args = function_get_formal_args(function) fun_env = function_get_assoc_env(function) new_env = make_new_env(fun_env, bind_vars(formal_args, arglist)) return my_eval(body, new_env) # at this point we need to have an evaluator that evaluates a *sequence* of # expressions as well. def my_eval_s(termlist, env): value = None for term in termlist: value = my_eval(term, env) return value # `REGEX_FLOAT` must be checked first because it overlaps with `REGEX_INT` REGEX_FLOAT = re.compile(r"[0-9]+\.[0-9]+") REGEX_INT = re.compile(r"[0-9]+") # I deliberately make it recognizes a subset of Python string literal so that I can directly use `eval`. REGEX_STRING = re.compile("\"(?:\\\\n|\\\\t|\\\\r|\\\\\"|\\\\x[\\da-fA-F]{2}|[^\"\\t\\r\\n\\\\])*\"") # You might have to change it to include any characters that primitives have to prevent errors. REGEX_IDENTIFIER = re.compile(r"[\w+*/%?<>=:,.-]+") def parse_atom(string): # It's actually not very good to return stuff like this. You might want to have them all stated more clearly. # Notice that I put `parse_float` before `parse_int`; it has to be in this order because what these two recognizes # overlap. return parse_float(string) or parse_int(string) or parse_string(string) or parse_identifier(string) def parse_float(string): preres = REGEX_FLOAT.match(string) if preres: taken_string = preres.group() rest = string[len(taken_string):] return (('float', float(taken_string)), rest) return None def parse_int(string): preres = REGEX_INT.match(string) if preres: taken_string = preres.group() rest = string[len(taken_string):] return (('int', int(taken_string)), rest) return None def parse_string(string): preres = REGEX_STRING.match(string) if preres: taken_string = preres.group() rest = string[len(taken_string):] # Note that `eval` is used here instead of `str` because `str` does not do anything on string. return (('str', eval(taken_string)), rest) return None def parse_identifier(string): preres = REGEX_IDENTIFIER.match(string) if preres: taken_string = preres.group() rest = string[len(taken_string):] # Note that `eval` is used here instead of `str` because `str` does not do anything on string. return (taken_string, rest) return None def skip_whitespace(string): # ...yep. We ignore all whitespaces. return string.lstrip() def parse_list(string): # We do it in some kind of a "two-phase" fashion. if string.startswith("("): # This is phase 1: we process the input string into a "raw" syntax tree string = skip_whitespace(string[1:]) result = [] while string: buf = parse_atom(string) if not buf: buf = parse_list(string) if not buf: break result.append(buf[0]) string = skip_whitespace(buf[1]) if not string.startswith(")"): # if the code reaches here, the parenthesis is not balanced. raise Exception(f'syntax error at {string[:10] if len(string) > 10 else string}') return (result, string[1:]) else: return None def parse_list_phase2(result): # This is phase 2: we reconstruct the syntax tree into proper AST. # Normally you would want to add more guarding here - a lot of guarding. if type(result) == list: if len(result) >= 1: if result[0] == 'lambda': return ('lambda', result[1], parse_list_phase2(result[2])) elif result[0] == 'def': return ('def', result[1], parse_list_phase2(result[2])) elif result[0] == 'if': return ('if', parse_list_phase2(result[1]), parse_list_phase2(result[2]), parse_list_phase2(result[3])) elif result[0] == 'cond': return ('cond', *(((x[0], parse_list_phase2(x[1]))) for x in result[1])) elif result[0] in ['let', 'let*', 'letrec']: return (result[0], *(((x[0], parse_list_phase2(x[1]))) for x in result[1:-1]), parse_list_phase2(result[-1])) elif result[0] in ['and', 'or']: return (result[0], *(parse_list_phase2(x) for x in result[1:])) elif result[0] in ['set!']: return ('set!', result[1], parse_list_phase2(result[2])) elif result[0] == 'begin': return ('begin', *(parse_list_phase2(x) for x in result[1:])) else: return ('funcall', parse_list_phase2(result[0]), [parse_list_phase2(x) for x in result[1:]]) elif type(result) == str: if result in [ '+', '-', '*', '/', 'cons', 'car', 'cdr', 'not', 'py-import', 'py-getattr', 'py-index', 'py-call', 'apply', 'eval', 'apply-with-env', 'eval-with-env', 'print', 'input', '>', '<', '=', 'int?', 'str?', 'pair?' ]: return result else: return ('var', result) else: return result def parse_sexpr(string): res = parse_atom(string) if res: return res res = parse_list(string) if res: return (parse_list_phase2(res[0]), res[1]) return None def parse_source(string): result = [] while string: buf = parse_sexpr(string) if not buf and string: raise Exception(f'grammar error at {string[:10] if len(string) > 10 else string}') result.append(buf[0]) string = skip_whitespace(buf[1]) return result _GLOBAL_ENV = ({}, None) def print_usage(): print(''' usage: minilisp [input-source-file] - run the file as a script. minilisp --repl [input-source-file] - start the REPL. minilisp --help - display this help message. ''') def repl(): buffer = [] while True: try: user_input = input('> ') paren_balance = calc_paren_balance(user_input) if paren_balance > 0: buffer.append(user_input) continue elif paren_balance < 0: print('syntax error', file=sys.stderr) buffer = [] continue else: buffer.append(user_input) input_string = '\n'.join(buffer) parsed_input = parse_sexpr(input_string)[0] print('pinput ', parsed_input) result = my_eval(parsed_input, _GLOBAL_ENV) print(result) buffer = [] except EOFError: break except Exception as e: print(f'Internal eror {e.args}', file=sys.stderr) buffer = [] continue _PAREN_BALANCE = 0 def calc_paren_balance(input_str): global _PAREN_BALANCE for char in input_str: if char == '(': _PAREN_BALANCE += 1 elif char == ')': _PAREN_BALANCE -= 1 if _PAREN_BALANCE < 0: print('syntax error', file=sys.stderr) raise Exception() return _PAREN_BALANCE if __name__ == '__main__': if sys.argv[1] == '--help': print_usage() elif sys.argv[1] == '--repl': # Of course you need to add guarding for corner cases like the provided # file name does not point to any existing file - this piece of code # is for demonstration only. loading_file_name = sys.argv[2] if len(sys.argv) >= 3 else None if loading_file_name: with open(loading_file_name, 'r') as f: loading_source = f.read() my_eval_s(parse_source(loading_source), _GLOBAL_ENV) repl() else: loading_file_name = sys.argv[1] if len(sys.argv) >= 2 else None if loading_file_name: with open(loading_file_name, 'r') as f: loading_source = f.read() my_eval_s(parse_source(loading_source), _GLOBAL_ENV) else: print_usage()