#!/usr/bin/env python3
#
# A C89-to-Python transpiler that is meant to behave in a maximally weird way
# that is still standards compliant. The main purpose is for ensuring that C
# programs are portable. For example, it has 100-bit char/int/long instead of
# the usual sizes.
#
# Copyright © 2026 Samuel Lidén Borell
#
# SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
#

import argparse
import os.path
import re
import sys

__all__ = ['Ds9kCCompiler', 'CSyntaxError']

spaces = re.compile(r' +')
commaspace = re.compile(r', *')
# A /* ... */ comment at the very end / very start of a line.
eolcomments = re.compile(r'/\*([^*]|\*[^/])*\*/ *$')
startcomments = re.compile(r'^ */\*([^*]|\*[^/])*\*/')

c_keywords = [
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int',
    'long', 'register', 'restrict', 'return', 'short', 'signed', 'sizeof',
    'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void',
    'volatile', 'while'
]
# Keyword tokens are emitted by the tokenizer with a '%' prefix.
c_int_types = [
    '%char', '%int', '%long', '%short', '%signed', '%unsigned',
]
c_elementary_types = c_int_types + ['%void', '_P9KCC_FILE', '_P9KCC_va_list']
c_tags = [
    '%enum', '%struct', '%union',
]
c_quals = [
    '%auto', '%const', '%extern', '%register', '%static', '%volatile',
]
c_type_prefixes = c_elementary_types + c_tags + c_quals
decl_end_tokens = [';', ',', '}']

# Operator table entries are (precedence level, fixity, associativity).
# TODO right/left associativity
c_infix_postfix_operators = {
    '=': (1, 'infix', 'R'),
    '+=': (1, 'infix', 'R'),
    '-=': (1, 'infix', 'R'),
    '*=': (1, 'infix', 'R'),
    '/=': (1, 'infix', 'R'),
    '%=': (1, 'infix', 'R'),
    '~=': (1, 'infix', 'R'),
    '&=': (1, 'infix', 'R'),
    '|=': (1, 'infix', 'R'),
    '^=': (1, 'infix', 'R'),
    '<<=': (1, 'infix', 'R'),
    '>>=': (1, 'infix', 'R'),
    ',': (2, 'infix', '?'),
    '||': (3, 'infix', 'L'),
    '|': (3, 'infix', 'L'),
    '&&': (4, 'infix', 'L'),
    '&': (4, 'infix', 'L'),
    '^': (4, 'infix', 'L'),
    '==': (5, 'infix', 'L'),
    '!=': (5, 'infix', 'L'),
    '<': (5, 'infix', 'L'),
    '>': (5, 'infix', 'L'),
    '<=': (5, 'infix', 'L'),
    '>=': (5, 'infix', 'L'),
    '+': (6, 'infix', 'L'),
    '-': (6, 'infix', 'L'),
    '*': (7, 'infix', 'L'),
    '/': (7, 'infix', 'L'),
    '%': (7, 'infix', 'L'),
    '.': (9, 'infix', '?'),
    '->': (9, 'infix', '?'),
    # check precedences of these
    '++': (5, 'post', '?'),
    '--': (5, 'post', '?'),
    '[': (8, 'post', '?'),
}
c_prefix_operators = {
    # check precedences of these
    '!': (5, 'prefix', 'R'),
    '*': (5, 'prefix', 'R'),
    '&': (5, 'prefix', 'R'),
    '#': (8, 'prefix', 'R'),
}
CALL_PRECEDENCE = 8

# Operator info used for the '(' / '%call' / '%arrind' markers on the operator
# stack. Level 0 is lower than every real operator, so the markers are never
# popped by an operator with a higher precedence.
GROUP_MARKER_OP = (0, '?', 'R')

NUM_PREFIX = '$'
NUM_ZERO = NUM_PREFIX + '0'
NUM_ONE = NUM_PREFIX + '1'

_TWO_CHAR_TOKENS = frozenset([
    '==', '!=', '<=', '>=', '&&', '||', '++', '--',
    '+=', '-=', '*=', '/=', '~=', '|=', '&=', '^=', '%=', '->'])
_SIMPLE_ESCAPES = {'n': '\n', 'r': '\r', 't': '\t', '0': '\0'}
_COMPARISON_OPS = ('==', '!=', '<', '<=', '>', '>=')
_SCALAR_OPS = _COMPARISON_OPS + ('+', '-', '*', '/', '%', '&', '|', '^')


class Ds9kCCompiler:
    """Front end of the compiler: pre-processor, tokenizer and parser.

    Tokens are plain strings: keywords are prefixed with '%', integer
    constants with NUM_PREFIX, and string/character literals consist of the
    opening quote followed by the decoded contents (no closing quote).
    """

    def __init__(self):
        self.arg_parser = argparse.ArgumentParser(
            usage='%(prog)s [-c] [-o output] sourcefile.c...',
            description='"DeathStation 9000"-like C Compiler')
        self.arg_parser.add_argument(
            'sourcefiles', action='extend', nargs='+',
            help='Source files', metavar='SOURCEFILE')
        self.arg_parser.add_argument(
            '-c', action='store_true', dest='compile_to_obj',
            help='Compile to (pseudo-)object file')
        self.arg_parser.add_argument(
            '-o', dest='output',
            help='Output file', metavar='OUTFILE')
        self.arg_parser.add_argument(
            '-I', action='append', dest='include_dirs', default=[],
            help='Include directory', metavar='DIR')
        self.arg_parser.add_argument(
            '-D', action='append', dest='initial_defines', default=[],
            help='Define a macro or function-macro', metavar='NAME=VALUE')
        self.arg_parser.add_argument(
            '-W', dest='warnings',
            help='Enable/disable warnings (ignored)', metavar='NAME')
        self.arg_parser.add_argument(
            '-f', dest='flags',
            help='Enable/disable flags (ignored)', metavar='FLAG')
        self.arg_parser.add_argument(
            '-g', action='store_true', dest='debuginfo',
            help='Enable debuginfo (ignored)')
        self.arg_parser.add_argument(
            '-std', dest='c_standard',
            help='C standard version (ignored, always c89)', metavar='CSTD')
        self.arg_parser.add_argument(
            '-ansi', action='store_true', dest='ansi_mode',
            help='Enable ANSI mode (default, ignored)')
        self.arg_parser.add_argument(
            '-pedantic', action='store_true', dest='pedantic_mode',
            help='Enable pedantic warnings (default, ignored)')
        self.arg_parser.add_argument(
            '-x', dest='language_name',
            help='Language name (ignored, always \'c\')', metavar='LANG')
        self.arg_parser.add_argument(
            '-E', action='store_true', dest='preprocess_only',
            help='Only run pre-processor')
        self.src_filename = None
        self.line = None
        self.defines = {}
        self.in_func_body = False
        self.scope = Scope(self, None)

    def init_from_argv(self, argv=None):
        """Parse command line options (defaults to sys.argv[1:])."""
        if argv is None:
            argv = sys.argv[1:]
        self.options = self.arg_parser.parse_args(argv)
        if not self.options.output:
            if self.options.preprocess_only:
                self.options.output = '-'
            elif self.options.compile_to_obj:
                self.options.output = \
                    self.options.sourcefiles[-1].removesuffix(".c") + ".o"
            else:
                self.options.output = "a.out"

    def error(self, message):
        """Raise a CSyntaxError at the current file/line."""
        raise CSyntaxError(self.src_filename, self.line, message)

    def joinlines(self, lines):
        """Yield logical lines, joining lines that end with a backslash.

        Also keeps self.line up to date with the physical line number.
        """
        buffer = ''
        self.line = 0
        for line in lines:
            self.line += 1
            if line.endswith('\\\n'):
                buffer += line.removesuffix('\\\n') + ' '
            else:
                yield buffer + line
                buffer = ''
        if len(buffer) > 0:
            yield buffer

    def _pp_conditional(self, ppop, ppvalue, cond_stack):
        """Update cond_stack for #if/#ifdef/#ifndef/#elif/#else/#endif.

        Each stack frame is a list [active, taken]: 'active' tells whether
        the current branch is emitted, 'taken' whether some branch of this
        conditional has already been selected. Conditionals nested inside
        a skipped branch are pushed as [False, True] so that none of their
        branches can ever become active.
        """
        if ppop in ('if', 'elif') and ppvalue is None:
            self.error('Missing expression after #%s' % (ppop,))
        if ppop in ('ifdef', 'ifndef', 'if'):
            if cond_stack and not cond_stack[-1][0]:
                cond_stack.append([False, True])
                return
            if ppop == 'ifdef':
                cond = ppvalue in self.defines
            elif ppop == 'ifndef':
                cond = ppvalue not in self.defines
            else:
                cond = self.eval_ppexpr(ppvalue)
            cond_stack.append([cond, cond])
        elif ppop == 'endif':
            if not cond_stack:
                self.error('#endif without matching #if')
            cond_stack.pop()
        else:
            # else / elif
            if not cond_stack:
                self.error('#%s without matching #if' % (ppop,))
            frame = cond_stack[-1]
            if frame[1]:
                frame[0] = False
            else:
                # The expression is only evaluated when no earlier branch
                # has been taken (and the enclosing region is active).
                cond = ppop == 'else' or self.eval_ppexpr(ppvalue)
                frame[0] = cond
                frame[1] = cond

    def preprocess(self, filename):
        """Pre-process a source file and yield its tokens.

        Handles conditional compilation, #define/#undef, #error and
        #include (recursively). Raises CSyntaxError (through self.error)
        on malformed or unknown directives.
        """
        with open(filename, 'r') as file:
            cond_stack = []
            in_comment = False
            self.src_filename = filename
            for line in self.joinlines(file):
                if len(line) > 509:
                    self.error("Line too long")
                line = line.strip()
                if in_comment:
                    if '*/' in line:
                        _, line = line.split('*/', 1)
                        in_comment = False
                    else:
                        continue
                # FIXME comments inside lines are not yet supported
                line = eolcomments.sub('', line)
                if not line.startswith('#'):
                    line = startcomments.sub('', line)
                active = not cond_stack or cond_stack[-1][0]
                if line.startswith('#'):
                    splitted = spaces.split(line[1:].strip(), 1)
                    ppop = splitted[0]
                    ppvalue = splitted[1] if len(splitted) >= 2 else None
                    if ppop in ('ifdef', 'ifndef', 'if',
                                'elif', 'else', 'endif'):
                        self._pp_conditional(ppop, ppvalue, cond_stack)
                    elif not active:
                        # Directives in skipped branches must have no effect
                        pass
                    elif ppop == 'define':
                        # TODO check for incorrect redefinitions
                        if ppvalue is None:
                            self.error('Missing macro name in #define')
                        arr = spaces.split(ppvalue, 1)
                        name = arr[0]
                        if '(' in name:
                            # Function-like macro: the name extends up to
                            # (and includes) the closing parenthesis.
                            arr = ppvalue.split(')', 1)
                            name = arr[0] + ')'
                        value = arr[1] if len(arr) >= 2 else None
                        self.define(name, value)
                    elif ppop == 'undef':
                        self.defines.pop(ppvalue, None)
                    elif ppop == 'error':
                        self.error(ppvalue)
                    elif ppop == 'include':
                        yield from self._pp_include(filename, ppvalue)
                    else:
                        self.error("Unknown preprocessor '#%s'" % (ppop,))
                elif line.startswith('/*'):
                    if '*/' in line:
                        self.error('Comments inside lines are not yet '
                                   'supported')
                    in_comment = True
                elif not active:
                    pass
                elif len(line) > 0:
                    yield from self.yield_tokens(line)
            if cond_stack:
                self.error('Unterminated conditional directive')

    def _pp_include(self, filename, ppvalue):
        """Yield the tokens of the file named by an #include operand."""
        if ppvalue is None:
            self.error('Missing file name in #include')
        if ppvalue.startswith('<') and ppvalue.endswith('>'):
            dirs = [os.path.join(os.path.dirname(__file__), 'include')]
        elif ppvalue.startswith('"') and ppvalue.endswith('"'):
            dirs = [os.path.dirname(filename)] + self.options.include_dirs
        else:
            self.error('Invalid include type %s' % (ppvalue,))
        incfilename = ppvalue[1:-1]
        saved_line = self.line
        for directory in dirs:
            # os.path.join copes with an empty directory name, which plain
            # concatenation would turn into an absolute path.
            path = os.path.join(directory, incfilename)
            if os.path.exists(path):
                yield from self.preprocess(path)
                break
        else:
            self.error('#include not found: \'%s\'' % (incfilename,))
        # The nested preprocess() call has overwritten these.
        self.src_filename = filename
        self.line = saved_line

    def parse_exprstr_to_rpn(self, s, in_ppexpr=False):
        """Tokenize the string s and parse it as one complete expression.

        in_ppexpr is forwarded to yield_tokens(). Returns the expression
        in RPN form (a list of tokens).
        """
        it = self.yield_tokens(s, in_ppexpr)
        tok = next(it, None)
        expr, tok = self.parse_expr_to_rpn(it, tok)
        if tok is not None:
            self.error('Expression could not be parsed beyond \'%s\'' %
                       (tok,))
        leftover = next(it, None)
        if leftover is not None:
            self.error('Stopped parsing expression before \'%s\'' %
                       (leftover,))
        return expr

    def parse_expr_to_rpn(self, it, first_tok):
        """Parse an expression from the token iterator into RPN.

        first_tok is an already-consumed first token (or None). Returns
        (rpn, tok) where tok is the token that terminated the expression,
        or None when the iterator was exhausted.
        """
        opstack = []
        out = []
        operator_expected = False
        while True:
            if first_tok is not None:
                tok = first_tok
                first_tok = None
            else:
                tok = next(it, None)
            if tok is None:
                break
            if tok == ')' or tok == ',':
                # End of grouping parenthesis OR end of parameter
                noargs = False
                if tok == ',':
                    if not operator_expected:
                        self.error('Unexpected comma')
                elif not operator_expected:
                    noargs = True
                st_tok = None
                while len(opstack) >= 1:
                    st_tok, st_op = opstack[-1]
                    if st_tok == '%call' or st_tok == '(':
                        break
                    opstack.pop()
                    out.append(st_tok)
                else:
                    # Reached at end of in enum value
                    # (they end with a comma).
                    # Also reached when there are unbalanced parentheses.
                    break
                if tok == ',':
                    # TODO does anything need to be done here?
                    out.append('%nextarg')
                    operator_expected = False
                else:
                    opstack.pop()
                    if noargs:
                        out.append('%noargs')
                    if st_tok == '%call':
                        out.append('%call')
            elif operator_expected:
                op = c_infix_postfix_operators.get(tok)
                if op is not None:
                    op_level, op_xfix, op_assoc = op
                    # Pop operators with higher precedence level
                    while len(opstack) >= 1:
                        st_tok, st_op = opstack[-1]
                        st_level, st_xfix, st_assoc = st_op
                        if st_level < op_level:
                            break
                        elif st_level == op_level and st_assoc == 'R':
                            break
                        opstack.pop()
                        if st_tok != '(':
                            out.append(st_tok)
                    opstack.append((tok, op))
                    # After postfix ++/-- the operand is still complete,
                    # so another operator (or the end) must follow.
                    operator_expected = tok in ('++', '--')
                elif tok == '(' or tok == '[':
                    # Start of function call or array index.
                    # Pop operators with higher precedence,
                    # like the member operator.
                    while len(opstack) >= 1:
                        st_tok, st_op = opstack[-1]
                        st_level, st_xfix, st_assoc = st_op
                        if st_level < CALL_PRECEDENCE:
                            break
                        elif st_level == CALL_PRECEDENCE and \
                                st_assoc == 'R':
                            break
                        opstack.pop()
                        out.append(st_tok)
                    if tok == '(':
                        opstack.append(('%call', GROUP_MARKER_OP))
                    else:
                        opstack.append(('%arrind', GROUP_MARKER_OP))
                    operator_expected = False
                elif tok in [';', ']', '}']:
                    # End of: bitfield size, array length, last enum value
                    break
                else:
                    self.error(
                        'Unexpected symbol \'%s\'. Expected operator' %
                        (tok,))
            else:
                # not operator_expected
                op = c_prefix_operators.get(tok)
                if op is not None:
                    opstack.append((tok, op))
                    operator_expected = (op[1] != 'prefix')
                    continue
                c = tok[0]
                if (c == '"' or c == "'" or c == NUM_PREFIX or
                        (c >= 'A' and c <= 'Z') or  # identifiers
                        (c >= 'a' and c <= 'z') or c == '_' or
                        c == '%'):  # types with keywords
                    if self.is_type_token(tok) or c == '%':
                        # Skip type cast
                        if len(opstack) == 0 or opstack[-1][0] != '(':
                            self.error('Unexpected type in expression'
                                       if c != '%' else
                                       'Unexpected keyword in expression')
                        decl, tok = self.parse_decl(tok, it)
                        if decl.ident is not None:
                            self.error('Cannot have declared identifier '
                                       'in type cast')
                        if tok != ')':
                            self.error('Expected \')\' at end of '
                                       'type cast')
                        opstack.pop()
                    else:
                        # Terminal token (string, number or identifier)
                        out.append(tok)
                        operator_expected = True
                elif c == '(':
                    # Start of grouping parenthesis
                    # or type cast
                    while len(opstack) >= 1:
                        st_tok, st_op = opstack[-1]
                        st_level, st_xfix, st_assoc = st_op
                        if st_level <= CALL_PRECEDENCE:
                            break
                        opstack.pop()
                        out.append(st_tok)
                    opstack.append(('(', GROUP_MARKER_OP))
                else:
                    self.error('Unexpected symbol \'%s\'' % (tok,))
        if not operator_expected:
            self.error('Unexpected end of expression')
        while len(opstack) >= 1:
            st_tok, st_op = opstack.pop()
            out.append(st_tok)
        return out, tok

    # = RPN
    def tok2bool(self, tok):
        """Convert an integer constant token to a Python bool."""
        if tok.startswith(NUM_PREFIX):
            # TODO check range
            return tok != NUM_ZERO
        else:
            self.error('Invalid value for boolean operation')

    def tok2int(self, tok):
        """Convert an integer or character constant token to an int."""
        if tok.startswith(NUM_PREFIX):
            # TODO check range
            return int(tok.removeprefix(NUM_PREFIX))
        elif tok.startswith("'"):
            return ord(tok[1])
        else:
            self.error('Not an integer value')

    def scalar_op(self, op, a, b):
        """Apply the binary operator op to the constant tokens a and b.

        Returns a bool for comparisons and an int for arithmetic. When
        either operand is not an integer constant, False/0 is returned.
        """
        if not a.startswith(NUM_PREFIX) or not b.startswith(NUM_PREFIX):
            # TODO implement short-circuiting properly, so type
            # checking can be enabled.
            if op in _COMPARISON_OPS:
                return False
            else:
                return 0
        a = int(a[1:])
        b = int(b[1:])
        if op == '==':
            return a == b
        elif op == '!=':
            return a != b
        elif op == '<':
            return a < b
        elif op == '<=':
            return a <= b
        elif op == '>':
            return a > b
        elif op == '>=':
            return a >= b
        elif op == '+':
            return a + b
        elif op == '-':
            return a - b
        elif op == '*':
            return a * b
        elif op == '/' or op == '%':
            if b == 0:
                self.error('Division by zero')
            # Integer (floor) division; consistent with Python's % so that
            # (a/b)*b + a%b == a holds. C89 leaves the rounding direction
            # for negative operands implementation-defined.
            return a // b if op == '/' else a % b
        elif op == '&':
            return a & b
        elif op == '|':
            return a | b
        elif op == '^':
            return a ^ b
        else:
            self.error('Unknown scalar operator')

    def eval_parsed_expr(self, expr):
        """Evaluate a constant expression in RPN form.

        Returns the single resulting token.
        """
        terms = []
        for op in expr:
            if op == '||':
                b = terms.pop()
                a = terms.pop()
                res = self.tok2bool(a) or self.tok2bool(b)
                terms.append(bool2tok(res))
            elif op == '&&':
                b = terms.pop()
                a = terms.pop()
                res = self.tok2bool(a) and self.tok2bool(b)
                terms.append(bool2tok(res))
            elif op == '!':
                a = terms.pop()
                res = not self.tok2bool(a)
                terms.append(bool2tok(res))
            elif op in _SCALAR_OPS:
                b = terms.pop()
                a = terms.pop()
                res = self.scalar_op(op, a, b)
                if isinstance(res, bool):
                    terms.append(bool2tok(res))
                else:
                    # Arithmetic results keep their numeric value
                    terms.append(NUM_PREFIX + str(res))
            elif op == '%nextarg':
                terms.append(op)
            elif op == '%call':
                param = terms.pop()
                args = []
                funcname = None
                if param == '%noargs':
                    funcname = terms.pop()
                else:
                    # Arguments are separated by '%nextarg' markers; the
                    # term below the first argument is the function name.
                    while True:
                        args.append(param)
                        param = terms.pop()
                        if param != '%nextarg':
                            funcname = param
                            break
                # FIXME 1. this is actually an operator.
                # FIXME 2. what is the order of evaluation of macros vs
                #          operators in pp-exprs?
                # FIXME 3. this should be unavailable in enum initialisers
                if funcname == 'defined':
                    if len(args) != 1:
                        self.error('Wrong number of arguments to defined()')
                    else:
                        name = args[0]
                        res = bool2tok(name in self.defines)
                        terms.append(res)
                else:
                    self.error(
                        'Unknown macro or pre-processor function \'%s\'' %
                        (funcname,))
            else:
                terms.append(op)
        if len(terms) != 1:
            self.error('Expected exactly one subexpression after evaluation')
        return terms[0]

    def eval_ppexpr(self, s):
        """Evaluate the expression of an #if/#elif directive to a bool."""
        expr = self.parse_exprstr_to_rpn(s, in_ppexpr=True)
        value = self.eval_parsed_expr(expr)
        return self.tok2bool(value)

    def define(self, name, value=None):
        """Define a macro. name may include a '(params)' list."""
        if '(' in name:
            name, paramsstr = name.split('(', 1)
            if not paramsstr.endswith(')'):
                self.error('Function-like macro is missing \')\'')
            params = commaspace.split(paramsstr.removesuffix(')'))
            self.defines[name] = Define(self, params, value)
        else:
            self.defines[name] = Define(self, None, value)

    def predefines(self):
        """Register the built-in macro and the -D command line macros."""
        self.define('__P9KCC__', None)
        for arg in self.options.initial_defines:
            arr = arg.split('=', 1)
            name = arr[0]
            value = arr[1] if len(arr) == 2 else None
            self.define(name, value)

    def all_tokens(self):
        """Yield the pre-processed tokens of all source files."""
        for fn in self.options.sourcefiles:
            self.src_filename = fn
            yield from self.preprocess(fn)

    # yields tokens
    def parse_typedef(self, it):
        """Parse a typedef (after the keyword) and register it."""
        tok = next(it)
        decl, tok = self.parse_decl(tok, it)
        self.scope.register_typedef(decl.ident, decl)
        return tok

    def parse_type_prefix(self, tok, it):
        """Parse the part of a declaration that precedes the identifier.

        Returns (list of Type, next token).
        """
        typ = Type(self)
        prefix = []
        while tok == '*' or self.is_type_token(tok):
            if tok == '*':
                typ.require_finalised()
                prefix.append(typ)
                typ = Type(self)
                typ.set_kind('%pointer')
            elif tok in c_quals:
                typ.add_qualifier(tok)
            elif tok in c_elementary_types:
                typ.set_kind(tok)
            elif tok in c_tags:
                tagtype = tok
                typ.set_kind(tagtype)
                tok = next(it)
                # Map tag identifier
                tagname = None
                if self.is_ident_token(tok):
                    tagname = tok
                    tok = next(it)
                tag = self.scope.map_tag_ident(tagtype, tagname)
                typ.set_tag(tag)
                if tok != '{':
                    # tok has already been advanced past the tag
                    continue
                # Parse enum/struct/union
                tag.parse_definition(it)
            elif self.scope.typedef_exists(tok):
                typ.set_kind('%typedef')
                typ.set_typedef(tok)
            else:
                self.error('Invalid token \'%s\' in type prefix' % (tok,))
            tok = next(it)
        prefix.append(typ)
        return prefix, tok

    def parse_type_suffix(self, tok, it, outer_type):
        """Parse array/function suffixes following a declared identifier.

        The parsed suffix types are appended (reversed) to outer_type.
        Returns the next token.
        """
        suffix = []
        while tok in ['[', '(']:
            if tok == '[':
                # Array
                tok = next(it)
                lengthexpr = None
                if tok != ']':
                    lengthexpr, tok = self.parse_expr_to_rpn(it, tok)
                    if tok != ']':
                        self.error('Expected ] not \'%s\' after array length'
                                   % (tok,))
                typ = Type(self)
                typ.set_kind('%array')
                typ.set_arraylength(lengthexpr)
                suffix.append(typ)
            elif tok == '(':
                # Function
                funcparams = []
                self.enter_scope()
                tok = next(it)
                if tok == ')':
                    self.error('Must specify \'void\' for zero-parameter '
                               'function')
                is_varargs = False
                while True:
                    if tok == '...':
                        if len(funcparams) == 0:
                            self.error('varargs function must have at least '
                                       '1 parameter')
                        is_varargs = True
                        tok = next(it, '')
                        if tok != ')':
                            self.error('Expected \')\' after \'...\'')
                        break
                    paramdecl, tok = self.parse_decl(tok, it)
                    if paramdecl.is_void():
                        if len(funcparams) != 0:
                            self.error('void after other parameters')
                        break
                    funcparams.append(paramdecl)
                    if tok == ')':
                        break
                    elif tok != ',':
                        self.error('Expected \',\' or \')\' after parameter')
                    tok = next(it)
                self.leave_scope()
                typ = Type(self)
                typ.set_kind('%funcparams')
                typ.set_funcparams(funcparams)
                typ.set_varargs(is_varargs)
                suffix.append(typ)
            tok = next(it)
        # FIXME 1. this is only correct in basic cases
        # FIXME 2. also, neither this function nor parse_type_prefix handles
        #          parentheses in declarations, like `char (* stuff)()`.
        outer_type += suffix[::-1]
        return tok

    def parse_decl(self, tok, it):
        """Parse one declaration. Returns (Decl, terminating token)."""
        typ, tok = self.parse_type_prefix(tok, it)
        ident = None
        if tok in decl_end_tokens:
            return Decl(self, typ, None), tok
        elif self.is_ident_token(tok):
            ident = tok
            tok = next(it)
        if tok not in decl_end_tokens:
            tok = self.parse_type_suffix(tok, it, typ)
        decl = Decl(self, typ, ident)
        # TODO add definition, and check if compatible if already exists
        return decl, tok

    def parse_repeated_decl(self, tok, it, template_decl):
        """Parse a further declarator after ',' (as in `int a, b`).

        The root type is taken from template_decl.
        """
        # '%int' is only a placeholder root type; it is replaced below.
        decl, tok = self.parse_decl('%int', it)
        decl.type[0] = template_decl.type[0]
        return decl, tok

    def is_ident_token(self, tok):
        """Tell whether tok starts like an identifier."""
        c = tok[0]
        return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_'

    def is_type_token(self, tok):
        """Tell whether tok can start or continue a type prefix."""
        return tok in c_type_prefixes or self.scope.typedef_exists(tok)

    def parse_decl_or_expr(self, tok, it):
        """Parse one declaration or statement.

        tok is an already-consumed first token, or None to read one from
        the iterator. Returns False at end of input and True otherwise.
        """
        if tok is None:
            tok = next(it, None)
        if tok is None:
            return False
        elif tok == '%typedef':
            tok = self.parse_typedef(it)
            if tok != ';':
                self.error('Parsed end of typedef but found no \';\'')
            return True
        # FIXME this does not detect all declarations
        if self.is_type_token(tok):
            # Declaration
            decl, tok = self.parse_decl(tok, it)
            while tok == ',':
                decl, tok = self.parse_repeated_decl(tok, it, decl)
            if tok == '{':
                self.parse_funcbody(it, decl)
            elif tok != ';':
                self.error('Parsed end of declaration but found no \';\'')
        else:
            # Expression
            if not self.in_func_body:
                self.error('Parsed this as a statement, but this isn\'t '
                           'inside a function')
            self.parse_statement(tok, it)
        return True

    def parse_statement(self, tok, it):
        """Parse an expression statement (or an empty statement)."""
        if tok == ';':
            return
        expr, tok = self.parse_expr_to_rpn(it, tok)
        # TODO add expr to current bblock
        if tok != ';':
            self.error('Expected \';\' at end of statement')

    def parse_funcbody(self, it, funcdecl):
        """Parse a function body (after its opening '{')."""
        functype = funcdecl.type[-1]
        if functype.kind != '%funcparams':
            self.error('Interpreted \'{\' as function body, but the '
                       'declaration is not a function declaration')
        if self.scope.outer is not None:
            self.error('Function must be at top-level')
        self.enter_scope()
        for funcparam in functype.funcparams:
            self.scope.register_decl(funcparam.ident, funcparam)
        assert not self.in_func_body
        self.in_func_body = True
        while True:
            # A default is required; otherwise EOF raises StopIteration
            # and the check below can never trigger.
            tok = next(it, None)
            if tok == '}':
                break
            elif tok is None:
                self.error('Reached EOF without closing \'}\' of \'%s\'' %
                           (funcdecl.ident,))
            self.parse_decl_or_expr(tok, it)
        self.in_func_body = False
        self.leave_scope()

    def enter_scope(self):
        """Push a new innermost scope."""
        self.scope = Scope(self, self.scope)

    def leave_scope(self):
        """Pop the innermost scope."""
        assert self.scope.outer is not None
        self.scope = self.scope.outer

    def run(self):
        """Run the compiler according to the parsed options."""
        self.predefines()
        if self.options.preprocess_only:
            preprocessed_code = format(self.all_tokens())
            if self.options.output == '-':
                print(preprocessed_code, file=sys.stdout)
            else:
                with open(self.options.output, 'w') as outfile:
                    print(preprocessed_code, file=outfile)
            return
        it = self.all_tokens()
        while self.parse_decl_or_expr(None, it):
            pass
        # TODO generate output code

    def yield_tokens(self, s, in_ppexpr=False):
        """Split the source text s into tokens, expanding macros.

        When in_ppexpr is true, the identifier following `defined` is
        yielded as-is instead of being macro-expanded, so that
        `defined(NAME)` can be evaluated in #if/#elif expressions.
        """
        it = iter(s)
        nextch = None
        keep_next_ident = False
        while True:
            ch = next(it, '') if nextch is None else nextch
            if ch == '':
                break
            nextch = next(it, '')
            combo = ch + nextch
            if combo in ('<<', '>>'):
                nextch = next(it, '')
                if nextch == '=':
                    yield combo + nextch  # <<= or >>=
                    nextch = None  # consume '='
                else:
                    yield combo  # << or >>
                continue
            elif combo == '..':
                nextch = next(it, '')
                if nextch != '.':
                    self.error('Bad token \'..\'')
                nextch = None
                yield '...'
                continue
            if combo in _TWO_CHAR_TOKENS:
                nextch = None  # consume it
                yield combo
            elif ch in '+-*/%&|^~!=<>?:;,.()[]{}#':
                yield ch
            elif ch == '"' or ch == "'":
                # String
                start = ch
                chars = []
                ch = nextch
                nextch = None
                while True:
                    if ch == '':
                        self.error('Unexpected end of string')
                    elif ch == start:
                        break
                    elif ch != '\\':
                        chars.append(ch)
                    else:
                        ch = next(it, '')
                        if ch == '\\' or ch == '"' or ch == "'":
                            chars.append(ch)
                        elif ch in _SIMPLE_ESCAPES:
                            # TODO octal escapes
                            chars.append(_SIMPLE_ESCAPES[ch])
                        elif ch == 'x' or ch == 'X':
                            digits = next(it, '') + next(it, '')
                            # FIXME the string is a UTF-16 string here.
                            try:
                                chars.append(chr(int(digits, 16)))
                            except ValueError:
                                self.error('Invalid escape')
                        else:
                            self.error('Invalid escape')
                    ch = next(it, '')
                yield start + ''.join(chars)
            elif ch == ' ' or ch == '\t':
                pass
            elif (ch >= 'A' and ch <= 'Z') or \
                    (ch >= 'a' and ch <= 'z') or \
                    ch == '_':
                # Identifier, keyword or define/macro
                ident = ch
                while (nextch >= 'A' and nextch <= 'Z') or \
                        (nextch >= 'a' and nextch <= 'z') or \
                        (nextch >= '0' and nextch <= '9') or nextch == '_':
                    ident += nextch
                    nextch = next(it, '')
                if keep_next_ident:
                    # Operand of `defined`: must not be macro-expanded
                    keep_next_ident = False
                    yield ident
                    continue
                if in_ppexpr and ident == 'defined':
                    keep_next_ident = True
                    yield ident
                    continue
                is_macro = False
                if ident in self.defines:
                    define = self.defines[ident]
                    if define.params is not None and nextch == '(':
                        # Macro with parameters
                        nextch = None
                        args = self._read_macro_args(it)
                        yield from define.expand(args)
                        is_macro = True
                    elif define.params is None:
                        # Define without parameters
                        yield from define.expand(None)
                        is_macro = True
                if is_macro:
                    pass
                elif ident == '__FILE__':
                    yield '"' + self.src_filename
                elif ident == '__LINE__':
                    yield NUM_PREFIX + str(self.line)
                elif ident in c_keywords:
                    yield '%' + ident
                else:
                    yield ident
            elif ch == '0' and (nextch == 'x' or nextch == 'X'):
                # Hex number
                nextch = next(it, '')
                num = ''
                while (nextch >= '0' and nextch <= '9') or \
                        (nextch >= 'A' and nextch <= 'F') or \
                        (nextch >= 'a' and nextch <= 'f'):
                    num += nextch
                    nextch = next(it, '')
                if num == '':
                    self.error('Bad token')
                while nextch != '' and nextch in 'uUlL':
                    nextch = next(it, '')
                yield NUM_PREFIX + str(int(num, 16))
            elif ch >= '0' and ch <= '9':
                # Number
                # (floating point is not implemented)
                num = ch
                while (nextch >= '0' and nextch <= '9'):
                    num += nextch
                    nextch = next(it, '')
                while nextch != '' and nextch in 'uUlL':
                    nextch = next(it, '')
                yield NUM_PREFIX + num
            else:
                self.error('Bad token')

    def _read_macro_args(self, it):
        """Read macro arguments up to the closing ')' from a char iterator.

        The opening '(' must already have been consumed. Returns the list
        of argument source texts (stripped). String literals and nested
        parentheses are kept verbatim, and commas inside them do not
        separate arguments.
        """
        args = []
        arg = ''
        plevel = 0
        while True:
            ch = next(it, None)
            if ch is None:
                raise NotImplementedError(
                    'Line breaks inside '
                    'argument lists of macro invocations '
                    'are not yet implemented')
            elif ch == '(':
                plevel += 1
                arg += ch
            elif ch == ')':
                if plevel == 0:
                    args.append(arg.strip())
                    return args
                plevel -= 1
                arg += ch
            elif ch == '"' or ch == "'":
                # Strings could contain commas or parentheses,
                # so copy them without interpreting the contents.
                quote = ch
                arg += ch
                while True:
                    ch = next(it, None)
                    if ch is None:
                        self.error('Unexpected end of string')
                    arg += ch
                    if ch == '\\':
                        ch = next(it, None)
                        if ch is None:
                            self.error('Unexpected end of string')
                        arg += ch
                    elif ch == quote:
                        break
            elif ch == ',' and plevel == 0:
                args.append(arg.strip())
                arg = ''
            else:
                arg += ch


def bool2tok(b):
    """Convert a truth value to the integer constant token 1 or 0."""
    return NUM_ONE if b else NUM_ZERO


def escape_string(s):
    """Escape the contents of a string token for output as C source."""
    s = s.replace('\\', '\\\\')
    s = s.replace('\0', '\\0')
    s = s.replace('\n', '\\n')
    s = s.replace('\r', '\\r')
    s = s.replace('\t', '\\t')
    s = s.replace('\"', '\\"')
    s = s.replace("\'", "\\'")
    return s


def format_token(tok):
    """Convert a token back to its C source representation."""
    if tok.startswith('%'):
        # keyword
        return tok.removeprefix('%')
    elif tok.startswith(NUM_PREFIX):
        return tok.removeprefix(NUM_PREFIX)
    elif tok.startswith('"') or tok.startswith("'"):
        quote = tok[0]
        return quote + escape_string(tok[1:]) + quote
    else:
        return tok


def format(tokens):
    """Format a token stream as indented C source text (used by -E)."""
    s = ''
    indentlevel = 0
    linebreak = '\n'
    space = ''
    last_tok = None
    for tok in tokens:
        # FIXME do/while gets formatted incorrectly
        # (with deceptive line break before while)
        if tok != '%else':
            s += (space if tok not in [',', ';'] else '') + format_token(tok)
        else:
            s = s.strip() + ' else'
        if tok == '{':
            indentlevel += 1
            linebreak = '\n' + (' ' * (indentlevel * 4))
            s += linebreak
            space = ''
        elif tok == '}':
            # FIXME this adds a line break in the last line of typedef struct
            indentlevel -= 1
            linebreak = '\n' + (' ' * (indentlevel * 4))
            if last_tok in [';', '}']:
                # Re-indent the closing brace one level less
                s = s[:-1].strip() + linebreak + '}'
            s += linebreak
            space = ''
        elif tok == ';':
            s += linebreak
            space = ''
        else:
            space = ' '
        last_tok = tok
    return s


class Define:
    """A macro: params is None for object-like macros."""

    def __init__(self, compiler, params=None, value=None):
        self.compiler = compiler
        self.params = params
        self.value = value

    def expand(self, args=None):
        """Yield the tokens of the macro expansion.

        args is the list of argument source texts for function-like
        macros, or None for object-like macros.
        """
        if self.params is not None and args is None:
            self.compiler.error("Expected arguments to macro")
        elif self.params is not None and len(self.params) != len(args):
            self.compiler.error(
                "Wrong number of arguments to macro, was %d but expected %d" %
                (len(args), len(self.params)))
        elif self.value is None:
            pass  # Nothing to expand
        elif self.params is None:
            assert args is None
            yield from self.compiler.yield_tokens(self.value)
        else:
            make_string = False
            for tok in self.compiler.yield_tokens(self.value):
                if tok == '#':
                    # Stringify the parameter that follows
                    make_string = True
                    continue
                try:
                    i = self.params.index(tok)
                except ValueError:
                    yield tok
                    continue
                if make_string:
                    yield '"' + args[i]
                    make_string = False
                else:
                    yield from self.compiler.yield_tokens(args[i])


class Decl:
    """A declaration: a list of Type objects plus an optional identifier."""

    def __init__(self, compiler, type, ident):
        self.compiler = compiler
        self.type = type
        self.ident = ident

    def __repr__(self):
        return 'Decl(%s, %s)' % (repr(self.type), self.ident)

    def is_void(self):
        return len(self.type) == 1 and self.type[0].kind == '%void'


class EnumValueDecl(Decl):
    """Declaration of an enum constant with the value num."""

    def __init__(self, compiler, ident, num):
        super().__init__(compiler, INT_DECLTYPE, ident)
        self.num = num


class Type:
    """One level of a declared type (root type, pointer, array, function)."""

    def __init__(self, compiler):
        self.compiler = compiler
        self.kind = None
        self.arraylength = None
        self.funcparams = None
        self.is_varargs = False
        self.typedef = None
        self.tag = None
        self.quals = set()

    def __repr__(self):
        if self.kind == '%funcparams':
            return 'Type(%s, %s)' % (self.kind, self.funcparams)
        elif self.kind == '%array':
            return 'Type(%s, %s)' % (self.kind, self.arraylength)
        else:
            return 'Type(%s)' % (self.kind,)

    def set_kind(self, kind):
        assert kind is not None
        if self.kind is None:
            self.kind = kind
        elif self.kind in ['%unsigned', '%signed']:
            # 'unsigned int' is the same as 'unsigned'
            if kind != '%int':
                self.kind += ' ' + kind
        else:
            self.compiler.error('Repeated type')

    def add_qualifier(self, tok):
        if tok in self.quals:
            self.compiler.error('Duplicate qualifier')
        self.quals.add(tok)

    def set_arraylength(self, lengthexpr):
        """Set the array length from an RPN expression (or None)."""
        if lengthexpr is not None:
            if len(lengthexpr) != 1:
                self.compiler.error('Array length is too complex')
            lenvalue = lengthexpr[0]
            if lenvalue.startswith(NUM_PREFIX):
                lenvalue = int(lenvalue.removeprefix(NUM_PREFIX))
            else:
                # FIXME hack to be able to support enum values as array
                #       lengths
                try:
                    decl = self.compiler.scope.search_decl(lenvalue)
                except KeyError:
                    decl = None
                if not isinstance(decl, EnumValueDecl):
                    self.compiler.error(
                        'Array length must be constant integer')
                lenvalue = decl.num
            self.arraylength = lenvalue

    def set_funcparams(self, funcparams):
        assert funcparams is not None
        self.funcparams = funcparams

    def set_varargs(self, is_varargs):
        self.is_varargs = is_varargs

    def set_typedef(self, typedef):
        assert typedef is not None
        self.typedef = typedef

    def set_tag(self, tag):
        assert tag is not None
        self.tag = tag

    def require_finalised(self):
        if self.kind is None:
            self.compiler.error('Must specify root type before pointer')


inttype = Type(None)
inttype.set_kind('%int')
INT_DECLTYPE = [inttype]
del inttype


class Tag:
    """An enum, struct or union tag."""

    def __init__(self, compiler, tagtype, ident):
        self.compiler = compiler
        self.tagtype = tagtype
        self.ident = ident
        self.scope = compiler.scope

    def parse_definition(self, it):
        """Parse the body of the tag (after the opening '{')."""
        if self.tagtype == '%struct' or self.tagtype == '%union':
            self.parse_fields(it)
        else:
            self.parse_enumvalues(it)

    def parse_fields(self, it):
        self.fields_list = []
        self.fields_map = {}
        tok = next(it)
        while tok != '}':
            fielddecl, tok = self.compiler.parse_decl(tok, it)
            while True:
                name = fielddecl.ident
                if name in self.fields_map:
                    self.compiler.error('Duplicate field \'%s\'' % (name,))
                self.fields_map[name] = fielddecl
                self.fields_list.append(fielddecl)
                if tok == ':':
                    # Bitfields. For now, these are just ignored
                    tok = next(it)
                    bitsize, tok = self.compiler.parse_expr_to_rpn(it, tok)
                if tok == ',':
                    fielddecl, tok = self.compiler.parse_repeated_decl(
                        tok, it, fielddecl)
                else:
                    break
            if tok != ';':
                self.compiler.error('Expected \';\' after field')
            tok = next(it)
        if len(self.fields_list) == 0:
            self.compiler.error('C89 does not allow empty %ss' %
                                (self.tagtype,))

    def parse_enumvalues(self, it):
        self.enumvalues_list = []
        tok = next(it)
        num = 0
        while True:
            name = tok
            tok = next(it)
            if tok == '=':
                tok = next(it)
                numexpr, tok = self.compiler.parse_expr_to_rpn(it, tok)
                numtok = self.compiler.eval_parsed_expr(numexpr)
                num = self.compiler.tok2int(numtok)
            self.add_enumvalue(name, num)
            if tok == ',':
                tok = next(it)
                if tok == '}':
                    self.compiler.error('C89 does not allow trailing comma')
                num += 1
            elif tok == '}':
                break
            else:
                self.compiler.error('Unexpected token in enum')

    def add_enumvalue(self, name, num):
        self.enumvalues_list.append(name)
        val = EnumValueDecl(self.compiler, name, num)
        self.scope.register_decl(name, val)


class Scope:
    """A lexical scope holding typedefs, declarations and tags.

    Lookups fall back to the enclosing scope (outer) and raise KeyError
    when the name is not found in any scope.
    """

    def __init__(self, compiler, outer):
        self.compiler = compiler
        self.outer = outer
        self.typedefs = {}
        self.decls = {}
        self.tags = {}

    def typedef_exists(self, name):
        try:
            self.search_typedef(name)
            return True
        except KeyError:
            return False

    def search_decl(self, name):
        if name in self.decls:
            return self.decls[name]
        elif self.outer is not None:
            return self.outer.search_decl(name)
        else:
            raise KeyError('Decl \'%s\' not found' % (name,))

    def search_typedef(self, name):
        if name in self.typedefs:
            return self.typedefs[name]
        elif self.outer is not None:
            return self.outer.search_typedef(name)
        else:
            raise KeyError('Typedef \'%s\' not found' % (name,))

    def register_typedef(self, name, decl):
        if name in self.typedefs:
            self.compiler.error('Type \'%s\' already exists' % (name,))
        self.typedefs[name] = decl

    def register_decl(self, name, decl):
        if name in self.decls:
            self.compiler.error('Identifier \'%s\' is already declared' %
                                (name,))
        self.decls[name] = decl

    def search_tag_ident(self, name):
        if name in self.tags:
            return self.tags[name]
        elif self.outer is not None:
            return self.outer.search_tag_ident(name)
        else:
            raise KeyError('Tag not found')

    def map_tag_ident(self, tagtype, name):
        """Return the tag called name, creating it in this scope if needed.

        Anonymous tags (name is None) are always created anew.
        """
        if name is None:
            return Tag(self.compiler, tagtype, None)
        try:
            tag = self.search_tag_ident(name)
        except KeyError:
            tag = Tag(self.compiler, tagtype, name)
            self.tags[name] = tag
            return tag
        if tag.tagtype != tagtype:
            self.compiler.error('Referenced tag \'%s\' as %s but it '
                                'is %s' % (name, tagtype, tag.tagtype))
        return tag


class CSyntaxError(Exception):
    """Error in the compiled source, with file name and line number."""

    def __init__(self, filename, line, message):
        super().__init__(message)
        self.file = filename
        self.line = line
        self.message = message


if __name__ == '__main__':
    cc = Ds9kCCompiler()
    cc.init_from_argv()
    try:
        cc.run()
    except CSyntaxError as e:
        red = ''
        norm = ''
        if sys.stderr.isatty():
            red = '\x1b[1;31m'
            norm = '\x1b[0m'
        if e.file is not None:
            # %s rather than %d: the line may still be None
            msg = "%s:%s: %serror%s: %s" % (e.file, e.line,
                                            red, norm, e.message)
            exitstatus = 1
        else:
            msg = "%scommand-line error%s: %s" % (red, norm, e.message)
            exitstatus = 2
        print(msg, file=sys.stderr)
        sys.exit(exitstatus)
    except Exception as e:
        # ICE go home!
        e.add_note("Internal compiler error at %s:%s" %
                   (cc.src_filename, cc.line))
        raise
    except KeyboardInterrupt as e:
        e.add_note("Aborted at %s:%s" % (cc.src_filename, cc.line))
        raise