/*

  builtins.c -- Builtin types.

  Copyright © 2011-2016 Samuel Lidén Borell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

*/

#include "builtins.h"
#include "parser.h"
#include "misc.h"
#include <stdint.h>   /* uintptr_t */
#include <stdlib.h>   /* malloc, abort */
#include <string.h>   /* strchr, strncmp */

/* forward declarations */
static const LRLASTDefList deflist_false, deflist_true;
static const LRLIdent bool_scope;
static const LRLIdent ident_true, ident_false;


/* Builds an identifier token whose location is a string literal, so the
   name is always null terminated. */
#define BTTOK(name, length) { LRL_TT_Ident, { name, length } }

/* Name tokens of the builtin types, indexed by LRLBuiltinType. */
static const LRLToken typetokens[LRL_BT_NumTypes] = {
    /** count: Can contain any array index. Equivalent to size_t in C */
    BTTOK("count", 5),
    BTTOK("wcount", 6), /* with wrap-around */
    /** uint: Default unsigned integer type. At least 16 bits. */
    BTTOK("uint", 4),
    BTTOK("wuint", 5), /* with wrap-around */
    /** int: Default signed integer type. At least 16 bits. */
    BTTOK("int", 3),
    /** eint: Either signed or unsigned, i.e. with the common
        n-1 bits of uint and int. */
    BTTOK("eint", 4),

    /* Larger and smaller platform specific integer types.
       Primarily intended to be used for C compatibility. */
    BTTOK("short", 5),
    BTTOK("ushort", 6),
    BTTOK("wushort", 7),
    BTTOK("eshort", 6),
    BTTOK("long", 4),
    BTTOK("ulong", 5),
    BTTOK("wulong", 6),
    BTTOK("elong", 5),
    BTTOK("longlong", 8),
    BTTOK("ulonglong", 9),
    BTTOK("wulonglong", 10),
    BTTOK("elonglong", 9),

    /* Integer types with certain sizes */
    BTTOK("char", 4), /* same as byte, but intended for UTF-8 strings */
    BTTOK("byte", 4), /* like wuint8, but can be assigned to/from int8 */
    BTTOK("uint8", 5),
    BTTOK("wuint8", 6),
    BTTOK("int8", 4),
    BTTOK("eint8", 5),
    BTTOK("uint16", 6),
    BTTOK("wuint16", 7),
    BTTOK("int16", 5),
    BTTOK("eint16", 6),
    BTTOK("uint32", 6),
    BTTOK("wuint32", 7),
    BTTOK("int32", 5),
    BTTOK("eint32", 6),
    BTTOK("uint64", 6),
    BTTOK("wuint64", 7),
    BTTOK("int64", 5),
    BTTOK("eint64", 6),
    BTTOK("uint128", 7),
    BTTOK("wuint128", 8),
    BTTOK("int128", 6),
    BTTOK("eint128", 7),

    /* Floating point types */
    BTTOK("float", 5), /* largest "native" float */
    BTTOK("float16", 7),
    BTTOK("float32", 7),
    BTTOK("float64", 7),
    BTTOK("float80", 7),
    BTTOK("float128", 8),
    /* Platform specific */
    BTTOK("cfloat", 6),
    BTTOK("cdouble", 7),
    BTTOK("clongdouble", 11),

    /* Boolean type (needed by if/while/etc.) */
    BTTOK("bool", 4),
};


/* Initializer for a builtin (non-bool) type node. The builtin type number
   is stored, cast to a pointer, in the first slot of the kind union. */
#define BT(bt) { LRL_AST_Type_Builtin, \
                 LRL_InternQual_ConstMemory, LRL_UNIQUEID_UNSET, \
                 &typetokens[bt], &typetokens[bt], \
                 { { (void*)(bt), NULL, NULL, NULL } } }

/* Statically allocated type nodes, indexed by LRLBuiltinType.
   lrl_is_builtin() relies on all builtin types living in this array. */
static const LRLASTType types[LRL_BT_NumTypes] = {
    BT(LRL_BT_count),
    BT(LRL_BT_wcount),
    BT(LRL_BT_uint),
    BT(LRL_BT_wuint),
    BT(LRL_BT_int),
    BT(LRL_BT_eint),

    BT(LRL_BT_short),
    BT(LRL_BT_ushort),
    BT(LRL_BT_wushort),
    BT(LRL_BT_eshort),
    BT(LRL_BT_long),
    BT(LRL_BT_ulong),
    BT(LRL_BT_wulong),
    BT(LRL_BT_elong),
    BT(LRL_BT_longlong),
    BT(LRL_BT_ulonglong),
    BT(LRL_BT_wulonglong),
    BT(LRL_BT_elonglong),

    BT(LRL_BT_char),
    BT(LRL_BT_byte),
    BT(LRL_BT_uint8),
    BT(LRL_BT_wuint8),
    BT(LRL_BT_int8),
    BT(LRL_BT_eint8),
    BT(LRL_BT_uint16),
    BT(LRL_BT_wuint16),
    BT(LRL_BT_int16),
    BT(LRL_BT_eint16),
    BT(LRL_BT_uint32),
    BT(LRL_BT_wuint32),
    BT(LRL_BT_int32),
    BT(LRL_BT_eint32),
    BT(LRL_BT_uint64),
    BT(LRL_BT_wuint64),
    BT(LRL_BT_int64),
    BT(LRL_BT_eint64),
    BT(LRL_BT_uint128),
    BT(LRL_BT_wuint128),
    BT(LRL_BT_int128),
    BT(LRL_BT_eint128),

    BT(LRL_BT_float),
    BT(LRL_BT_float16),
    BT(LRL_BT_float32),
    BT(LRL_BT_float64),
    BT(LRL_BT_float80),
    BT(LRL_BT_float128),
    BT(LRL_BT_cfloat),
    BT(LRL_BT_cdouble),
    BT(LRL_BT_clongdouble),

    /* bool is an enum type (false, true) with int as its base type */
    { LRL_AST_Type_Enum,
      LRL_InternQual_ConstMemory, LRL_UNIQUEID_UNSET,
      &typetokens[LRL_BT_bool], &typetokens[LRL_BT_bool],
      { { (void*)&deflist_false, (void*)&bool_scope,
          (void*)&types[LRL_BT_int], NULL } } }
};


/* Type info */
#define S LRL_BTG_signed
#define U LRL_BTG_unsigned
#define W LRL_BTG_wrapping
#define E LRL_BTG_eint
#define F LRL_BTG_float
#define B LRL_BTG_bool
/* Per-type properties, indexed by LRLBuiltinType. Columns, in order:
   integer min/max, float min/max, signed flag, "not a number" (special)
   flag, series, index within series, type group.
   NOTE(review): the min/max columns look like bit counts, with 255
   presumably meaning "no fixed upper limit" -- confirm in builtins.h. */
const LRLBuiltinInfo lrl_builtin_info[LRL_BT_NumTypes] = {
    /* INTEGER   FLOAT         NOT
       min max  min max  SGN  NUM  SER  IND  BTG */
    { 16,255,   0,  0,   0,   0,   3,   1,   U }, /* count */
    { 16,255,   0,  0,   0,   0,   3,   1,   W }, /* wcount */
    { 16,255,   0,  0,   0,   0,   1,   3,   U }, /* uint */
    { 16,255,   0,  0,   0,   0,   1,   3,   W }, /* wuint */
    { 15,255,   0,  0,   1,   0,   1,   3,   S }, /* int */
    { 15,255,   0,  0,   0,   0,   1,   3,   E }, /* eint */

    { 15,255,   0,  0,   1,   0,   1,   2,   S }, /* short */
    { 16,255,   0,  0,   0,   0,   1,   2,   U }, /* ushort */
    { 16,255,   0,  0,   0,   0,   1,   2,   W }, /* wushort */
    { 15,255,   0,  0,   0,   0,   1,   2,   E }, /* eshort */
    { 31,255,   0,  0,   1,   0,   1,   4,   S }, /* long */
    { 32,255,   0,  0,   0,   0,   1,   4,   U }, /* ulong */
    { 32,255,   0,  0,   0,   0,   1,   4,   W }, /* wulong */
    { 31,255,   0,  0,   0,   0,   1,   4,   E }, /* elong */
    { 63,255,   0,  0,   1,   0,   1,   5,   S }, /* longlong */
    { 64,255,   0,  0,   0,   0,   1,   5,   U }, /* ulonglong */
    { 64,255,   0,  0,   0,   0,   1,   5,   W }, /* wulonglong */
    { 63,255,   0,  0,   0,   0,   1,   5,   E }, /* elonglong */

    {  8,  8,   0,  0,   0,   0,   0,   0,   U }, /* char */
    {  8,  8,   0,  0,   0,   0,   0,   0,   U }, /* byte */
    {  8,  8,   0,  0,   0,   0,   0,   0,   U }, /* uint8 */
    {  8,  8,   0,  0,   0,   0,   0,   0,   W }, /* wuint8 */
    {  7,  7,   0,  0,   1,   0,   0,   0,   S }, /* int8 */
    {  7,  7,   0,  0,   0,   0,   0,   0,   E }, /* eint8 */
    { 16, 16,   0,  0,   0,   0,   0,   0,   U }, /* uint16 */
    { 16, 16,   0,  0,   0,   0,   0,   0,   W }, /* wuint16 */
    { 15, 15,   0,  0,   1,   0,   0,   0,   S }, /* int16 */
    { 15, 15,   0,  0,   0,   0,   0,   0,   E }, /* eint16 */
    { 32, 32,   0,  0,   0,   0,   0,   0,   U }, /* uint32 */
    { 32, 32,   0,  0,   0,   0,   0,   0,   W }, /* wuint32 */
    { 31, 31,   0,  0,   1,   0,   0,   0,   S }, /* int32 */
    { 31, 31,   0,  0,   0,   0,   0,   0,   E }, /* eint32 */
    { 64, 64,   0,  0,   0,   0,   0,   0,   U }, /* uint64 */
    { 64, 64,   0,  0,   0,   0,   0,   0,   W }, /* wuint64 */
    { 63, 63,   0,  0,   1,   0,   0,   0,   S }, /* int64 */
    { 63, 63,   0,  0,   0,   0,   0,   0,   E }, /* eint64 */
    {128,128,   0,  0,   0,   0,   0,   0,   U }, /* uint128 */
    {128,128,   0,  0,   0,   0,   0,   0,   W }, /* wuint128 */
    {127,127,   0,  0,   1,   0,   0,   0,   S }, /* int128 */
    {127,127,   0,  0,   0,   0,   0,   0,   E }, /* eint128 */

    { 24,255,  32,255,   1,   0,   0,   0,   F }, /* float */
    { 11, 11,  16, 16,   1,   0,   0,   0,   F }, /* float16 - IEEE 754-2008 */
    { 24, 24,  32, 32,   1,   0,   0,   0,   F }, /* float32 - IEEE 754 */
    { 53, 53,  64, 64,   1,   0,   0,   0,   F }, /* float64 - IEEE 754 */
    { 64, 64,  80, 80,   1,   0,   0,   0,   F }, /* float80 - x86 extended */
    {113,113, 128,128,   1,   0,   0,   0,   F }, /* float128 - IEEE 754-2008 */
    { 24,255,  32,255,   1,   0,   2,   2,   F }, /* cfloat */
    { 53,255,  64,255,   1,   0,   2,   3,   F }, /* cdouble */
    { 53,255,  64,255,   1,   0,   2,   4,   F }, /* clongdouble */

    {  0,  0,   0,  0,   0,   1,   0,   0,   B }, /* bool */
};
#undef S
#undef U
#undef W
#undef E
#undef F
#undef B


/* Boolean type */
const LRLToken lrl_builtin_token_zero = { LRL_TT_Integer, { "0", 1 } };
const LRLToken lrl_builtin_token_one = { LRL_TT_Integer, { "1", 1 } };
/* NOTE(review): these lengths are one less than the length of the quoted
   literal (7 and 6 characters). Left untouched because the string token
   length convention is not visible in this file -- confirm. */
static const LRLToken linkname_false = { LRL_TT_String, { "\"false\"", 6 } };
static const LRLToken linkname_true = { LRL_TT_String, { "\"true\"", 5 } };

/* Constant int expressions 0 and 1: the values of false and true */
static const LRLASTExpr expr_zero = {
    LRL_AST_Value_Scalar,
    { LRL_Qual_Const, NULL, &types[LRL_BT_int] },
    &lrl_builtin_token_zero, &lrl_builtin_token_zero,
    { { (void*)&lrl_builtin_token_zero, (void*)0, NULL, NULL, NULL } }
};

static const LRLASTExpr expr_one = {
    LRL_AST_Value_Scalar,
    { LRL_Qual_Const, NULL, &types[LRL_BT_int] },
    &lrl_builtin_token_one, &lrl_builtin_token_one,
    { { (void*)&lrl_builtin_token_one, (void*)0, NULL, NULL, NULL } }
};

/* Identifiers of the two enum values. Both live in the single bucket of
   bool_scope; the one with the lower hash comes first and links to the
   other, which is what the preprocessor conditionals select. */
static const LRLIdent ident_false = {
    LRL_HASH_OF_FALSE,
#if LRL_HASH_OF_TRUE >= LRL_HASH_OF_FALSE
    (LRLIdent*)&ident_true,
#else
    NULL,
#endif
    LRL_IdFl_EnumValue, NULL, (LRLASTDefOrStmt*)&deflist_false.def, "false",
    NULL, &bool_scope, &linkname_false, 0, NULL,
    { 0, 0, 0, NULL }
};

static const LRLIdent ident_true = {
    LRL_HASH_OF_TRUE,
#if LRL_HASH_OF_TRUE < LRL_HASH_OF_FALSE
    (LRLIdent*)&ident_false,
#else
    NULL,
#endif
    LRL_IdFl_EnumValue, NULL, (LRLASTDefOrStmt*)&deflist_true.def, "true",
    NULL, &bool_scope, &linkname_true, 0, NULL,
    { 0, 0, 0, NULL }
};

/* Definition list of the enum values: false (= 0) followed by true (= 1) */
static const LRLASTDefList deflist_false = {
    (LRLASTDefList*)&deflist_true,
    { LRL_AST_Def_Data,
      { { (void*)&ident_false, (void*)LRL_DeFl_Internal_DefinedByBackend,
          (void*)&types[LRL_BT_bool], (void*)&expr_zero } } }
};

static const LRLASTDefList deflist_true = {
    NULL,
    { LRL_AST_Def_Data,
      { { (void*)&ident_true, (void*)LRL_DeFl_Internal_DefinedByBackend,
          (void*)&types[LRL_BT_bool], (void*)&expr_one } } }
};

/* The "typedef bool = enum(false, true)" definition node */
static const LRLASTDef typedef_bool = {
    LRL_AST_Def_Type,
    { {
        NULL, /* Identifier. It should point to the "bool" identifier actually */
        (void*)LRL_DeFl_Alias,
        /* XXX ugly cast. is it safe? */
        (LRLASTType*)&types[LRL_BT_bool],
        NULL
    } }
};

/* First identifier in the bucket of bool_scope (the one with lowest hash) */
#if LRL_HASH_OF_TRUE < LRL_HASH_OF_FALSE
static const LRLIdent *const bool_bucket = &ident_true;
#else
static const LRLIdent *const bool_bucket = &ident_false;
#endif

static const LRLIdent bool_scope = {
    /* also used as a template for initializing the "real" bool identifier */
    0, NULL, 0, NULL, (LRLASTDefOrStmt*)&typedef_bool, "bool",
    NULL, NULL, NULL, 0, NULL,
    { 1, 0, 2, (LRLIdent**)&bool_bucket }
};


/**
 * Returns the name of a builtin type as a null terminated string.
 * The string is static and must not be modified or freed.
 */
const char *lrl_builtin_get_name(LRLBuiltinType type)
{
    return typetokens[type].loc.start; /* null terminated */
}

/**
 * Returns the statically allocated type node of a builtin type.
 * Do not free or modify the returned type!
 */
struct LRLASTType *lrl_builtin_get_type(LRLBuiltinType type)
{
    return (LRLASTType*)&types[type];
}

/**
 * Returns non-zero if the given type node is one of the statically
 * allocated builtin type nodes, i.e. if it points into the types array.
 */
int lrl_is_builtin(const struct LRLASTType *type)
{
    /* compared as integers, since "type" may point to an unrelated object */
    return (uintptr_t)type >= (uintptr_t)&types[0] &&
           (uintptr_t)type < (uintptr_t)&types[LRL_BT_NumTypes];
}

/**
 * Returns the identifier of "true" if value is non-zero,
 * and the identifier of "false" otherwise.
 */
const LRLIdent *lrl_builtin_get_bool_value(int value)
{
    return value ? &ident_true : &ident_false;
}

/**
 * Inserts identifiers for all builtin types into the given root scope and
 * attaches a type definition to each of them.
 *
 * The definitions of the non-bool types are stored in a heap allocated
 * array, which is not freed by this function since the identifiers keep
 * pointing into it. The process is aborted if the array can't be allocated.
 */
void lrl_builtins_add(LRLIdent *root)
{
    size_t i;
    LRLASTDefOrStmt *defs = malloc(sizeof(*defs) * LRL_BT_NumTypes);

    if (!defs) {
        /* Previously this was an unchecked NULL pointer dereference.
           NOTE(review): switch to the project's own out-of-memory
           handler, if there is one. */
        abort();
    }

    /* Add builtin types */
    for (i = 0; i < LRL_BT_NumTypes; i++) {
        /* Insert an identifier */
        LRLIdent *ident = lrl_ident_insert_string(
                NULL, root, (char*)typetokens[i].loc.start);

        /* Create a definition */
        if (i != LRL_BT_bool) {
            defs[i].ast_type = LRL_AST_Def_Type;
            defs[i].def.kind.type.ident = ident;
            defs[i].def.kind.type.flags = LRL_DeFl_Alias;
            defs[i].def.kind.type.type = (LRLASTType*)&types[i];
            defs[i].def.kind.type.typenames = NULL;
            ident->def_node = &defs[i];
        } else {
            /* bool uses the static definition, and gets the scope
               with the "true" and "false" identifiers */
            ident->def_node = (LRLASTDefOrStmt*)&typedef_bool;
            ident->contents = bool_scope.contents;
        }
    }
}


/* Map of decimal literals to integer types */
typedef struct {
    LRLBuiltinType bt; /* integer type to map to */
    int len;           /* maximum number (length + string) */
    const char *number;
} IntSize;

/* Each table below is sorted by ascending limit and is terminated by an
   entry with len == 0. The limits are written without any prefix or sign,
   and with lowercase hex digits. */
static const IntSize intsizes_decpos[] = {
    { LRL_BT_eint8,    3, "127" },
    { LRL_BT_uint8,    3, "255" },
    { LRL_BT_eint16,   5, "32767" },
    { LRL_BT_uint16,   5, "65535" },
    { LRL_BT_eint32,  10, "2147483647" },
    { LRL_BT_uint32,  10, "4294967295" },
    { LRL_BT_eint64,  19, "9223372036854775807" },
    { LRL_BT_uint64,  20, "18446744073709551615" },
    { LRL_BT_eint128, 39, "170141183460469231731687303715884105727" },
    { LRL_BT_uint128, 39, "340282366920938463463374607431768211455" },
    { 0, 0, NULL }
};

static const IntSize intsizes_decneg[] = {
    { LRL_BT_int8,     3, "128" },
    { LRL_BT_int16,    5, "32768" },
    { LRL_BT_int32,   10, "2147483648" },
    { LRL_BT_int64,   19, "9223372036854775808" },
    { LRL_BT_int128,  39, "170141183460469231731687303715884105728" },
    { 0, 0, NULL }
};

static const IntSize intsizes_hexpos[] = {
    { LRL_BT_eint8,    2, "7f" },
    { LRL_BT_uint8,    2, "ff" },
    { LRL_BT_eint16,   4, "7fff" },
    { LRL_BT_uint16,   4, "ffff" },
    { LRL_BT_eint32,   8, "7fffffff" },
    { LRL_BT_uint32,   8, "ffffffff" },
    { LRL_BT_eint64,  16, "7fffffffffffffff" },
    { LRL_BT_uint64,  16, "ffffffffffffffff" },
    /* The 128 bit limits are split in groups of 8 digits for readability.
       The eint128 limit was previously all f's (same as uint128), which
       made values above 0x7fff...ff be detected as eint128. */
    { LRL_BT_eint128, 32, "7fffffff" "ffffffff" "ffffffff" "ffffffff" },
    { LRL_BT_uint128, 32, "ffffffff" "ffffffff" "ffffffff" "ffffffff" },
    { 0, 0, NULL }
};

static const IntSize intsizes_hexneg[] = {
    { LRL_BT_int8,     2, "80" },
    { LRL_BT_int16,    4, "8000" },
    { LRL_BT_int32,    8, "80000000" },
    { LRL_BT_int64,   16, "8000000000000000" },
    { LRL_BT_int128,  32, "80000000" "00000000" "00000000" "00000000" },
    { 0, 0, NULL }
};

/**
 * Determines the smallest type that fits an integer literal.
 * The code location start pointer should not include any +/- sign.
 *
 * The literal may have a "0x" prefix (hexadecimal), and may contain
 * underscores, which are ignored. At most loc->length characters are read.
 *
 * Returns LRL_BT_eint8 for zero, and LRL_BT_INVALID if the number doesn't
 * fit in any of the integer types.
 */
LRLBuiltinType lrl_builtin_determine_int_type(const LRLCodeLocation *loc,
                                              int negative)
{
    const char *s = loc->start;
    size_t charsleft = loc->length;
    int hex = 0;
    const IntSize *sizes;
    char normalized[41];
    int normlen = 0;

    /* Detect 0x prefix (only if the literal is long enough to have one) */
    if (charsleft >= 2 && s[0] == '0' && s[1] == 'x') {
        hex = 1;
        s += 2;
        charsleft -= 2;
    }

    /* Read number and normalize it. Up to 40 significant digits are kept,
       which is one more than the longest limit, so too large numbers end
       up being longer than all entries in the tables. */
    while (charsleft && normlen < 40) {
        char ch = *s;
        /* strchr also matches the terminating null character of the
           set, so it has to be rejected explicitly */
        if (ch == '\0' || !strchr("0123456789abcdefABCDEF_", ch)) break;
        s++;
        charsleft--;

        if (ch == '_') continue; /* remove underscores */
        /* Skip leading zeroes, including zeroes that are separated by
           underscores. They must not count as digits, since the number
           of digits is used to select the type. */
        if (ch == '0' && !normlen) continue;

        if (ch >= 'A' && ch <= 'F') ch |= 0x20; /* to lowercase */
        normalized[normlen++] = ch;
    }

    if (!normlen) {
        /* Only zeros */
        return LRL_BT_eint8;
    }
    normalized[normlen] = '\0';

    /* Look up type */
    if (!hex) {
        sizes = negative ? intsizes_decneg : intsizes_decpos;
    } else {
        sizes = negative ? intsizes_hexneg : intsizes_hexpos;
    }

    for (; sizes->len; sizes++) {
        int cmp;
        /* More digits than the limit: too large for this type */
        if (normlen > sizes->len) continue;
        /* Fewer digits than the limit: always fits */
        if (normlen < sizes->len) return sizes->bt;

        /* Same number of digits: compare the digits.
           works with hex also due to ASCII digits < letters */
        cmp = strncmp(normalized, sizes->number, normlen);
        if (cmp <= 0) return sizes->bt;
    }
    return LRL_BT_INVALID;
}

/**
 * Returns 1 if the "to" type can accept a value of the "from" type.
 * Both parameters must be valid indices into lrl_builtin_info, unless
 * they are equal (in which case 1 is returned without any lookup).
 */
int lrl_builtin_target_is_superset(LRLBuiltinType to, LRLBuiltinType from)
{
    const LRLBuiltinInfo *t, *f;

    if (to == from) return 1;

    t = &lrl_builtin_info[to];
    f = &lrl_builtin_info[from];

    /* bool <--> non-bool */
    if (t->special) return f->special == t->special;
    else if (f->special) return 0;

    /* signed -> unsigned */
    if (f->is_signed && !t->is_signed) return 0;

    /* same series, and higher index => OK */
    if (t->series && t->series == f->series && t->index >= f->index) return 1;

    /* smaller target type */
    if (t->min_int < f->max_int || t->min_float < f->max_float) return 0;

    return 1;
}