Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff -pruN tcc-0.9.26/i386-asm.h tcc-0.9.26o/i386-asm.h
- --- tcc-0.9.26/i386-asm.h 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/i386-asm.h 2013-09-05 23:14:30 +1000
- @@ -162,8 +162,8 @@ ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODR
- ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */
- ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
- ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWL, OPT_IM, OPT_EAX))
- -ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
- ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WL, OPT_IM8S, OPT_EA | OPT_REG))
- +ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
- ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
- ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
- diff -pruN tcc-0.9.26/i386-gen.c tcc-0.9.26o/i386-gen.c
- --- tcc-0.9.26/i386-gen.c 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/i386-gen.c 2013-09-25 23:47:38 +1000
- @@ -21,7 +21,7 @@
- #ifdef TARGET_DEFS_ONLY
- /* number of available registers */
- -#define NB_REGS 4
- +#define NB_REGS 5
- #define NB_ASM_REGS 8
- /* a register can belong to several classes. The classes must be
- @@ -33,6 +33,7 @@
- #define RC_ST0 0x0008
- #define RC_ECX 0x0010
- #define RC_EDX 0x0020
- +#define RC_EBX 0x0040
- #define RC_IRET RC_EAX /* function return: integer register */
- #define RC_LRET RC_EDX /* function return: second integer register */
- #define RC_FRET RC_ST0 /* function return: float register */
- @@ -42,6 +43,7 @@ enum {
- TREG_EAX = 0,
- TREG_ECX,
- TREG_EDX,
- + TREG_EBX,
- TREG_ST0,
- };
- @@ -84,6 +86,17 @@ enum {
- #define ELF_PAGE_SIZE 0x1000
- /******************************************************/
- +/* Optimizations */
- +
- +#define OPT_PROLOG 1 /* reduce the size of the prolog and epilog */
- +#define OPT_JUMPS 2 /* remove redundant jumps, change near to short */
- +#define OPT_REGS 4 /* occasionally re-use existing values */
- +#define OPT_MULTS 8 /* use LEA for multiplication, reciprocal for ptr */
- +#define OPT_SIZE 16 /* remove function padding, only inline small epilog */
- +
- +#define NB_INT_REGS 4 /* don't work with float registers */
- +
- +/******************************************************/
- #else /* ! TARGET_DEFS_ONLY */
- /******************************************************/
- #include "tcc.h"
- @@ -92,14 +105,59 @@ ST_DATA const int reg_classes[NB_REGS] =
- /* eax */ RC_INT | RC_EAX,
- /* ecx */ RC_INT | RC_ECX,
- /* edx */ RC_INT | RC_EDX,
- + /* ebx */ RC_INT | RC_EBX,
- /* st0 */ RC_FLOAT | RC_ST0,
- };
- -static unsigned long func_sub_sp_offset;
- -static int func_ret_sub;
- +static unsigned long func_sub_sp_offset, prolog_reloc_offset;
- +static int func_ret_sub, func_uses_args, func_uses_ebx;
- +static int push_load;
- +static int **jmplst, nb_jmplst;
- +static int ins_ind, ins_len;
- +static int epilog_ind;
- +
- +/* What one integer register is known to mirror.  A slot whose 'r' field is
- +   -1 is empty (clear_reg() and the memset(-1) in o() produce that state). */
- +typedef struct CacheValue {
- + SValue ld; /* loaded register from this value */
- + SValue st; /* stored register to this value */
- +} CacheValue;
- +/* one entry per integer register, indexed by register number */
- +static CacheValue rv_cache[NB_INT_REGS];
- +/* 0: nothing cached; 1: set by load()/store() once an entry is recorded;
- +   2: set by gcall_or_jmp() after a call while ebx has a cached load, so
- +   that o() keeps the ebx entry when it invalidates the others */
- +static int cache_used;
- +
- +/* Queue of up to two deferred constant loads and their following stores.
- +   load() records the target register in q_r[] and the constant in q_lv[];
- +   store() records the destination in q_sv[].  q_state is a combination of
- +   the Q_* values below. */
- +static int q_r[2], q_state;
- +static SValue q_lv[2], q_sv[2];
- +/* cache entry a register had before its load was queued; put back by
- +   loadc() and store_const() */
- +static CacheValue q_c[NB_INT_REGS];
- +/* Q_IGNORE is set while queued entries are being emitted, so the nested
- +   load()/store() calls match none of the queue states and emit directly.
- +   Q_L1/Q_L2: first/second load queued; Q_S1/Q_S2: store of the first/second
- +   queued register recorded.  The remaining names are the combinations that
- +   load() and store() actually test for. */
- +enum { Q_IGNORE = -1,
- + Q_READY = 0,
- + Q_L1 = 1,
- + Q_S1 = 2,
- + Q_L2 = 4,
- + Q_S2 = 8,
- + Q_L = Q_L1,
- + Q_LS = Q_L1 | Q_S1,
- + Q_LL = Q_L1 | Q_L2,
- + Q_LLS = Q_LS | Q_L2,
- + Q_LLSS = Q_LLS | Q_S2
- +};
- +
- +/* Pending 'add' applied to an integer register, remembered so that
- +   gen_modrm() can remove the add (via adjust_oind) and express it as part
- +   of an addressing mode instead.  gen_opi() stores the code offset of the
- +   add in indc/indr; 0 means none is recorded and gen_modrm() writes -1 once
- +   it has consumed the entry. */
- +typedef struct OfsValue {
- + int indc; /* index of add constant */
- + int lenc; /* its size */
- + int indr; /* index of add register (size is always 2) */
- + int r; /* register (if indr != 0) */
- + int c; /* constant (if indc != 0) */
- + CacheValue v; /* previous value of register */
- +} OfsValue;
- +/* one entry per integer register, indexed by register number */
- +OfsValue o_v[NB_INT_REGS];
- +
- #ifdef CONFIG_TCC_BCHECK
- static unsigned long func_bound_offset;
- #endif
- +static void loadc(int);
- +static void storec(int);
- +static void store_const(int);
- +static void jmpopt(void);
- +static void adjust_reloc(int t, int s);
- +static void adjust_oind(int t, int s);
- /* XXX: make it faster ? */
- ST_FUNC void g(int c)
- @@ -114,12 +172,45 @@ ST_FUNC void g(int c)
- ST_FUNC void o(unsigned int c)
- {
- + if (q_state > Q_READY)
- + flushq();
- + if (tcc_state->optimize & OPT_JUMPS) {
- + if (ind > ins_ind) { /* ignore backtracking */
- + if (ind - ins_ind <= 15) /* ignore padding & prolog */
- + ins_len = (ins_len << 4) | (ind - ins_ind);
- + ins_ind = ind;
- + }
- + }
- + if (tcc_state->optimize & OPT_REGS) {
- + if (cache_ind != ind && cache_used) {
- + if (cache_used == 2) {
- + /* ebx is preserved across a call */
- + SValue ebx = rv_cache[TREG_EBX].ld;
- + CacheValue qebx = q_c[TREG_EBX];
- + memset(rv_cache, -1, sizeof(rv_cache));
- + memset(q_c, -1, sizeof(q_c));
- + rv_cache[TREG_EBX].ld = ebx;
- + q_c[TREG_EBX] = qebx;
- + } else {
- + memset(rv_cache, -1, sizeof(rv_cache));
- + memset(q_c, -1, sizeof(q_c));
- + }
- + memset(o_v, 0, sizeof(o_v));
- + cache_used = 0;
- + }
- + }
- while (c) {
- g(c);
- c = c >> 8;
- }
- }
- +/* Emit opcode 'c' through o() (which also runs the queue/cache bookkeeping)
- +   followed by a single extra byte 'v' through g(). */
- +ST_FUNC void og(int c, int v)
- +{
- + o(c);
- + g(v);
- +}
- +
- ST_FUNC void gen_le16(int v)
- {
- g(v);
- @@ -148,9 +239,19 @@ ST_FUNC void gsym_addr(int t, int a)
- ST_FUNC void gsym(int t)
- {
- + if (q_state > Q_READY)
- + flushq();
- gsym_addr(t, ind);
- }
- +/* Append a two-int record {t, s} to the jmplst dynamic array.
- +   oad() calls this for opcode 0xe9 and for two-byte 0x0f 0x8x opcodes,
- +   passing the code offset just after the opcode as 't' and instruction
- +   length history taken from ins_len as 's'; load() passes a negated offset
- +   together with a value/register code.  The record is allocated with
- +   tcc_malloc and is not released here. */
- +static void add_jump(int t, int s)
- +{
- + int *j = tcc_malloc(2 * sizeof(int));
- + j[0] = t;
- + j[1] = s;
- + dynarray_add((void ***)&jmplst, &nb_jmplst, j);
- +}
- +
- /* psym is used to put an instruction with a data field which is a
- reference to a symbol. It is in fact the same as oad ! */
- #define psym oad
- @@ -161,6 +262,10 @@ ST_FUNC int oad(int c, int s)
- int ind1;
- o(c);
- + if (tcc_state->optimize & OPT_JUMPS) {
- + if (c == 0xe9 || (c & 0xf0ff) == 0x800f)
- + add_jump(ind, c == 0xe9 ? ins_len : (ins_len & 15));
- + }
- ind1 = ind + 4;
- if (ind1 > cur_text_section->data_allocated)
- section_realloc(cur_text_section, ind1);
- @@ -185,45 +290,242 @@ ST_FUNC void gen_addrpc32(int r, Sym *sy
- gen_le32(c - 4);
- }
- -/* generate a modrm reference. 'op_reg' contains the addtionnal 3
- +/* generate a modrm reference. 'op_reg' contains the additional 3
- opcode bits */
- -static void gen_modrm(int op_reg, int r, Sym *sym, int c)
- +static void gen_modrm(int opc, int op_reg, int r, Sym *sym, int c)
- {
- + o(opc);
- op_reg = op_reg << 3;
- if ((r & VT_VALMASK) == VT_CONST) {
- /* constant memory reference */
- - o(0x05 | op_reg);
- + if (op_reg == TREG_EAX && cur_text_section->data[ind-1] >= 0x88 &&
- + cur_text_section->data[ind-1] <= 0x8b)
- + cur_text_section->data[ind-1] ^= 0xa3 ^ 0x89; /* use eax directly */
- + else
- + g(0x05 | op_reg);
- gen_addr32(r, sym, c);
- } else if ((r & VT_VALMASK) == VT_LOCAL) {
- /* currently, we use only ebp as base */
- if (c == (char)c) {
- /* short reference */
- - o(0x45 | op_reg);
- + g(0x45 | op_reg);
- g(c);
- } else {
- - oad(0x85 | op_reg, c);
- + g(0x85 | op_reg);
- + gen_le32(c);
- }
- + if (c > 0)
- + func_uses_args = 1;
- } else {
- - g(0x00 | op_reg | (r & VT_VALMASK));
- + r &= VT_VALMASK;
- + if ((tcc_state->optimize & OPT_REGS) &&
- + (o_v[r].indr != 0 || o_v[r].indc != 0)) {
- + if (o_v[r].indr != 0) {
- + if (o_v[r].r == (op_reg >> 3)) {
- + /* can't do mov op_reg, [r + op_reg] as it won't work with
- + long longs:
- + mov eax, [ecx+eax]
- + mov edx, [ecx+eax+4] // no good
- + */
- + o_v[r].indr = 0;
- + goto no_offset;
- + }
- + adjust_oind(o_v[r].indr, 2);
- + }
- + if (o_v[r].indc != 0) {
- + adjust_oind(o_v[r].indc, o_v[r].lenc);
- + }
- + opc = push_load == 2 ? op_reg = 0, 0x30 : 0x00;
- + if (o_v[r].indc == 0) {
- + g(opc | 0x04 | op_reg);
- + g(r | (o_v[r].r << 3));
- + o_v[r].indr = -1;
- + } else {
- + opc |= (o_v[r].c == (char)o_v[r].c) ? 0x40 : 0x80;
- + if (o_v[r].indr != 0) {
- + g(opc | 0x04 | op_reg);
- + g(r | (o_v[r].r << 3));
- + o_v[r].indr = -1;
- + } else {
- + g(opc | op_reg | r);
- + }
- + if (opc & 0x40)
- + g(o_v[r].c);
- + else
- + gen_le32(o_v[r].c);
- + o_v[r].indc = -1;
- + }
- + rv_cache[r] = o_v[r].v;
- + } else {
- + no_offset:
- + g(0x00 | op_reg | r);
- + }
- + }
- +}
- +
- +/* Delete the 's' bytes of already emitted code that start at offset 't' in
- +   the current text section and close the gap.
- +
- +   Bookkeeping that refers to later code is shifted down by 's': the add
- +   offsets recorded in o_v[], relocations (through adjust_reloc), 'ind', and
- +   with OPT_JUMPS also 'ins_ind' and the instruction length history in
- +   'ins_len'.
- +
- +   t: offset of the first byte to remove; a negative value means the entry
- +      was already consumed and nothing is done.
- +   s: number of bytes to remove. */
- +static void adjust_oind(int t, int s)
- +{
- +    int r;
- +
- +    if (t < 0)
- +        return;
- +    for (r = 0; r < NB_INT_REGS; ++r) {
- +        if (o_v[r].indc > t)
- +            o_v[r].indc -= s;
- +        if (o_v[r].indr > t)
- +            o_v[r].indr -= s;
- +    }
- +    adjust_reloc(t, s);
- +    /* Source and destination overlap as soon as more than 's' bytes follow
- +       the deleted span, so this has to be memmove; memcpy on overlapping
- +       objects is undefined behaviour. */
- +    memmove(cur_text_section->data + t,
- +            cur_text_section->data + t + s,
- +            ind - (t + s));
- +    ind -= s;
- +
- +    if (tcc_state->optimize & OPT_JUMPS) {
- +        /* ins_len holds recent instruction lengths, four bits each, newest
- +           in the low nibble (see o()).  Walk back from ins_ind to find the
- +           instruction that started at 't' and drop its nibble. */
- +        int cnt = 0;              /* bit position of the nibble examined */
- +        int pos = ins_ind;        /* start offset of that instruction */
- +        unsigned hist = ins_len;
- +
- +        ins_ind -= s;
- +        while (hist) {
- +            pos -= hist & 15;
- +            if (pos == t) {
- +                /* keep the nibbles below 'cnt', shift the ones above down;
- +                   the shift is unsigned in every case so that a set top
- +                   bit is never sign-extended into the history */
- +                unsigned low = (1u << cnt) - 1;
- +                ins_len = (((unsigned)ins_len >> 4) & ~low)
- +                          | ((unsigned)ins_len & low);
- +                break;
- +            }
- +            cnt += 4;
- +            hist >>= 4;
- +        }
- +    }
- +}
- +
- +/* Forget what the cache knows about integer register 'r': its own load and
- +   store entries, any register's entry whose cached value refers to 'r', and
- +   any recorded add-register offset that uses 'r' as the added register. */
- +static void clear_reg(int r)
- +{
- +    CacheValue *cv;
- +    OfsValue *ov;
- +    int i;
- +
- +    rv_cache[r].ld.r = -1;
- +    rv_cache[r].st.r = -1;
- +    for (i = 0; i < NB_INT_REGS; i++) {
- +        cv = &rv_cache[i];
- +        ov = &o_v[i];
- +        if (r == (cv->ld.r & VT_VALMASK))
- +            cv->ld.r = -1;
- +        if (r == (cv->st.r & VT_VALMASK))
- +            cv->st.r = -1;
- +        if (ov->indr != 0 && ov->r == r) {
- +            ov->c = 0;
- +            ov->indc = 0;
- +            ov->indr = 0;
- +        }
- +    }
- +}
- +
- +/* Return 1 when 'v1' and 'v2' carry the same constant and agree on their
- +   symbol: either neither has VT_SYM set, or both do and they name the same
- +   symbol.  Return 0 otherwise.  The 'r' fields are compared by the callers,
- +   not here. */
- +static int cmpval(SValue *v1, SValue *v2)
- +{
- +    int has_sym1 = (v1->r & VT_SYM) != 0;
- +    int has_sym2 = (v2->r & VT_SYM) != 0;
- +
- +    if (v1->c.i != v2->c.i)
- +        return 0;
- +    if (has_sym1 != has_sym2)
- +        return 0;
- +    return !has_sym1 || v1->sym == v2->sym;
- +}
- +
- +/* Drop every cached load/store entry that describes the value 'v' (same 'r'
- +   field and equal according to cmpval).  store() calls this before it
- +   records the register that has just been written to 'v'. */
- +static void clear_val(SValue *v)
- +{
- + int r, fr;
- +
- + r = v->r;
- + for (fr = 0; fr < NB_INT_REGS; ++fr) {
- + if (r == rv_cache[fr].ld.r && cmpval(v, &rv_cache[fr].ld))
- + rv_cache[fr].ld.r = -1;
- + if (r == rv_cache[fr].st.r && cmpval(v, &rv_cache[fr].st))
- + rv_cache[fr].st.r = -1;
- }
- }
- +/* Check if a register/value has already been assigned the value/register.
- +   Called by load() before it emits anything for loading 'v' into 'r'.
- +   Returns 1 when the load has been satisfied from the cache (possibly after
- +   emitting a shorter instruction), 0 when load() must emit it normally. */
- +static int ccheck(int r, SValue *v)
- +{
- + int r1, fr, fc;
- + int n;
- +
- + /* no usable cache: empty, stale (code was emitted since it was last
- + updated), not an integer register, or loads are still queued */
- + if (!cache_used ||
- + cache_ind != ind ||
- + r >= NB_INT_REGS ||
- + q_state > Q_READY)
- + return 0;
- +
- + fc = v->c.i;
- + fr = v->r;
- + /* 'r' already holds a plain constant that matches in all but (at most)
- + the low byte: emit opcode 0xb0 + r with the new low byte if it differs */
- + if (fr == rv_cache[r].ld.r
- + && (fr & VT_VALMASK) == VT_CONST && !(fr & VT_SYM)
- + && (fc & 0xffffff00) == (rv_cache[r].ld.c.i & 0xffffff00)) {
- + if ((fc & 0xff) != (rv_cache[r].ld.c.i & 0xff)) {
- + og(0xb0 + r, fc);
- + cache_ind = ind;
- + rv_cache[r].ld.c.i = fc;
- + o_v[r].indr = o_v[r].indc = o_v[r].c = 0;
- + }
- + return 1;
- + }
- + /* check the register itself first, then the other registers */
- + for (r1 = r, n = NB_INT_REGS; n > 0; --n) {
- + if ((fr == rv_cache[r1].ld.r && cmpval(v, &rv_cache[r1].ld)) ||
- + (fr == rv_cache[r1].st.r && cmpval(v, &rv_cache[r1].st))) {
- + if (r != r1) {
- + /* the value lives in another register: push it directly when
- + loading an argument, otherwise copy it (or zero 'r') */
- + if (push_load) {
- + push_load = 2;
- + o(0x50 + r1);
- + } else {
- + if (fc == 0 && !(fr & VT_SYM) &&
- + (fr & VT_VALMASK) == VT_CONST)
- + o(O2(0x33, 0xc0 + r * 9)); /* xor r, r */
- + else
- + o(O2(0x89, 0xc0 + r + r1 * 8)); /* mov r1, r */
- + rv_cache[r] = rv_cache[r1];
- + }
- + cache_ind = ind;
- + }
- + o_v[r].indr = o_v[r].indc = o_v[r].c = 0;
- + return 1;
- + }
- + /* advance to the next candidate: after 'r' itself start from
- + register 0 (or 1 when r is 0), then step upwards skipping 'r' */
- + if (n == NB_INT_REGS)
- + r1 = (r == 0) ? 1 : 0;
- + else if (++r1 == r)
- + ++r1;
- + }
- + return 0;
- +}
- +
- /* load 'r' from value 'sv' */
- ST_FUNC void load(int r, SValue *sv)
- {
- - int v, t, ft, fc, fr;
- + int v, t, ft, fc, fr, sr = r;
- SValue v1;
- + int opc;
- #ifdef TCC_TARGET_PE
- SValue v2;
- sv = pe_getimport(sv, &v2);
- #endif
- + if (r == TREG_EBX)
- + func_uses_ebx = 1;
- +
- fr = sv->r;
- ft = sv->type.t;
- fc = sv->c.ul;
- v = fr & VT_VALMASK;
- +
- + if (tcc_state->optimize & OPT_REGS) {
- + if (q_state > Q_READY && ((fr & VT_LVAL) || v != VT_CONST))
- + flushq();
- + if (ccheck(r, sv))
- + return;
- + }
- +
- if (fr & VT_LVAL) {
- if (v == VT_LLOCAL) {
- v1.type.t = VT_INT;
- @@ -232,55 +534,215 @@ ST_FUNC void load(int r, SValue *sv)
- fr = r;
- if (!(reg_classes[fr] & RC_INT))
- fr = get_reg(RC_INT);
- + t = push_load;
- + push_load = 0;
- load(fr, &v1);
- + push_load = t;
- }
- if ((ft & VT_BTYPE) == VT_FLOAT) {
- - o(0xd9); /* flds */
- + opc = 0xd9; /* flds */
- r = 0;
- } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
- - o(0xdd); /* fldl */
- + opc = 0xdd; /* fldl */
- r = 0;
- } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
- - o(0xdb); /* fldt */
- + opc = 0xdb; /* fldt */
- r = 5;
- } else if ((ft & VT_TYPE) == VT_BYTE) {
- - o(0xbe0f); /* movsbl */
- + opc = 0xbe0f; /* movsbl */
- } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
- - o(0xb60f); /* movzbl */
- + opc = 0xb60f; /* movzbl */
- } else if ((ft & VT_TYPE) == VT_SHORT) {
- - o(0xbf0f); /* movswl */
- + opc = 0xbf0f; /* movswl */
- } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
- - o(0xb70f); /* movzwl */
- + opc = 0xb70f; /* movzwl */
- } else {
- - o(0x8b); /* movl */
- + if (push_load) {
- + push_load = 2;
- + r = 6;
- + opc = 0xff; /* pushl */
- + } else {
- + opc = 0x8b; /* movl */
- + }
- }
- - gen_modrm(r, fr, sv->sym, fc);
- + gen_modrm(opc, r, fr, sv->sym, fc);
- } else {
- if (v == VT_CONST) {
- - o(0xb8 + r); /* mov $xx, r */
- - gen_addr32(fr, sv->sym, fc);
- + if (push_load) {
- + push_load = 2;
- + if ((fr & VT_SYM) || fc != (char)fc) {
- + o(0x68); /* push $xx */
- + gen_addr32(fr, sv->sym, fc);
- + } else {
- + og(0x6a, fc); /* push $x */
- + }
- + } else {
- + if (tcc_state->optimize & OPT_REGS) {
- + if (q_state == Q_L) {
- + /* load R1 + load R2
- + if registers are different, queue R2 (long long);
- + otherwise R2 can replace R1 (don't know
- + if that would actually happen) */
- + if (r != q_r[0]) {
- + q_r[1] = r;
- + q_lv[1] = *sv;
- + q_c[1] = rv_cache[r];
- + clear_reg(r);
- + q_state = Q_LL;
- + return;
- + }
- + q_state = Q_READY;
- + } else if (q_state == Q_LS) {
- + /* load R1 + store R1 + load R2
- + if the two loads are the same, load and store R1,
- + ignore R2 (cached R1) */
- + q_state = Q_IGNORE;
- + if (r == q_r[0] && cmpval(sv, &q_lv[0])) {
- + loadc(0);
- + storec(0);
- + q_state = Q_READY;
- + return;
- + }
- + store_const(0);
- + q_state = Q_READY;
- + } else if (q_state == Q_LL) {
- + /* load R1 + load R2 + load R3
- + don't know if this would actually happen */
- + if (r == q_r[1]) {
- + q_lv[1] = *sv;
- + } else {
- + if (r != q_r[0]) {
- + q_state = Q_IGNORE;
- + loadc(0);
- + q_c[1] = rv_cache[r];
- + clear_reg(r);
- + q_state = Q_LL;
- + }
- + q_r[0] = q_r[1];
- + q_lv[0] = q_lv[1];
- + q_c[0] = q_c[1];
- + q_r[1] = r;
- + q_lv[1] = *sv;
- + }
- + return;
- + } else if (q_state == Q_LLS) {
- + /* load R1 + load R2 + store R1 + load R3
- + again, not sure if this actually happens */
- + q_state = Q_IGNORE;
- + if (r == q_r[0] && cmpval(sv, &q_lv[0])) {
- + loadc(0);
- + loadc(1);
- + storec(0);
- + q_state = Q_READY;
- + return;
- + }
- + store_const(0);
- + if (r != q_r[1])
- + loadc(1);
- + q_state = Q_READY;
- + } else if (q_state == Q_LLSS) {
- + /* load R1 + load R2 + store R1 + store R2 + load R3 */
- + q_state = Q_IGNORE;
- + if ((r == q_r[0] && cmpval(sv, &q_lv[0])) ||
- + /* take a punt on load R4,0 */
- + (q_lv[1].c.i == 0 && !(q_lv[1].r & VT_SYM))) {
- + loadc(0);
- + loadc(1);
- + storec(0);
- + storec(1);
- + if (r == q_r[0] && cmpval(sv, &q_lv[0])) {
- + q_state = Q_READY;
- + return;
- + }
- + } else {
- + store_const(0);
- + store_const(1);
- + }
- + q_state = Q_READY;
- + }
- + if (q_state == Q_READY) {
- + q_r[0] = r;
- + q_lv[0] = *sv;
- + q_c[0] = rv_cache[r];
- + clear_reg(r);
- + q_state = Q_L;
- + return;
- + }
- + }
- + if ((fr & VT_SYM) || fc != 0) {
- + o(0xb8 + r); /* mov $xx, r */
- + gen_addr32(fr, sv->sym, fc);
- + } else {
- + o(O2(0x33, 0xc0 + r * 9)); /* xor r, r */
- + }
- + }
- } else if (v == VT_LOCAL) {
- if (fc) {
- - o(0x8d); /* lea xxx(%ebp), r */
- - gen_modrm(r, VT_LOCAL, sv->sym, fc);
- + gen_modrm(0x8d, /* lea xxx(%ebp), r */
- + r, VT_LOCAL, sv->sym, fc);
- } else {
- - o(0x89);
- - o(0xe8 + r); /* mov %ebp, r */
- + if (push_load) {
- + push_load = 2;
- + o(0x55); /* push %ebp */
- + } else {
- + o(O2(0x89, 0xe8 + r)); /* mov %ebp, r */
- + }
- }
- } else if (v == VT_CMP) {
- - oad(0xb8 + r, 0); /* mov $0, r */
- - o(0x0f); /* setxx %br */
- - o(fc);
- - o(0xc0 + r);
- + o(O3(0x0f, fc, 0xc0 + r)); /* setxx %br */
- + cache_ind = ind;
- + o(O3(0x0f, 0xb6, 0xc0 + r * 9)); /* movzx br, r */
- } else if (v == VT_JMP || v == VT_JMPI) {
- t = v & 1;
- - oad(0xb8 + r, t); /* mov $1, r */
- - o(0x05eb); /* jmp after */
- + if (push_load) {
- + push_load = 2;
- + og(0x6a, t); /* push t */
- + cache_ind = ind;
- + o(0x02eb); /* jmp after */
- + } else {
- + if (!t) {
- + /* Note: changing these instructions requires corresponding
- + change in jmpopt */
- + o(O2(0x33, 0xc0 + r * 9)); /* xor r, r */
- + cache_ind = ind;
- + o(0x05eb); /* jmp after */
- + } else {
- + oad(0xb8 + r, 1); /* mov $1, r */
- + cache_ind = ind;
- + o(0x02eb); /* jmp after */
- + }
- + if (tcc_state->optimize & OPT_JUMPS)
- + add_jump(-ind, t | (r << 1));
- + }
- gsym(fc);
- - oad(0xb8 + r, t ^ 1); /* mov $0, r */
- + if (push_load) {
- + og(0x6a, t ^ 1);
- + } else if (t) {
- + o(O2(0x33, 0xc0 + r * 9));
- + } else {
- + oad(0xb8 + r, 1);
- + }
- } else if (v != r) {
- - o(0x89);
- - o(0xc0 + r + v * 8); /* mov v, r */
- + if (push_load) {
- + push_load = 2;
- + o(0x50 + v); /* push v */
- + } else {
- + o(O2(0x89, 0xc0 + r + v * 8)); /* mov v, r */
- + }
- + }
- + }
- +
- + if (tcc_state->optimize & OPT_REGS) {
- + cache_ind = ind;
- + if (sr < NB_INT_REGS) {
- + if (push_load != 2) {
- + clear_reg(sr);
- + if (v >= VT_CONST) {
- + rv_cache[sr].ld = *sv;
- + cache_used = 1;
- + }
- + o_v[sr].indr = o_v[sr].indc = o_v[sr].c = 0;
- + }
- }
- }
- }
- @@ -288,7 +750,8 @@ ST_FUNC void load(int r, SValue *sv)
- /* store register 'r' in lvalue 'v' */
- ST_FUNC void store(int r, SValue *v)
- {
- - int fr, bt, ft, fc;
- + int sr = r, fr, bt, ft, fc;
- + int opc;
- #ifdef TCC_TARGET_PE
- SValue v2;
- @@ -301,46 +764,184 @@ ST_FUNC void store(int r, SValue *v)
- bt = ft & VT_BTYPE;
- /* XXX: incorrect if float reg to reg */
- if (bt == VT_FLOAT) {
- - o(0xd9); /* fsts */
- + opc = 0xd9; /* fsts */
- r = 2;
- } else if (bt == VT_DOUBLE) {
- - o(0xdd); /* fstpl */
- + opc = 0xdd; /* fstpl */
- r = 2;
- } else if (bt == VT_LDOUBLE) {
- o(0xc0d9); /* fld %st(0) */
- - o(0xdb); /* fstpt */
- + cache_ind = ind;
- + opc = 0xdb; /* fstpt */
- r = 7;
- } else {
- - if (bt == VT_SHORT)
- - o(0x66);
- if (bt == VT_BYTE || bt == VT_BOOL)
- - o(0x88);
- + opc = 0x88;
- + else if (bt == VT_SHORT)
- + opc = 0x8966;
- else
- - o(0x89);
- + opc = 0x89;
- }
- if (fr == VT_CONST ||
- fr == VT_LOCAL ||
- (v->r & VT_LVAL)) {
- - gen_modrm(r, v->r, v->sym, fc);
- + if (tcc_state->optimize & OPT_REGS) {
- + if (q_state == Q_L) {
- + /* load R1 + store R2 */
- + if (r == q_r[0]) {
- + q_sv[0] = *v;
- + q_state = Q_LS;
- + return;
- + }
- + q_state = Q_IGNORE;
- + loadc(0);
- + q_state = Q_READY;
- + } else if (q_state == Q_LS) {
- + /* load R1 + store R1 + store R2 */
- + q_state = Q_IGNORE;
- + loadc(0);
- + storec(0);
- + q_state = Q_READY;
- + } else if (q_state == Q_LL) {
- + /* load R1 + load R2 + store R3 */
- + if (r == q_r[0]) {
- + q_sv[0] = *v;
- + q_state = Q_LLS;
- + return;
- + }
- + q_state = Q_IGNORE;
- + if (r == q_r[1]) {
- + loadc(0);
- + q_r[0] = q_r[1];
- + q_lv[0] = q_lv[1];
- + q_sv[0] = *v;
- + q_state = Q_LS;
- + return;
- + }
- + loadc(0);
- + loadc(1);
- + q_state = Q_READY;
- + } else if (q_state == Q_LLS) {
- + /* load R1 + load R2 + store R1 + store R3 */
- + if (r == q_r[1]) {
- + q_sv[1] = *v;
- + q_state = Q_LLSS;
- + return;
- + }
- + q_state = Q_IGNORE;
- + loadc(0);
- + loadc(1);
- + storec(0);
- + q_state = Q_READY;
- + } else if (q_state == Q_LLSS) {
- + /* load R1 + load R2 + store R1 + store R2 + store R3 */
- + q_state = Q_IGNORE;
- + loadc(0);
- + loadc(1);
- + storec(0);
- + storec(1);
- + q_state = Q_READY;
- + }
- + }
- + gen_modrm(opc, r, v->r, v->sym, fc);
- } else if (fr != r) {
- - o(0xc0 + fr + r * 8); /* mov r, fr */
- + og(opc, 0xc0 + fr + r * 8); /* mov r, fr */
- + }
- +
- + if (tcc_state->optimize & OPT_REGS) {
- + cache_ind = ind;
- + if (sr < NB_INT_REGS) {
- + if (fr >= VT_CONST) {
- + clear_val(v);
- + rv_cache[sr].st = *v;
- + cache_used = 1;
- + }
- + o_v[sr].indr = o_v[sr].indc = o_v[sr].c = 0;
- + }
- + }
- +}
- +
- +/* Emit the deferred load held in queue slot 'q': first put back the cache
- +   entry the register had when the load was queued, then call load() for the
- +   queued register and value.  The callers in this file set q_state to
- +   Q_IGNORE beforehand. */
- +static void loadc(int q)
- +{
- + rv_cache[q_r[q]] = q_c[q];
- + load(q_r[q], &q_lv[q]);
- +}
- +
- +/* Emit the deferred store held in queue slot 'q' by calling store() with
- +   the queued register and destination. */
- +static void storec(int q)
- +{
- + store(q_r[q], &q_sv[q]);
- +}
- +
- +/* Complete queue slot 'q' without loading its register: write the queued
- +   constant (q_lv[q]) straight into the queued store destination (q_sv[q])
- +   with a store-immediate instruction, then put back the cache entry the
- +   register had when the load was queued. */
- +static void store_const(int q)
- +{
- + SValue *sv, *v;
- + int r, bt;
- + int opc;
- +
- + r = q_r[q];
- + sv = &q_lv[q];
- + v = &q_sv[q];
- + bt = v->type.t & VT_BTYPE;
- +
- + /* pick the opcode by destination size; o() emits the low byte first, so
- + 0xc766 comes out as the 0x66 prefix followed by 0xc7 */
- + if (bt == VT_BYTE || bt == VT_BOOL)
- + opc = 0xc6;
- + else if (bt == VT_SHORT)
- + opc = 0xc766;
- + else
- + opc = 0xc7;
- + gen_modrm(opc, 0, v->r, v->sym, v->c.i);
- + /* the immediate follows the address bytes, sized like the destination */
- + if (bt == VT_BYTE || bt == VT_BOOL)
- + g(sv->c.i);
- + else if (bt == VT_SHORT)
- + gen_le16(sv->c.i);
- + else
- + gen_addr32(sv->r, sv->sym, sv->c.i);
- +
- + cache_ind = ind;
- + rv_cache[r] = q_c[q];
- +}
- +
- +/* Emit whatever is still held in the deferred load/store queue and return
- +   it to Q_READY.  Does nothing when the queue is empty.
- +   A slot that has both a load and a store is written as a store of the
- +   constant (store_const); a slot with only a load is loaded (loadc). */
- +ST_FUNC void flushq(void)
- +{
- + if (q_state > Q_READY) {
- + int q = q_state;
- + /* keep the nested load()/store() calls from queueing again */
- + q_state = Q_IGNORE;
- + if (q & Q_S1)
- + store_const(0);
- + if (q == Q_LLSS) {
- + store_const(1);
- + } else {
- + /* first slot has no store recorded: its load is still owed */
- + if (!(q & Q_S1))
- + loadc(0);
- + /* second slot can only be a bare load at this point */
- + if (q & Q_L2)
- + loadc(1);
- + }
- + q_state = Q_READY;
- }
- }
- static void gadd_sp(int val)
- {
- - if (val == (char)val) {
- - o(0xc483);
- - g(val);
- + if (val < 0) {
- + if (val == (char)val)
- + o(O3(0x83, 0xec, -val));
- + else
- + oad(0xec81, -val); /* sub $xxx, %esp */
- } else {
- - oad(0xc481, val); /* add $xxx, %esp */
- + if (val == (char)val)
- + o(O3(0x83, 0xc4, val));
- + else
- + oad(0xc481, val); /* add $xxx, %esp */
- }
- + cache_ind = ind;
- }
- /* 'is_jmp' is '1' if it is a jump */
- static void gcall_or_jmp(int is_jmp)
- {
- int r;
- + if (q_state > Q_READY)
- + flushq();
- if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
- /* constant case */
- if (vtop->r & VT_SYM) {
- @@ -353,12 +954,23 @@ static void gcall_or_jmp(int is_jmp)
- ind + 1, R_386_PC32, 0);
- }
- oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
- + } else if ((vtop->r & (VT_VALMASK | VT_SYM)) == (VT_CONST | VT_SYM)) {
- + greloc(cur_text_section, vtop->sym,
- + ind + 2, R_386_32);
- + oad(0x15ff + (is_jmp << 4), vtop->c.ul); /* call/jmp [im] */
- + } else if ((vtop->r & VT_VALMASK) == VT_LOCAL) {
- + if (vtop->c.i == (char)vtop->c.i)
- + og(0x55ff + (is_jmp << 4), vtop->c.i);
- + else
- + oad(0x95ff + (is_jmp << 4), vtop->c.i); /* call/jmp [ebp+x] */
- } else {
- /* otherwise, indirect call */
- r = gv(RC_INT);
- - o(0xff); /* call/jmp *r */
- - o(0xd0 + r + (is_jmp << 4));
- + o(O2(0xff, 0xd0 + r + (is_jmp << 4))); /* call/jmp *r */
- }
- + if (/*(tcc_state->optimize & OPT_REGS) && */cache_used &&
- + !is_jmp && (short)rv_cache[TREG_EBX].ld.r != -1)
- + cache_used = 2;
- }
- static uint8_t fastcall_regs[3] = { TREG_EAX, TREG_EDX, TREG_ECX };
- @@ -376,18 +988,24 @@ ST_FUNC void gfunc_call(int nb_args)
- for(i = 0;i < nb_args; i++) {
- if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
- size = type_size(&vtop->type, &align);
- + if (size <= 4)
- + goto small_struct;
- + if (size <= 8) {
- + vtop->type.t &= ~VT_BTYPE;
- + vtop->type.t |= VT_LLONG;
- + goto small_struct;
- + }
- /* align to stack align size */
- size = (size + 3) & ~3;
- /* allocate the necessary size on stack */
- - oad(0xec81, size); /* sub $xxx, %esp */
- + gadd_sp(-size);
- /* generate structure store */
- r = get_reg(RC_INT);
- - o(0x89); /* mov %esp, r */
- - o(0xe0 + r);
- + o(O2(0x89, 0xe0 + r)); /* mov %esp, r */
- + cache_ind = ind;
- vset(&vtop->type, r | VT_LVAL, 0);
- vswap();
- vstore();
- - args_size += size;
- } else if (is_float(vtop->type.t)) {
- gv(RC_FLOAT); /* only one float register */
- if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
- @@ -396,27 +1014,34 @@ ST_FUNC void gfunc_call(int nb_args)
- size = 8;
- else
- size = 12;
- - oad(0xec81, size); /* sub $xxx, %esp */
- + gadd_sp(-size);
- if (size == 12)
- o(0x7cdb);
- else
- o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */
- - g(0x24);
- - g(0x00);
- - args_size += size;
- + gen_le16(0x0024);
- + cache_ind = ind;
- } else {
- + small_struct:
- /* simple type (currently always same size) */
- - /* XXX: implicit cast ? */
- + if ((tcc_state->optimize & OPT_REGS)
- + && (vtop->type.t & VT_BTYPE) != VT_LLONG) {
- + if (q_state > Q_READY)
- + flushq();
- + push_load = 1;
- + }
- r = gv(RC_INT);
- + size = 4;
- if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
- size = 8;
- o(0x50 + vtop->r2); /* push r */
- - } else {
- - size = 4;
- }
- - o(0x50 + r); /* push r */
- - args_size += size;
- + if (push_load < 2)
- + o(0x50 + r); /* push r */
- + push_load = 0;
- + cache_ind = ind;
- }
- + args_size += size;
- vtop--;
- }
- save_regs(0); /* save used temporary registers */
- @@ -445,8 +1070,18 @@ ST_FUNC void gfunc_call(int nb_args)
- gcall_or_jmp(0);
- #ifdef TCC_TARGET_PE
- - if ((func_sym->type.t & VT_BTYPE) == VT_STRUCT)
- - args_size -= 4;
- + if ((func_sym->type.t & VT_BTYPE) == VT_STRUCT) {
- + size = type_size(&func_sym->type, &align);
- + if (size > 8) {
- + args_size -= 4;
- + } else {
- + store(TREG_EAX, &ret_st);
- + if (size > 4) {
- + ret_st.c.i += 4;
- + store(TREG_EDX, &ret_st);
- + }
- + }
- + }
- #endif
- if (args_size && func_call != FUNC_STDCALL)
- gadd_sp(args_size);
- @@ -454,9 +1089,9 @@ ST_FUNC void gfunc_call(int nb_args)
- }
- #ifdef TCC_TARGET_PE
- -#define FUNC_PROLOG_SIZE 10
- +#define FUNC_PROLOG_SIZE 11
- #else
- -#define FUNC_PROLOG_SIZE 9
- +#define FUNC_PROLOG_SIZE 10
- #endif
- /* generate function prolog of type 't' */
- @@ -488,14 +1123,24 @@ ST_FUNC void gfunc_prolog(CType *func_ty
- ind += FUNC_PROLOG_SIZE;
- func_sub_sp_offset = ind;
- + prolog_reloc_offset = (cur_text_section->reloc)
- + ? cur_text_section->reloc->data_offset : 0;
- /* if the function returns a structure, then add an
- implicit pointer parameter */
- func_vt = sym->type;
- if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
- /* XXX: fastcall case ? */
- +#ifdef TCC_TARGET_PE
- + int size, align;
- + size = type_size(&func_vt, &align);
- + if (size > 8) {
- +#endif
- func_vc = addr;
- addr += 4;
- param_index++;
- +#ifdef TCC_TARGET_PE
- + }
- +#endif
- }
- /* define parameters */
- while ((sym = sym->next) != NULL) {
- @@ -511,8 +1156,8 @@ ST_FUNC void gfunc_prolog(CType *func_ty
- if (param_index < fastcall_nb_regs) {
- /* save FASTCALL register */
- loc -= 4;
- - o(0x89); /* movl */
- - gen_modrm(fastcall_regs_ptr[param_index], VT_LOCAL, NULL, loc);
- + gen_modrm(0x89, /* movl */
- + fastcall_regs_ptr[param_index], VT_LOCAL, NULL, loc);
- param_addr = loc;
- } else {
- param_addr = addr;
- @@ -522,6 +1167,8 @@ ST_FUNC void gfunc_prolog(CType *func_ty
- VT_LOCAL | lvalue_type(type->t), param_addr);
- param_index++;
- }
- + func_uses_ebx = 0;
- + func_uses_args = 0;
- func_ret_sub = 0;
- /* pascal type call ? */
- if (func_call == FUNC_STDCALL)
- @@ -560,6 +1207,8 @@ ST_FUNC void gfunc_epilog(void)
- ind = func_sub_sp_offset;
- sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
- func_bound_offset, lbounds_section->data_offset);
- + if (q_state > Q_READY)
- + flushq();
- greloc(cur_text_section, sym_data,
- ind + 1, R_386_32);
- oad(0xb8, 0); /* mov %eax, xxx */
- @@ -580,14 +1229,21 @@ ST_FUNC void gfunc_epilog(void)
- o(0x585a); /* restore returned value, if any */
- }
- #endif
- - o(0xc9); /* leave */
- + epilog_ind = ind;
- + if (func_uses_ebx)
- + o(0x5b); /* pop ebx */
- + if (!(tcc_state->optimize & OPT_PROLOG) || func_uses_args || loc != 0)
- + o(0xc9); /* leave */
- if (func_ret_sub == 0) {
- o(0xc3); /* ret */
- } else {
- o(0xc2); /* ret n */
- - g(func_ret_sub);
- - g(func_ret_sub >> 8);
- + gen_le16(func_ret_sub);
- }
- +
- + if (tcc_state->optimize & OPT_JUMPS)
- + jmpopt();
- +
- /* align local size to word & save local variables */
- v = (-loc + 3) & -4;
- @@ -599,17 +1255,63 @@ ST_FUNC void gfunc_epilog(void)
- oad(0xb8, v); /* mov stacksize, %eax */
- oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
- greloc(cur_text_section, sym, ind-4, R_386_PC32);
- + if (func_uses_ebx)
- + o(0x53); /* push ebx */
- + else
- + o(0x90);
- } else
- #endif
- {
- - o(0xe58955); /* push %ebp, mov %esp, %ebp */
- - o(0xec81); /* sub esp, stacksize */
- - gen_le32(v);
- -#if FUNC_PROLOG_SIZE == 10
- - o(0x90); /* adjust to FUNC_PROLOG_SIZE */
- + if (!(tcc_state->optimize & OPT_PROLOG)) {
- + o(0xe58955); /* push %ebp, mov %esp, %ebp */
- + oad(0xec81, v); /* sub esp, stacksize */
- +#if FUNC_PROLOG_SIZE == 11
- + if (func_uses_ebx)
- + o(0x9053); /* push %ebx, nop */
- + else
- + o(0xff8b); /* mov %edi, %edi */
- +#else
- + if (func_uses_ebx)
- + o(0x53); /* push ebx */
- + else
- + o(0x90);
- #endif
- + } else {
- + int skip = ind;
- + if (v != 0 || func_uses_args) {
- + o(0xe58955); /* push %ebp, mov %esp, %ebp */
- + if (v != 0) {
- + if (v == (char)v)
- + o(O3(0x83, 0xec, v)); /* sub esp, stacksize */
- + else
- + oad(0xec81, v); /* sub esp, stacksize */
- + }
- + }
- + if (func_uses_ebx)
- + o(0x53); /* push ebx */
- + skip = FUNC_PROLOG_SIZE - (ind - skip);
- +#ifdef CONFIG_TCC_BCHECK
- + if (tcc_state->do_bounds_check
- + && func_bound_offset == lbounds_section->data_offset)
- + skip += 10;
- +#endif
- + if (skip) {
- + saved_ind -= skip;
- + memcpy(cur_text_section->data + ind,
- + cur_text_section->data + ind + skip,
- + saved_ind - ind);
- + memset(cur_text_section->data + saved_ind, 0, skip);
- + adjust_reloc(ind, skip);
- + }
- + }
- }
- ind = saved_ind;
- +#ifdef TCC_TARGET_PE
- + /* align next function to 16 bytes */
- + if (!(tcc_state->optimize & OPT_SIZE))
- + while (ind & 15)
- + g(0x90);
- +#endif
- }
- /* generate a jump to a label */
- @@ -622,13 +1324,13 @@ ST_FUNC int gjmp(int t)
- ST_FUNC void gjmp_addr(int a)
- {
- int r;
- + if (q_state > Q_READY)
- + flushq();
- r = a - ind - 2;
- - if (r == (char)r) {
- - g(0xeb);
- - g(r);
- - } else {
- + if (r == (char)r && !(tcc_state->optimize & OPT_JUMPS))
- + og(0xeb, r);
- + else
- oad(0xe9, a - ind - 5);
- - }
- }
- /* generate a test. set 'inv' to invert test. Stack entry is popped */
- @@ -639,8 +1341,8 @@ ST_FUNC int gtst(int inv, int t)
- v = vtop->r & VT_VALMASK;
- if (v == VT_CMP) {
- /* fast case : can jump directly since flags are set */
- - g(0x0f);
- - t = psym((vtop->c.i - 16) ^ inv, t);
- + t = psym(O2(0x0f, (vtop->c.i - 16) ^ inv), t);
- + cache_ind = ind;
- } else if (v == VT_JMP || v == VT_JMPI) {
- /* && or || optimization */
- if ((v & 1) == inv) {
- @@ -650,6 +1352,7 @@ ST_FUNC int gtst(int inv, int t)
- p = (int *)(cur_text_section->data + *p);
- *p = t;
- t = vtop->c.i;
- + cache_ind = ind;
- } else {
- t = gjmp(t);
- gsym(vtop->c.i);
- @@ -666,10 +1369,9 @@ ST_FUNC int gtst(int inv, int t)
- t = gjmp(t);
- } else {
- v = gv(RC_INT);
- - o(0x85);
- - o(0xc0 + v * 9);
- - g(0x0f);
- - t = psym(0x85 ^ inv, t);
- + o(O2(0x85, 0xc0 + v * 9)); cache_ind = ind;
- + t = psym(O2(0x0f, 0x85 ^ inv), t);
- + cache_ind = ind;
- }
- }
- vtop--;
- @@ -680,6 +1382,8 @@ ST_FUNC int gtst(int inv, int t)
- ST_FUNC void gen_opi(int op)
- {
- int r, fr, opc, c;
- + static int shl_ind, shl_val;
- + int add_len = 0;
- switch(op) {
- case '+':
- @@ -688,37 +1392,83 @@ ST_FUNC void gen_opi(int op)
- gen_op8:
- if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
- /* constant case */
- + int i = ind;
- vswap();
- r = gv(RC_INT);
- vswap();
- c = vtop->c.i;
- - if (c == (char)c) {
- - /* generate inc and dec for smaller code */
- - if (c==1 && opc==0) {
- - o (0x40 | r); // inc
- - } else if (c==1 && opc==5) {
- - o (0x48 | r); // dec
- - } else {
- - o(0x83);
- - o(0xc0 | (opc << 3) | r);
- - g(c);
- + if (c == 0) {
- + if (opc == 7) {
- + o(O2(0x85, 0xc0 + r * 9)); /* test r, r */
- + } else if (opc == 4) {
- + ind = i; /* replace the load */
- + o(O2(0x33, 0xc0 + r * 9)); /* xor r, r */
- }
- + } else if (opc == 4 && (c == 0xff || c == 0xffff)) {
- + o(O3(0x0f, c == 0xff ? 0xb6 : 0xb7, 0xc0 + r * 9)); /* movzx */
- + } else if (opc == 1 && c == -1) {
- + ind = i;
- + o(O3(0x83, 0xc8 + r, -1)); /* or $-1, r */
- } else {
- - o(0x81);
- - oad(0xc0 | (opc << 3) | r, c);
- + if (opc == 0 && (tcc_state->optimize & OPT_REGS)) {
- + if (o_v[r].indc <= 0) {
- + o_v[r].indc = ind;
- + o_v[r].c += c;
- + o_v[r].v = rv_cache[r];
- + add_len = 1;
- + } else if (o_v[r].indc + o_v[r].lenc == ind) {
- + o_v[r].c += c;
- + add_len = 1;
- + } else {
- + /* two non-consecutive adds - the first is an offset,
- + the second invalidates the cache */
- + o_v[r].v.ld.r = -1;
- + o_v[r].v.st.r = -1;
- + }
- + }
- + if (c == (char)c) {
- + /* generate inc and dec for smaller code */
- + if ((c==1 && opc==0) || (c==-1 && opc==5)) {
- + o(0x40 | r); // inc
- + } else if ((c==1 && opc==5) || (c==-1 && opc==0)) {
- + o(0x48 | r); // dec
- + } else {
- + o(O3(0x83, 0xc0 + r + opc * 8, c));
- + }
- + } else {
- + if (r == TREG_EAX)
- + opc = 0x05 + opc * 8;
- + else
- + opc = O2(0x81, 0xc0 + r + opc * 8);
- + oad(opc, c);
- + }
- }
- } else {
- gv2(RC_INT, RC_INT);
- r = vtop[-1].r;
- fr = vtop[0].r;
- - o((opc << 3) | 0x01);
- - o(0xc0 + r + fr * 8);
- + if (opc == 0 && (tcc_state->optimize & OPT_REGS)) {
- + if (o_v[r].indr <= 0) {
- + o_v[r].indr = ind;
- + o_v[r].r = fr;
- + o_v[r].v = rv_cache[r];
- + } else {
- + o_v[r].v.ld.r = -1;
- + o_v[r].v.st.r = -1;
- + }
- + }
- + o(O2(0x01 + opc * 8, 0xc0 + r + fr * 8));
- }
- vtop--;
- if (op >= TOK_ULT && op <= TOK_GT) {
- vtop->r = VT_CMP;
- vtop->c.i = op;
- + } else if (tcc_state->optimize & OPT_REGS) {
- + if (add_len)
- + o_v[r].lenc = ind - o_v[r].indc;
- + clear_reg(r);
- }
- + cache_ind = ind;
- break;
- case '-':
- case TOK_SUBC1: /* sub with carry generation */
- @@ -740,12 +1490,83 @@ ST_FUNC void gen_opi(int op)
- opc = 1;
- goto gen_op8;
- case '*':
- - gv2(RC_INT, RC_INT);
- - r = vtop[-1].r;
- - fr = vtop[0].r;
- + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { /* multiply by a constant */
- + int i = ind;
- + vswap();
- + r = gv(RC_INT);
- + vswap();
- + c = vtop->c.i;
- + if (c == 0) {
- + ind = i; /* replace the load */
- + o(O2(0x33, 0xc0 + r * 9)); /* xor r, r (reg and r/m fields must both be r) */
- + } else if (c == 1) {
- + /* nothing to do */
- + } else if (c == -1) {
- + o(O2(0xf7, 0xd8 + r)); /* neg r */
- + } else if ((c & (c - 1)) == 0) {
- + int n = 0; /* becomes log2(c), the shift count */
- + while ((c = (unsigned)c >> 1)) /* logical shift so a negative c terminates */
- + ++n;
- + o(O3(0xc1, 0xe0 + r, n)); /* shl $xxx, r */
- + } else if (c > 0 && (tcc_state->optimize & OPT_MULTS)) {
- + int msb;
- + opc = 0;
- + msb = c & (c - 1); /* c with its lowest set bit cleared */
- + if ((msb & (msb - 1)) == 0) { /* c has exactly two bits set */
- + if (c == (msb | (msb >> 1))) /* 3 * 2**N */
- + opc = 0x40;
- + else if (c == (msb | (msb >> 2))) /* 5 * 2**N */
- + opc = 0x80;
- + else if (c == (msb | (msb >> 3))) /* 9 * 2**N */
- + opc = 0xc0;
- + }
- + fr = 0;
- + if (opc) {
- + while (!(c & 1)) {
- + ++fr;
- + c >>= 1;
- + }
- + o(O3(0x8d, 0x04 + r * 8, opc + r * 9)); /* lea r, [r*N+r] */
- + cache_ind = ind;
- + if (fr == 1)
- + o(O2(0x03, 0xc0 + r * 9)); /* add r, r */
- + else if (fr != 0)
- + o(O3(0xc1, 0xe0 + r, fr)); /* shl r, fr */
- + } else {
- + switch(c) {
- + case 25: fr += 0x40; /* r*4+r */
- + case 15: fr += 0x40; /* r*2+r */
- + opc = 0x80; /* * r*4+r */
- + break;
- + case 81: fr += 0x40; /* r*8+r */
- + case 45: fr += 0x40; /* r*4+r */
- + case 27: fr += 0x40; /* r*2+r */
- + opc = 0xc0; /* * r*8+r */
- + break;
- + default:
- + goto no_lea;
- + }
- + o(O3(0x8d, 0x04 + r * 8, opc + r * 9)); cache_ind = ind;
- + o(O3(0x8d, 0x04 + r * 8, fr + r * 9));
- + }
- + } else {
- + no_lea:
- + if (c == (char)c)
- + o(O3(0x6b, 0xc0 + r * 9, c)); /* imul $xxx, r */
- + else
- + oad(O2(0x69, 0xc0 + r * 9), c);
- + }
- + } else {
- + gv2(RC_INT, RC_INT);
- + r = vtop[-1].r;
- + fr = vtop[0].r;
- + o(O3(0x0f, 0xaf, 0xc0 + fr + r * 8)); /* imul fr, r */
- + }
- vtop--;
- - o(0xaf0f); /* imul fr, r */
- - o(0xc0 + fr + r * 8);
- + if (tcc_state->optimize & OPT_REGS) {
- + cache_ind = ind;
- + clear_reg(r);
- + }
- break;
- case TOK_SHL:
- opc = 4;
- @@ -763,43 +1584,104 @@ ST_FUNC void gen_opi(int op)
- r = gv(RC_INT);
- vswap();
- c = vtop->c.i & 0x1f;
- - o(0xc1); /* shl/shr/sar $xxx, r */
- - o(opc | r);
- - g(c);
- + if (op == TOK_SAR && ind == shl_ind + 3 && c == shl_val) {
- + ind = shl_ind;
- + o(O3(0x0f, c == 24 ? 0xbe : 0xbf, 0xc0 + r * 9)); /* movsx */
- + } else {
- + if (op == TOK_SHL && (c == 24 || c == 16)) {
- + shl_ind = ind;
- + shl_val = c;
- + }
- + o(O3(0xc1, opc | r, c)); /* shl/shr/sar $xxx, r */
- + }
- } else {
- /* we generate the shift in ecx */
- gv2(RC_INT, RC_ECX);
- r = vtop[-1].r;
- - o(0xd3); /* shl/shr/sar %cl, r */
- - o(opc | r);
- + o(O2(0xd3, opc | r)); /* shl/shr/sar %cl, r */
- }
- vtop--;
- + if (tcc_state->optimize & OPT_REGS) {
- + cache_ind = ind;
- + clear_reg(r);
- + }
- break;
- + case TOK_PDIV:
- + if (tcc_state->optimize & OPT_MULTS)
- + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
- + vtop->c.i > 0) {
- + /* calculate suitable reciprocal, from Hacker's Delight */
- + int p;
- + unsigned ad, anc, delta, q1, r1, q2, r2;
- + const unsigned two31 = 0x80000000; // 2**31.
- +
- + ad = vtop->c.ui;
- + if ((ad & (ad - 1)) == 0) {
- + p = 1;
- + while ((ad >>= 1))
- + p++;
- + vtop->c.i = p;
- + opc = 7;
- + goto gen_shift;
- + }
- + anc = two31 - 1 - two31 % ad;
- + p = 31;
- + q1 = two31 / anc;
- + r1 = two31 - q1 * anc;
- + q2 = two31 / ad;
- + r2 = two31 - q2 * ad;
- + do {
- + ++p;
- + q1 <<= 1;
- + r1 <<= 1;
- + if (r1 >= anc) {
- + ++q1;
- + r1 -= anc;
- + }
- + q2 <<= 1;
- + r2 <<= 1;
- + if (r2 >= ad) {
- + ++q2;
- + r2 -= ad;
- + }
- + delta = ad - r2;
- + } while (q1 < delta || (q1 == delta && r1 == 0));
- + vtop->c.ui = q2 + 1;
- + p -= 32;
- + gv2(RC_EAX, RC_EDX);
- + vtop--;
- + /* since the dividend should be a multiple of the divisor, and to
- + avoid testing for negative, add one beforehand */
- + o(0x40); /* inc eax */
- + o(0xeaf7); /* imul edx */
- + if (p != 0)
- + o(O3(0xc1, 0xfa, p)); /* sar $xxx, r */
- + vtop->r = TREG_EDX;
- + break;
- + }
- case '/':
- case TOK_UDIV:
- - case TOK_PDIV:
- case '%':
- case TOK_UMOD:
- case TOK_UMULL:
- /* first operand must be in eax */
- /* XXX: need better constraint for second operand */
- - gv2(RC_EAX, RC_ECX);
- + gv2(RC_EAX, RC_ECX | RC_EBX);
- r = vtop[-1].r;
- fr = vtop[0].r;
- vtop--;
- save_reg(TREG_EDX);
- if (op == TOK_UMULL) {
- - o(0xf7); /* mul fr */
- - o(0xe0 + fr);
- + o(O2(0xf7, 0xe0 + fr)); /* mul fr */
- vtop->r2 = TREG_EDX;
- r = TREG_EAX;
- } else {
- if (op == TOK_UDIV || op == TOK_UMOD) {
- - o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
- - o(0xf0 + fr);
- + o(0xd231); /* xor %edx, %edx */
- + o(O2(0xf7, 0xf0 + fr)); /* div fr, %eax */
- } else {
- - o(0xf799); /* cltd, idiv fr, %eax */
- - o(0xf8 + fr);
- + o(0x99); /* cltd */
- + o(O2(0xf7, 0xf8 + fr)); /* idiv fr, %eax */
- }
- if (op == '%' || op == TOK_UMOD)
- r = TREG_EDX;
- @@ -809,6 +1691,12 @@ ST_FUNC void gen_opi(int op)
- vtop->r = r;
- break;
- default:
- + if ((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
- + /* turn something like (1 <= argc) into (argc >= 1) */
- + vswap();
- + if (op != TOK_EQ && op != TOK_NE)
- + op ^= (op & 8) ? 3 : 5;
- + }
- opc = 7;
- goto gen_op8;
- }
- @@ -857,12 +1745,11 @@ ST_FUNC void gen_opf(int op)
- o(0xe9da); /* fucompp */
- o(0xe0df); /* fnstsw %ax */
- if (op == TOK_EQ) {
- - o(0x45e480); /* and $0x45, %ah */
- - o(0x40fC80); /* cmp $0x40, %ah */
- + o(0x44e480); /* and $0x44, %ah */
- + op = 0x9b; /* parity odd - 1 bit set is equal */
- } else if (op == TOK_NE) {
- - o(0x45e480); /* and $0x45, %ah */
- - o(0x40f480); /* xor $0x40, %ah */
- - op = TOK_NE;
- + o(0x44e480); /* and $0x44, %ah */
- + op = 0x9a; /* parity even - no bits is !equal, two is bad */
- } else if (op == TOK_GE || op == TOK_LE) {
- o(0x05c4f6); /* test $0x05, %ah */
- op = TOK_EQ;
- @@ -902,8 +1789,7 @@ ST_FUNC void gen_opf(int op)
- ft = vtop->type.t;
- fc = vtop->c.ul;
- if ((ft & VT_BTYPE) == VT_LDOUBLE) {
- - o(0xde); /* fxxxp %st, %st(1) */
- - o(0xc1 + (a << 3));
- + o(O2(0xde, 0xc1 + (a << 3))); /* fxxxp %st, %st(1) */
- } else {
- /* if saved lvalue, then we must reload it */
- r = vtop->r;
- @@ -918,10 +1804,10 @@ ST_FUNC void gen_opf(int op)
- }
- if ((ft & VT_BTYPE) == VT_DOUBLE)
- - o(0xdc);
- + op = 0xdc;
- else
- - o(0xd8);
- - gen_modrm(a, r, vtop->sym, fc);
- + op = 0xd8;
- + gen_modrm(op, a, r, vtop->sym, fc);
- }
- vtop--;
- }
- @@ -943,8 +1829,7 @@ ST_FUNC void gen_cvt_itof(int t)
- } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
- (VT_INT | VT_UNSIGNED)) {
- /* unsigned int to float/double/long double */
- - o(0x6a); /* push $0 */
- - g(0x00);
- + og(0x6a, 0x00); /* push $0 */
- o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
- o(0x242cdf); /* fildll (%esp) */
- o(0x08c483); /* add $8, %esp */
- @@ -981,12 +1866,11 @@ ST_FUNC void gen_cvt_ftoi(int t)
- ind, R_386_32);
- gen_le32(0);
- - oad(0xec81, size); /* sub $xxx, %esp */
- + gadd_sp(-size);
- if (size == 4)
- - o(0x1cdb); /* fistpl */
- + o(0x241cdb); /* fistpl */
- else
- - o(0x3cdf); /* fistpll */
- - o(0x24);
- + o(0x243cdf); /* fistpll */
- o(0x2dd9); /* ldcw xxx */
- sym = external_global_sym(TOK___tcc_fpu_control,
- &ushort_type, VT_LVAL);
- @@ -1038,6 +1922,8 @@ ST_FUNC void gen_bounded_ptr_add(void)
- save_regs(0);
- /* do a fast function call */
- sym = external_global_sym(TOK___bound_ptr_add, &func_old_type, 0);
- + if (q_state > Q_READY)
- + flushq();
- greloc(cur_text_section, sym,
- ind + 1, R_386_PC32);
- oad(0xe8, -4);
- @@ -1090,6 +1976,417 @@ ST_FUNC void gen_bounded_ptr_deref(void)
- }
- #endif
- +/* jump optimiser */
- +
- +/* 's' bytes have been removed at 't': lower the r_offset of every reloc at or after 't' by 's' */
- +static void adjust_reloc(int t, int s)
- +{
- + if (cur_text_section->reloc) {
- + ElfW_Rel *end = (ElfW_Rel*)(cur_text_section->reloc->data
- + + cur_text_section->reloc->data_offset); /* the scan stops here */
- + ElfW_Rel *rel = (ElfW_Rel*)(cur_text_section->reloc->data
- + + prolog_reloc_offset); /* the scan starts here */
- + for (; rel != end; ++rel) {
- + if (rel->r_offset >= t)
- + rel->r_offset -= s;
- + }
- + }
- +}
- +
- +/* 's' bytes have been removed at 't', adjust the jmplst entries, the displacements they refer to and the relocs to suit */
- +static void adjust_jmp(int t, int s)
- +{
- + int **j, f, n, a, r;
- +
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + f = **j; /* entry value: offset of the jump's displacement field */
- + if (f == 0 || f == t - 1)
- + continue;
- + if (f < 0) { /* negated entry: -f is the offset */
- + if (-f > t)
- + **j += s; /* i.e. -f moves down by 's' */
- + continue;
- + }
- + r = cur_text_section->data[f-1]; /* opcode byte before the displacement */
- + r = (r == 0xeb || r < 0x80); /* non-zero: treated as an 8-bit displacement */
- + if (r)
- + a = f + (char)cur_text_section->data[f] + 1; /* destination */
- + else
- + a = f + *(int *)(cur_text_section->data + f) + 4; /* destination */
- + if (f < t) {
- + if (a >= t) { /* jumps forward across the removed bytes */
- + if (r)
- + cur_text_section->data[f] -= s;
- + else
- + *(int *)(cur_text_section->data + f) -= s;
- + }
- + } else {
- + if (a < t) { /* jumps backward across the removed bytes */
- + if (r)
- + cur_text_section->data[f] += s;
- + else
- + *(int *)(cur_text_section->data + f) += s;
- + }
- + **j -= s; /* the jump itself has moved down */
- + }
- + }
- + adjust_reloc(t, s);
- +}
- +
- +/* retarget jumps whose destination lies in ['t', 'e') to the same offset from 'o'; entries for jumps located in ['t', 'e') are zeroed */
- +static void adjust_jump_dest(int t, int e, int o)
- +{
- + int **j, f, n, a;
- +
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + f = **j;
- + if (f <= 0)
- + continue;
- + if (f >= t && f < e) {
- + **j = 0;
- + continue;
- + }
- + a = f + *(int *)(cur_text_section->data + f) + 4; /* destination, read as a 32-bit displacement */
- + if (a >= t && a < e)
- + *(int *)(cur_text_section->data + f) = o + (a - t) - f - 4;
- + }
- +}
- +
- +static void remove_code(int t, int s) /* delete 's' bytes of code at 't', fixing relocs, jumps and indices */
- +{
- + if (cur_text_section->reloc) {
- + ElfW_Rel *end = (ElfW_Rel*)(cur_text_section->reloc->data
- + + cur_text_section->reloc->data_offset);
- + ElfW_Rel *rel = (ElfW_Rel*)(cur_text_section->reloc->data
- + + prolog_reloc_offset);
- + for (; rel != end; ++rel) {
- + if (rel->r_offset >= t && rel->r_offset < t + s) { /* reloc lies inside the removed bytes: drop it */
- + cur_text_section->reloc->data_offset -= sizeof(ElfW_Rel);
- + --end;
- + memmove(rel, rel + 1, (end - rel) * sizeof(ElfW_Rel)); /* regions overlap, memcpy would be undefined */
- + --rel;
- + }
- + }
- + }
- + adjust_jmp(t, s);
- + memmove(cur_text_section->data + t, cur_text_section->data + t + s,
- + ind - (t + s)); /* source and destination overlap */
- + epilog_ind -= s;
- + ins_ind -= s;
- + ind -= s;
- + memset(cur_text_section->data + ind, 0, s);
- +}
- +
- +static void remove_jmp(int t) /* delete the jump whose 4-byte displacement is at 't' */
- +{
- + int s = cur_text_section->data[t-1] == 0xe9 ? 1 : 2; /* opcode length: 1 for 0xe9, otherwise 2 */
- + remove_code(t - s, s + 4); /* opcode plus displacement */
- +}
- +
- +/* replace a jmp to the epilog with the epilog */
- +static void epilog_jmp(int t)
- +{
- + int s, e;
- +
- + s = cur_text_section->data[t-1] == 0xe9 ? 1 : 2; /* opcode length */
- + t -= s; /* start of the jump instruction */
- + e = ind - epilog_ind; /* number of bytes from epilog_ind to ind */
- + memcpy(cur_text_section->data + t, cur_text_section->data + epilog_ind, e); /* overwrite the jump with those bytes */
- + remove_code(t + e, s + 4 - e); /* drop what is left of the jump */
- +}
- +
- +static void short_jmp(int *j) /* rewrite the near jump recorded at *j in its short form */
- +{
- + int t, s;
- +
- + t = *j;
- + if (cur_text_section->data[t-1] == 0xe9) { /* jmp near */
- + cur_text_section->data[t-1] = 0xeb; /* jmp short */
- + ++t;
- + s = 3; /* the upper three displacement bytes go */
- + } else {
- + /* convert jcc near (0x8X0F) to short (0x7X) */
- + cur_text_section->data[t-2] = cur_text_section->data[t-1] - 0x10;
- + cur_text_section->data[t-1] = cur_text_section->data[t];
- + --*j;
- + s = 4; /* one opcode byte and three displacement bytes go */
- + }
- + if ((char)cur_text_section->data[*j] < 0)
- + cur_text_section->data[*j] += s; /* negative displacement: compensate for the 's' bytes removed */
- + remove_code(t, s);
- +}
- +
- +/* optimise jumps: run the passes below over the jumps recorded in jmplst, then reset jmplst */
- +static void jmpopt(void)
- +{
- + int **j, t, n, o, s, opt;
- + int le, lj, opc;
- + int **j1, n1, t1;
- + ElfW_Rel *rel_t, *rel_o, *end;
- +
- + /* replace jumps to jmp with the destination of the jmp */
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + t = **j;
- + if (t < 0)
- + continue;
- + o = t + *(int *)(cur_text_section->data + t) + 4; /* destination of this jump */
- + opt = o;
- + while (cur_text_section->data[o] == 0xe9) /* destination is itself a near jmp: follow it */
- + o += *(int *)(cur_text_section->data + o + 1) + 5;
- + if (o != opt)
- + *(int *)(cur_text_section->data + t) = o - t - 4;
- + }
- +
- + /* test if the code (up to eight instructions) before a jmp matches the
- + code before its destination; if so, remove the code before the jmp, jump
- + to the earlier dest. */
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + int jmp = 0;
- + t = **j;
- + if (t < 0) {
- + /* skip the jump of a boolean cast */
- + ++j;
- + --n;
- + continue;
- + }
- + if (cur_text_section->data[t-1] != 0xe9 ||
- + (cur_text_section->data[t+3] & 0x80)) /* must be forward */
- + continue;
- + o = t + *(int *)(cur_text_section->data + t) + 4;
- + if (o == epilog_ind)
- + continue;
- + /* ignore an empty jmp before the dest, since they haven't been
- + removed yet (so instruction lengths are consistent) */
- + if (cur_text_section->data[o-5] == 0xe9 &&
- + *(int *)(cur_text_section->data + o - 4) == 0) {
- + o -= 5;
- + jmp = 5;
- + }
- + --t;
- + le = 0;
- + s = (*j)[1]; /* word after the offset; consumed below four bits at a time as instruction lengths */
- + lj = 0;
- + if (cur_text_section->reloc) {
- + end = (ElfW_Rel*)(cur_text_section->reloc->data
- + + prolog_reloc_offset);
- + rel_o = (ElfW_Rel*)(cur_text_section->reloc->data
- + + cur_text_section->reloc->data_offset);
- + while (--rel_o >= end && rel_o->r_offset > o)
- + ;
- + rel_t = rel_o;
- + while (rel_t >= end && rel_t->r_offset > t)
- + --rel_t;
- + } else {
- + end = rel_o = rel_t = NULL;
- + }
- + while (o > **j && s) {
- + lj = s & 15;
- + t -= lj;
- + o -= lj;
- + opc = cur_text_section->data[t];
- + /* check the destination of a relative call/jmp */
- + if (opc == 0xe8 || opc == 0xe9) {
- + int d1 = t + *(int *)(cur_text_section->data + t + 1) + 5;
- + int d2 = o + *(int *)(cur_text_section->data + o + 1) + 5;
- + if (d1 != d2)
- + break;
- + }
- + /* stop before a jump */
- + if ((opc == 0x0f && (cur_text_section->data[t+1] & 0xf0) == 0x80) ||
- + opc == 0xeb) {
- + break;
- + }
- + if (memcmp(cur_text_section->data + t,
- + cur_text_section->data + o, lj)) {
- + break;
- + }
- + /* check relocations match */
- + if (end) {
- + while (rel_t >= end && rel_t->r_offset >= t + lj) {
- + --rel_o;
- + --rel_t;
- + }
- + while (rel_t >= end && rel_t->r_offset > t) {
- + if (rel_t->r_info != rel_o->r_info)
- + goto ins_match_end;
- + --rel_o;
- + --rel_t;
- + }
- + }
- + le += lj;
- + lj = 0;
- + s = (unsigned)s >> 4;
- + }
- + ins_match_end:
- + if (le) {
- + t += lj;
- + o += lj;
- + adjust_jump_dest(t, t + le, o);
- + *(int *)(cur_text_section->data + t + le + 1) -= le + jmp;
- + remove_code(t, le);
- + }
- + }
- +
- + /* replace jcc/jmp pairs with jncc and remove unreferenced jmps */
- + for (j = jmplst + nb_jmplst - 1, n = nb_jmplst; n > 1; --j, --n) {
- + t = **j;
- + if (*j[-1] + 5 == t) { /* this jmp directly follows the previous entry's jump */
- + if (cur_text_section->data[t-6] != 0xe9) {
- + if (*(int *)(cur_text_section->data + t - 5) != 5)
- + continue;
- + o = *(int *)(cur_text_section->data + t);
- + if (o != 0) {
- + o += t + 4;
- + cur_text_section->data[t-6] ^= 1; /* invert the condition */
- + *(int *)(cur_text_section->data + t - 5) = o - (t - 5) - 4;
- + }
- + }
- + **j = 0;
- + remove_jmp(t);
- + }
- + }
- +
- + /* check if all conditional jumps to a boolean are the same type
- + (quite complicated just to efficiently set 0 or 1, and it's still not
- + as good as it could be, since test already knows if it's 0) */
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + int tst = 1;
- + t = **j;
- + if (t >= 0)
- + continue;
- + t = -t;
- + t1 = t - ((*j)[1] ? 7 : 4);
- + opc = 0;
- + for (j1 = jmplst, n1 = nb_jmplst; n1 > 0; ++j1, --n1) {
- + int op1, op2;
- + o = **j1;
- + if (o <= 0)
- + continue;
- + s = cur_text_section->data[o-1];
- + op1 = cur_text_section->data[o-4];
- + op2 = cur_text_section->data[o-3];
- + o += *(int *)(cur_text_section->data + o) + 4;
- + if (o == t1) {
- + s ^= 1;
- + o = t;
- + }
- + if (o == t) {
- + if (opc == 0)
- + opc = s;
- + else if (s != opc) {
- + opc = -1;
- + if (!tst)
- + break;
- + }
- + if ((*j1)[1] != 2 || op1 != 0x85 ||
- + op2 < 0xc0 || (op2 - 0xc0) % 9) {
- + tst = 0;
- + if (opc == -1)
- + break;
- + }
- + }
- + }
- + if (opc > 0) {
- + int r;
- + o = (*j)[1] & 1;
- + s = o ? 7 : 4;
- + adjust_jump_dest(t, t + 1, t - s);
- + t -= s;
- + r = (*j)[1] >> 1;
- + cur_text_section->data[t++] = 0x0f; /* setcc %br */
- + cur_text_section->data[t++] = (opc + 16) ^ o;
- + cur_text_section->data[t++] = 0xc0 + r;
- + cur_text_section->data[t++] = 0x0f; /* movzx %br, %r */
- + cur_text_section->data[t++] = 0xb6;
- + cur_text_section->data[t++] = 0xc0 + r * 9;
- + remove_code(t, 3);
- + } else if (tst) {
- + int r = cur_text_section->data[t];
- + if (r == 0x33) {
- + r = cur_text_section->data[t+1] & 7;
- + adjust_jump_dest(t, t + 1, t - 7);
- + t -= 7;
- + cur_text_section->data[t-4] = 1; /* jump over stc */
- + cur_text_section->data[t-5] ^= 1; /* invert condition */
- + } else {
- + r &= 7;
- + adjust_jump_dest(t, t + 1, t - 3);
- + t -= 4;
- + }
- + cur_text_section->data[t++] = 0xf9; /* stc */
- + cur_text_section->data[t++] = 0x1b; /* sbb %r, %r */
- + cur_text_section->data[t++] = 0xc0 + r * 9;
- + cur_text_section->data[t++] = 0x40 + r; /* inc %r */
- + remove_code(t, 5);
- + }
- + **j = 0;
- + }
- +
- + /* remove empty jumps */
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + t = **j;
- + if (t <= 0)
- + continue;
- + o = *(int *)(cur_text_section->data + t);
- + if (o == 0) {
- + **j = 0;
- + remove_jmp(t);
- + }
- + }
- +
- +#ifdef CONFIG_TCC_BCHECK
- + if (!tcc_state->do_bounds_check)
- +#endif
- + if (ind - epilog_ind <= 5 ||
- + ((tcc_state->optimize & OPT_SIZE) && ind - epilog_ind <= 2)) {
- + /* inline jmps to the epilog */
- + le = 0; /* last epilog */
- + lj = 0; /* last jcc - may be able to shorten it */
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + t = **j;
- + if (t <= 0)
- + continue;
- + o = t + *(int *)(cur_text_section->data + t) + 4;
- + if (o == epilog_ind) {
- + if (cur_text_section->data[t-1] == 0xe9) {
- + **j = 0;
- + epilog_jmp(t);
- + le = t - 1;
- + if (lj) {
- + *(int *)(cur_text_section->data + lj) = le - lj - 4;
- + lj = 0;
- + }
- + } else {
- + if (le)
- + *(int *)(cur_text_section->data + t) = le - t - 4;
- + else
- + lj = t;
- + }
- + }
- + }
- + }
- +
- + /* convert appropriate near jumps to short */
- + do {
- + opt = 0;
- + for (j = jmplst, n = nb_jmplst; n > 0; ++j, --n) {
- + t = **j;
- + if (t <= 0)
- + continue;
- + /* no need to test opcode, since 000000 needs to follow, which
- + translates to add [eax],al and something similar, so the char
- + test should be sufficient */
- + o = *(int *)(cur_text_section->data + t);
- + if (o == (char)o) {
- + short_jmp(*j);
- + opt = 1;
- + }
- + }
- + } while (opt); /* repeat until a pass shortens nothing */
- +
- + dynarray_reset(&jmplst, &nb_jmplst);
- +}
- +
- /* end of X86 code generator */
- /*************************************************************/
- #endif
- diff -pruN tcc-0.9.26/libtcc.c tcc-0.9.26o/libtcc.c
- --- tcc-0.9.26/libtcc.c 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/libtcc.c 2013-09-24 15:53:36 +1000
- @@ -999,6 +999,10 @@ LIBTCCAPI TCCState *tcc_new(void)
- data_section = new_section(s, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
- bss_section = new_section(s, ".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE);
- +#if defined TCC_TARGET_PE && defined TCC_TARGET_I386
- + text_section->sh_addralign = 16;
- +#endif
- +
- /* symbols are always generated for linking stage */
- symtab_section = new_symtab(s, ".symtab", SHT_SYMTAB, 0,
- ".strtab",
- @@ -1876,6 +1880,41 @@ PUB_FUNC int tcc_parse_args(TCCState *s,
- printf ("%s\n", TCC_VERSION);
- exit(0);
- case TCC_OPTION_O:
- +#ifdef TCC_TARGET_I386
- + if (*optarg == '\0')
- + s->optimize = -1;
- + else
- + do {
- + switch(*optarg++) {
- + case '0':
- + s->optimize = 0;
- + break;
- + case '1':
- + s->optimize |= OPT_PROLOG | OPT_REGS | OPT_MULTS;
- + break;
- + case '2':
- + case 'x':
- + s->optimize = -1;
- + break;
- + case 's':
- + s->optimize |= OPT_PROLOG | OPT_REGS | OPT_JUMPS | OPT_SIZE;
- + break;
- + case 'f':
- + s->optimize |= OPT_PROLOG;
- + break;
- + case 'j':
- + s->optimize |= OPT_JUMPS;
- + break;
- + case 'm':
- + s->optimize |= OPT_MULTS;
- + break;
- + case 'r':
- + s->optimize |= OPT_REGS;
- + break;
- + }
- + } while (*optarg);
- + break;
- +#endif
- case TCC_OPTION_pedantic:
- case TCC_OPTION_pipe:
- case TCC_OPTION_s:
- diff -pruN tcc-0.9.26/tcc.h tcc-0.9.26o/tcc.h
- --- tcc-0.9.26/tcc.h 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/tcc.h 2013-09-23 16:51:41 +1000
- @@ -355,9 +355,9 @@ typedef struct Section {
- int nb_hashed_syms; /* used to resize the hash table */
- struct Section *link; /* link to another section */
- struct Section *reloc; /* corresponding section for relocation, if any */
- - struct Section *hash; /* hash table for symbols */
- + struct Section *hash; /* hash table for symbols */
- struct Section *next;
- - char name[1]; /* section name */
- + char name[1]; /* section name */
- } Section;
- typedef struct DLLReference {
- @@ -553,6 +553,7 @@ struct TCCState {
- #ifdef TCC_TARGET_I386
- int seg_size; /* 32. Can be 16 with i386 assembler (.code16) */
- + int optimize;
- #endif
- /* array of all loaded dlls (including those referenced by loaded dlls) */
- @@ -787,7 +788,7 @@ struct TCCState {
- #define TOK_SHL 0x01 /* shift left */
- #define TOK_SAR 0x02 /* signed shift right */
- -/* assignement operators : normal operator or 0x80 */
- +/* assignment operators : normal operator or 0x80 */
- #define TOK_A_MOD 0xa5
- #define TOK_A_AND 0xa6
- #define TOK_A_MUL 0xaa
- @@ -1141,6 +1142,9 @@ ST_DATA CType char_pointer_type, func_ol
- ST_DATA SValue __vstack[1+/*to make bcheck happy*/ VSTACK_SIZE], *vtop;
- #define vstack (__vstack + 1)
- ST_DATA int rsym, anon_sym, ind, loc;
- +#ifdef TCC_TARGET_I386
- +ST_DATA int cache_ind;
- +#endif
- ST_DATA int const_wanted; /* true if constant wanted */
- ST_DATA int nocode_wanted; /* true if no code generation wanted for an expression */
- @@ -1279,12 +1283,17 @@ ST_FUNC void gen_cvt_itof(int t);
- /* ------------ i386-gen.c ------------ */
- #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64
- +/* make an int out of opcode bytes */
- +#define O2(o1, o2) ((unsigned char)(o1) | ((unsigned char)(o2) << 8))
- +#define O3(o1, o2, o3) (O2(o1, o2) | ((unsigned char)(o3) << 16))
- +#define O4(o1, o2, o3, o4) (O3(o1, o2, o3) | ((unsigned char)(o4) << 24))
- ST_FUNC void g(int c);
- ST_FUNC int oad(int c, int s);
- ST_FUNC void gen_le16(int c);
- ST_FUNC void gen_le32(int c);
- ST_FUNC void gen_addr32(int r, Sym *sym, int c);
- ST_FUNC void gen_addrpc32(int r, Sym *sym, int c);
- +ST_FUNC void flushq(void);
- #endif
- #ifdef CONFIG_TCC_BCHECK
- diff -pruN tcc-0.9.26/tccgen.c tcc-0.9.26o/tccgen.c
- --- tcc-0.9.26/tccgen.c 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/tccgen.c 2013-09-25 19:24:46 +1000
- @@ -29,6 +29,14 @@
- anon_sym: anonymous symbol index
- */
- ST_DATA int rsym, anon_sym, ind, loc;
- +#ifdef TCC_TARGET_I386
- +ST_DATA int cache_ind;
- +#define SET_CACHE_IND cache_ind = ind
- +#define RESET_CACHE_IND do { flushq(); cache_ind = -1; } while (0)
- +#else
- +#define SET_CACHE_IND
- +#define RESET_CACHE_IND
- +#endif
- ST_DATA Section *text_section, *data_section, *bss_section; /* predefined sections */
- ST_DATA Section *cur_text_section; /* current section where function code is generated */
- @@ -67,6 +75,10 @@ ST_DATA char *funcname;
- ST_DATA CType char_pointer_type, func_old_type, int_type, size_type;
- +#if defined TCC_TARGET_PE && defined TCC_TARGET_I386
- +ST_DATA SValue ret_st;
- +#endif
- +
- /* ------------------------------------------------------------------------- */
- static void gen_cast(CType *type);
- static inline CType *pointed_type(CType *type);
- @@ -522,6 +534,7 @@ ST_FUNC void save_reg(int r)
- /* x86 specific: need to pop fp register ST0 if saved */
- if (r == TREG_ST0) {
- o(0xd8dd); /* fstp %st(0) */
- + SET_CACHE_IND;
- }
- #endif
- #ifndef TCC_TARGET_X86_64
- @@ -687,7 +700,7 @@ static void gbound(void)
- register value (such as structures). */
- ST_FUNC int gv(int rc)
- {
- - int r, bit_pos, bit_size, size, align, i;
- + int r, bit_pos, bit_size, size, align, i, bt;
- #ifndef TCC_TARGET_X86_64
- int rc2;
- #endif
- @@ -765,10 +778,16 @@ ST_FUNC int gv(int rc)
- #endif
- r = vtop->r & VT_VALMASK;
- + bt = vtop->type.t & VT_BTYPE;
- #ifndef TCC_TARGET_X86_64
- rc2 = RC_INT;
- if (rc == RC_IRET)
- rc2 = RC_LRET;
- + if (VT_STRUCT == bt) {
- + int align;
- + if (type_size(&vtop->type, &align) == 8)
- + bt = VT_LLONG;
- + }
- #endif
- /* need to reload if:
- - constant
- @@ -778,13 +797,13 @@ ST_FUNC int gv(int rc)
- || (vtop->r & VT_LVAL)
- || !(reg_classes[r] & rc)
- #ifndef TCC_TARGET_X86_64
- - || ((vtop->type.t & VT_BTYPE) == VT_LLONG && !(reg_classes[vtop->r2] & rc2))
- + || (bt == VT_LLONG && !(reg_classes[vtop->r2] & rc2))
- #endif
- )
- {
- r = get_reg(rc);
- #ifndef TCC_TARGET_X86_64
- - if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
- + if (bt == VT_LLONG) {
- int r2;
- unsigned long long ll;
- /* two register type load : expand to two words
- @@ -854,7 +873,7 @@ ST_FUNC int gv(int rc)
- vtop->r = r;
- #ifdef TCC_TARGET_C67
- /* uses register pairs for doubles */
- - if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
- + if (bt == VT_DOUBLE)
- vtop->r2 = r+1;
- #endif
- }
- @@ -1012,6 +1031,7 @@ ST_FUNC void vpop(void)
- /* for x86, we need to pop the FP stack */
- if (v == TREG_ST0 && !nocode_wanted) {
- o(0xd8dd); /* fstp %st(0) */
- + SET_CACHE_IND;
- } else
- #endif
- if (v == VT_JMP || v == VT_JMPI) {
- @@ -1180,7 +1200,7 @@ static void gen_opl(int op)
- c = (int)vtop->c.i;
- /* constant: simpler */
- /* NOTE: all comments are for SHL. the other cases are
- - done by swaping words */
- + done by swapping words */
- vpop();
- if (op != TOK_SHL)
- vswap();
- @@ -1596,7 +1616,7 @@ ST_FUNC void gen_op(int op)
- if (bt1 == VT_PTR || bt2 == VT_PTR) {
- /* at least one operand is a pointer */
- - /* relationnal op: must be both pointers */
- + /* relational op: must be both pointers */
- if (op >= TOK_ULT && op <= TOK_LOR) {
- check_comparison_pointer_types(vtop - 1, vtop, op);
- /* pointers are handled are unsigned */
- @@ -1670,7 +1690,7 @@ ST_FUNC void gen_op(int op)
- {
- gen_opic(op);
- }
- - /* put again type if gen_opic() swaped operands */
- + /* put again type if gen_opic() swapped operands */
- vtop->type = type1;
- }
- } else if (is_float(bt1) || is_float(bt2)) {
- @@ -1742,7 +1762,7 @@ ST_FUNC void gen_op(int op)
- else
- gen_opic(op);
- if (op >= TOK_ULT && op <= TOK_GT) {
- - /* relationnal op: the result is an int */
- + /* relational op: the result is an int */
- vtop->type.t = VT_INT;
- } else {
- vtop->type.t = t;
- @@ -1977,8 +1997,8 @@ static void gen_cast(CType *type)
- int r = gv(RC_INT);
- if (sbt != (VT_INT | VT_UNSIGNED)) {
- /* x86_64 specific: movslq */
- - o(0x6348);
- - o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r));
- + o(O3(0x48, 0x63,
- + 0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r)));
- }
- }
- #endif
- @@ -2385,6 +2405,13 @@ ST_FUNC void vstore(void)
- if (!nocode_wanted) {
- size = type_size(&vtop->type, &align);
- + if (size == 4)
- + goto small_struct;
- + if (size == 8) {
- + ft = VT_LLONG;
- + goto small_struct;
- + }
- +
- /* destination */
- vswap();
- vtop->type.t = VT_PTR;
- @@ -2470,6 +2497,7 @@ ST_FUNC void vstore(void)
- }
- #endif
- if (!nocode_wanted) {
- + small_struct:
- rc = RC_INT;
- if (is_float(ft)) {
- rc = RC_FLOAT;
- @@ -3876,6 +3904,10 @@ ST_FUNC void unary(void)
- if ((s->type.t & VT_BTYPE) == VT_STRUCT) {
- /* get some space for the returned structure */
- size = type_size(&s->type, &align);
- +#if defined TCC_TARGET_PE && defined TCC_TARGET_I386
- + if (size <= 8)
- + align = 4;
- +#endif
- loc = (loc - size) & -align;
- ret.type = s->type;
- ret.r = VT_LOCAL | VT_LVAL;
- @@ -3883,6 +3915,12 @@ ST_FUNC void unary(void)
- problems */
- vseti(VT_LOCAL, loc);
- ret.c = vtop->c;
- +#if defined TCC_TARGET_PE && defined TCC_TARGET_I386
- + if (size <= 8) {
- + ret_st = ret;
- + vtop--;
- + } else
- +#endif
- nb_args++;
- } else {
- ret.type = s->type;
- @@ -4090,6 +4128,7 @@ static void expr_cond(void)
- int tt, u, r1, r2, rc, t1, t2, bt1, bt2;
- SValue sv;
- CType type, type1, type2;
- + int small_struct = 0;
- if (const_wanted) {
- expr_lor_const();
- @@ -4130,8 +4169,8 @@ static void expr_cond(void)
- }
- else
- rc = RC_INT;
- - gv(rc);
- - save_regs(1);
- + gv(rc);
- + save_regs(1);
- }
- if (tok == ':' && gnu_ext) {
- gv_dup();
- @@ -4200,8 +4239,14 @@ static void expr_cond(void)
- /* now we convert second operand */
- gen_cast(&type);
- - if (VT_STRUCT == (vtop->type.t & VT_BTYPE))
- - gaddrof();
- + if (VT_STRUCT == (vtop->type.t & VT_BTYPE)) {
- + int align;
- + small_struct = type_size(&vtop->type, &align);
- + if (small_struct != 4 && small_struct != 8) {
- + small_struct = 0;
- + gaddrof();
- + }
- + }
- rc = RC_INT;
- if (is_float(type.t)) {
- rc = RC_FLOAT;
- @@ -4210,7 +4255,7 @@ static void expr_cond(void)
- rc = RC_ST0;
- }
- #endif
- - } else if ((type.t & VT_BTYPE) == VT_LLONG) {
- + } else if ((type.t & VT_BTYPE) == VT_LLONG || small_struct == 8) {
- /* for long longs, we use fixed registers to avoid having
- to handle a complicated move */
- rc = RC_IRET;
- @@ -4224,8 +4269,10 @@ static void expr_cond(void)
- /* put again first value and cast it */
- *vtop = sv;
- gen_cast(&type);
- - if (VT_STRUCT == (vtop->type.t & VT_BTYPE))
- - gaddrof();
- + if (VT_STRUCT == (vtop->type.t & VT_BTYPE)) {
- + if (!small_struct)
- + gaddrof();
- + }
- r1 = gv(rc);
- move_reg(r2, r1);
- vtop->r = r2;
- @@ -4394,8 +4441,10 @@ static void block(int *bsym, int *csym,
- gsym(d); /* patch else jmp */
- } else
- gsym(a);
- + RESET_CACHE_IND;
- } else if (tok == TOK_WHILE) {
- next();
- + RESET_CACHE_IND;
- d = ind;
- skip('(');
- gexpr();
- @@ -4490,6 +4539,18 @@ static void block(int *bsym, int *csym,
- gv(RC_IRET);
- } else {
- #endif
- +#if defined TCC_TARGET_PE && defined TCC_TARGET_I386
- + int align, size;
- + size = type_size(&func_vt, &align);
- + if (size <= 8) {
- + vtop->type = int_type;
- + if (size > 4) {
- + vtop->type.t &= ~VT_BTYPE;
- + vtop->type.t |= VT_LLONG;
- + }
- + gv(RC_IRET);
- + } else {
- +#endif
- type = func_vt;
- mk_pointer(&type);
- vset(&type, VT_LOCAL | VT_LVAL, func_vc);
- @@ -4497,7 +4558,7 @@ static void block(int *bsym, int *csym,
- vswap();
- /* copy structure value to pointer */
- vstore();
- -#ifdef TCC_ARM_EABI
- +#if defined TCC_ARM_EABI || (defined TCC_TARGET_PE && defined TCC_TARGET_I386) /* must mirror the #if guards that open the "} else {" above */
- }
- #endif
- } else if (is_float(func_vt.t)) {
- @@ -4540,6 +4601,7 @@ static void block(int *bsym, int *csym,
- }
- }
- skip(';');
- + RESET_CACHE_IND;
- d = ind;
- c = ind;
- a = 0;
- @@ -4567,6 +4629,7 @@ static void block(int *bsym, int *csym,
- } else
- if (tok == TOK_DO) {
- next();
- + RESET_CACHE_IND;
- a = 0;
- b = 0;
- d = ind;
- @@ -4574,6 +4637,7 @@ static void block(int *bsym, int *csym,
- skip(TOK_WHILE);
- skip('(');
- gsym(b);
- + RESET_CACHE_IND;
- gexpr();
- c = gtst(0, 0);
- gsym_addr(c, d);
- @@ -4594,6 +4658,7 @@ static void block(int *bsym, int *csym,
- c = 0;
- block(&a, csym, &b, &c, case_reg, 0);
- /* if no default, jmp after switch */
- + RESET_CACHE_IND;
- if (c == 0)
- c = ind;
- /* default label */
- @@ -4689,6 +4754,7 @@ static void block(int *bsym, int *csym,
- } else {
- s = label_push(&global_label_stack, b, LABEL_DEFINED);
- }
- + RESET_CACHE_IND;
- s->jnext = ind;
- /* we accept this, but it is a mistake */
- block_after_label:
- @@ -4697,6 +4763,7 @@ static void block(int *bsym, int *csym,
- } else {
- if (is_expr)
- vpop();
- + RESET_CACHE_IND;
- block(bsym, csym, case_sym, def_sym, case_reg, is_expr);
- }
- } else {
- diff -pruN tcc-0.9.26/win32/build-tcc.bat tcc-0.9.26o/win32/build-tcc.bat
- --- tcc-0.9.26/win32/build-tcc.bat 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/win32/build-tcc.bat 2013-09-26 01:26:37 +1000
- @@ -5,18 +5,18 @@
- @set /p VERSION= < ..\VERSION
- echo>..\config.h #define TCC_VERSION "%VERSION%"
- -@if _%PROCESSOR_ARCHITEW6432%_==_AMD64_ goto x86_64
- -@if _%PROCESSOR_ARCHITECTURE%_==_AMD64_ goto x86_64
- +@for /F "delims=-" %%A in ('gcc -dumpmachine') do @if %%A==x86_64 goto x86_64
- @set target=-DTCC_TARGET_PE -DTCC_TARGET_I386
- @set CC=gcc -Os -s -fno-strict-aliasing
- @set P=32
- +@set OPT=-O
- @goto tools
- :x86_64
- @set target=-DTCC_TARGET_PE -DTCC_TARGET_X86_64
- @rem mingw 64 has an ICE with -Os
- -@set CC=x86_64-pc-mingw32-gcc -O0 -s -fno-strict-aliasing
- +@set CC=gcc -O0 -s -fno-strict-aliasing
- @set P=64
- @goto tools
- @@ -28,6 +28,7 @@ echo>..\config.h #define TCC_VERSION "%V
- if not exist libtcc\nul mkdir libtcc
- copy ..\libtcc.h libtcc\libtcc.h
- %CC% %target% -shared -DLIBTCC_AS_DLL -DONE_SOURCE ../libtcc.c -o libtcc.dll -Wl,-out-implib,libtcc/libtcc.a
- +if errorlevel 1 goto the_end
- tiny_impdef libtcc.dll -o libtcc/libtcc.def
- :tcc
- @@ -37,24 +38,26 @@ tiny_impdef libtcc.dll -o libtcc/libtcc.
- copy ..\include\*.h include
- :libtcc1.a
- -.\tcc %target% -c ../lib/libtcc1.c
- -.\tcc %target% -c lib/crt1.c
- -.\tcc %target% -c lib/wincrt1.c
- -.\tcc %target% -c lib/dllcrt1.c
- -.\tcc %target% -c lib/dllmain.c
- -.\tcc %target% -c lib/chkstk.S
- +.\tcc %target% %OPT% -c ../lib/libtcc1.c
- +.\tcc %target% %OPT% -c lib/crt1.c
- +.\tcc %target% %OPT% -c lib/wincrt1.c
- +.\tcc %target% %OPT% -c lib/dllcrt1.c
- +.\tcc %target% %OPT% -c lib/dllmain.c
- +.\tcc %target% %OPT% -c lib/chkstk.S
- goto lib%P%
- :lib32
- -.\tcc %target% -c ../lib/alloca86.S
- -.\tcc %target% -c ../lib/alloca86-bt.S
- -.\tcc %target% -c ../lib/bcheck.c
- -tiny_libmaker lib/libtcc1.a libtcc1.o alloca86.o alloca86-bt.o crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o bcheck.o
- +.\tcc %target% %OPT% -c lib/seh.S
- +.\tcc %target% %OPT% -c ../lib/alloca86.S
- +.\tcc %target% %OPT% -c ../lib/alloca86-bt.S
- +.\tcc %target% %OPT% -c ../lib/bcheck.c
- +tiny_libmaker lib/libtcc1.a libtcc1.o alloca86.o alloca86-bt.o crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o seh.o bcheck.o
- @goto the_end
- :lib64
- +.\tcc %target% -c lib/sjlj.S
- .\tcc %target% -c ../lib/alloca86_64.S
- -tiny_libmaker lib/libtcc1.a libtcc1.o alloca86_64.o crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o
- +tiny_libmaker lib/libtcc1.a libtcc1.o alloca86_64.o crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o sjlj.o
- :the_end
- del *.o
- diff -pruN tcc-0.9.26/win32/lib/chkstk.S tcc-0.9.26o/win32/lib/chkstk.S
- --- tcc-0.9.26/win32/lib/chkstk.S 2013-02-16 00:24:00 +1000
- +++ tcc-0.9.26o/win32/lib/chkstk.S 2013-09-09 15:27:51 +1000
- @@ -55,137 +55,5 @@ P0:
- jmp *8(%rax)
- /* ---------------------------------------------- */
- -/* setjmp/longjmp support */
- -
- -.globl tinyc_getbp
- -tinyc_getbp:
- - mov %rbp,%rax
- - ret
- -
- -/* ---------------------------------------------- */
- -#endif
- -/* ---------------------------------------------- */
- -
- -
- -/* ---------------------------------------------- */
- -#ifndef TCC_TARGET_X86_64
- -/* ---------------------------------------------- */
- -
- -/*
- - int _except_handler3(
- - PEXCEPTION_RECORD exception_record,
- - PEXCEPTION_REGISTRATION registration,
- - PCONTEXT context,
- - PEXCEPTION_REGISTRATION dispatcher
- - );
- -
- - int __cdecl _XcptFilter(
- - unsigned long xcptnum,
- - PEXCEPTION_POINTERS pxcptinfoptrs
- - );
- -
- - struct _sehrec {
- - void *esp; // 0
- - void *exception_pointers; // 1
- - void *prev; // 2
- - void *handler; // 3
- - void *scopetable; // 4
- - int trylevel; // 5
- - void *ebp // 6
- - };
- -
- - // this is what the assembler code below means:
- - __try
- - {
- - // ...
- - }
- - __except (_XcptFilter(GetExceptionCode(), GetExceptionInformation()))
- - {
- - exit(GetExceptionCode());
- - }
- -*/
- -
- -.globl _exception_info
- -_exception_info:
- - mov 1*4-24(%ebp),%eax
- - ret
- -
- -.globl _exception_code
- -_exception_code:
- - call _exception_info
- - mov (%eax),%eax
- - mov (%eax),%eax
- - ret
- -
- -seh_filter:
- - call _exception_info
- - push %eax
- - call _exception_code
- - push %eax
- - call _XcptFilter
- - add $ 8,%esp
- - ret
- -
- -seh_except:
- - mov 0*4-24(%ebp),%esp
- - call _exception_code
- - push %eax
- - call _exit
- -
- -// msvcrt wants scopetables aligned and in read-only segment (using .text)
- -.align 4
- -seh_scopetable:
- - .long -1
- - .long seh_filter
- - .long seh_except
- -
- -seh_handler:
- - jmp _except_handler3
- -
- -.globl ___try__
- -___try__:
- -.globl __try__
- -__try__:
- - push %ebp
- - mov 8(%esp),%ebp
- -
- -// void *esp;
- - lea 12(%esp),%eax
- - mov %eax,0*4(%ebp)
- -
- -// void *exception_pointers;
- - xor %eax,%eax
- - mov %eax,1*4(%ebp)
- -
- -// void *prev;
- - mov %fs:0,%eax
- - mov %eax,2*4(%ebp)
- -
- -// void *handler;
- - mov $ seh_handler,%eax
- - mov %eax,3*4(%ebp)
- -
- -// void *scopetable;
- - mov $ seh_scopetable,%eax
- - mov %eax,4*4(%ebp)
- -
- -// int trylevel;
- - xor %eax,%eax
- - mov %eax,5*4(%ebp)
- -
- -// register new SEH
- - lea 2*4(%ebp),%eax
- - mov %eax,%fs:0
- -
- - pop %ebp
- - ret
- -
- -/* ---------------------------------------------- */
- -#else
- -/* ---------------------------------------------- */
- -
- -/* SEH on x86-64 not implemented */
- -
- -/* ---------------------------------------------- */
- #endif
- /* ---------------------------------------------- */
- diff -pruN tcc-0.9.26/win32/lib/seh.S tcc-0.9.26o/win32/lib/seh.S
- --- tcc-0.9.26/win32/lib/seh.S 1970-01-01 10:00:00 +1000
- +++ tcc-0.9.26o/win32/lib/seh.S 2013-09-24 16:17:05 +1000
- @@ -0,0 +1,112 @@
- +/* ---------------------------------------------- */
- +/* seh.S */
- +
- +/* structured exception handling for i386 */
- +
- +/*
- + int _except_handler3(
- + PEXCEPTION_RECORD exception_record,
- + PEXCEPTION_REGISTRATION registration,
- + PCONTEXT context,
- + PEXCEPTION_REGISTRATION dispatcher
- + );
- +
- + int __cdecl _XcptFilter(
- + unsigned long xcptnum,
- + PEXCEPTION_POINTERS pxcptinfoptrs
- + );
- +
- + struct _sehrec {
- + void *esp; // 0
- + void *exception_pointers; // 1
- + void *prev; // 2
- + void *handler; // 3
- + void *scopetable; // 4
- + int trylevel; // 5
- + void *ebp // 6
- + };
- +
- + // this is what the assembler code below means:
- + __try
- + {
- + // ...
- + }
- + __except (_XcptFilter(GetExceptionCode(), GetExceptionInformation()))
- + {
- + exit(GetExceptionCode());
- + }
- +*/
- +
- +.globl _exception_info
- +_exception_info:
- + mov 1*4-24(%ebp),%eax
- + ret
- +
- +.globl _exception_code
- +_exception_code:
- + call _exception_info
- + mov (%eax),%eax
- + mov (%eax),%eax
- + ret
- +
- +seh_filter:
- + call _exception_info
- + push %eax
- + call _exception_code
- + push %eax
- + call _XcptFilter
- + add $ 8,%esp
- + ret
- +
- +seh_except:
- + mov 0*4-24(%ebp),%esp
- + call _exception_code
- + push %eax
- + call _exit
- +
- +// msvcrt wants scopetables aligned and in read-only segment (using .text)
- +.align 4
- +seh_scopetable:
- + .long -1
- + .long seh_filter
- + .long seh_except
- +
- +seh_handler:
- + jmp _except_handler3
- +
- +.align 16
- +.globl ___try__
- +___try__:
- +.globl __try__
- +__try__:
- + push %ebp
- + mov 8(%esp),%ebp
- +
- +// void *esp;
- + lea 12(%esp),%eax
- + mov %eax,0*4(%ebp)
- +
- +// void *exception_pointers;
- + xor %eax,%eax
- + mov %eax,1*4(%ebp)
- +
- +// int trylevel;
- + mov %eax,5*4(%ebp)
- +
- +// void *prev;
- + mov %fs:0,%eax
- + mov %eax,2*4(%ebp)
- +
- +// void *handler;
- + movl $ seh_handler,3*4(%ebp)
- +
- +// void *scopetable;
- + movl $ seh_scopetable,4*4(%ebp)
- +
- +// register new SEH
- + lea 2*4(%ebp),%eax
- + mov %eax,%fs:0
- +
- + pop %ebp
- + ret
- +/* ---------------------------------------------- */
- diff -pruN tcc-0.9.26/win32/lib/sjlj.S tcc-0.9.26o/win32/lib/sjlj.S
- --- tcc-0.9.26/win32/lib/sjlj.S 1970-01-01 10:00:00 +1000
- +++ tcc-0.9.26o/win32/lib/sjlj.S 2013-09-09 15:18:12 +1000
- @@ -0,0 +1,10 @@
- +/* ---------------------------------------------- */
- +/* sjlj.S */
- +
- +/* setjmp/longjmp x86-64 support function */
- +
- +.globl tinyc_getbp
- +tinyc_getbp:
- + mov %rbp,%rax
- + ret
- +/* ---------------------------------------------- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement