Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
921
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 96.93 KB | None | 0 0
  1. diff --git a/configure.ac b/configure.ac
  2. index a084e926..af394c65 100644
  3. --- a/configure.ac
  4. +++ b/configure.ac
  5. @@ -340,7 +340,7 @@ AC_MSG_CHECKING(whether x86 dynamic cpu core will be enabled)
  6. if test x$enable_dynamic_x86 = xno -o x$enable_dynamic_core = xno; then
  7. AC_MSG_RESULT(no)
  8. else
  9. - if test x$c_targetcpu = xx86 ; then
  10. + if test x$c_targetcpu = xx86 -o x$c_targetcpu = xx86_64; then
  11. AC_DEFINE(C_DYNAMIC_X86,1)
  12. AC_MSG_RESULT(yes)
  13. else
  14. @@ -355,7 +355,7 @@ if test x$enable_dynrec = xno -o x$enable_dynamic_core = xno; then
  15. AC_MSG_RESULT(no)
  16. else
  17. dnl x86 only enable it if dynamic-x86 is disabled.
  18. - if test x$c_targetcpu = xx86 ; then
  19. + if test x$c_targetcpu = xx86 -o x$c_targetcpu = xx86_64; then
  20. if test x$enable_dynamic_x86 = xno ; then
  21. AC_DEFINE(C_DYNREC,1)
  22. AC_MSG_RESULT(yes)
  23. @@ -363,16 +363,11 @@ dnl x86 only enable it if dynamic-x86 is disabled.
  24. AC_MSG_RESULT([no, using dynamic-x86])
  25. fi
  26. else
  27. - if test x$c_targetcpu = xx86_64 ; then
  28. - AC_DEFINE(C_DYNREC,1)
  29. - AC_MSG_RESULT(yes)
  30. - else
  31. - if test x$c_targetcpu = xarm ; then
  32. - AC_DEFINE(C_DYNREC,1)
  33. - AC_MSG_RESULT(yes)
  34. - else
  35. - AC_MSG_RESULT(no)
  36. - fi
  37. + if test x$c_targetcpu = xarm ; then
  38. + AC_DEFINE(C_DYNREC,1)
  39. + AC_MSG_RESULT(yes)
  40. + else
  41. + AC_MSG_RESULT(no)
  42. fi
  43. fi
  44. fi
  45. diff --git a/src/cpu/core_dyn_x86.cpp b/src/cpu/core_dyn_x86.cpp
  46. index 17e84221..ec65238b 100644
  47. --- a/src/cpu/core_dyn_x86.cpp
  48. +++ b/src/cpu/core_dyn_x86.cpp
  49. @@ -65,6 +65,7 @@
  50.  
  51. #if C_FPU
  52. #define CPU_FPU 1 //Enable FPU escape instructions
  53. +#define X86_DYNFPU_DH_ENABLED
  54. #endif
  55.  
  56. enum {
  57. @@ -154,7 +155,7 @@ static DynReg DynRegs[G_MAX];
  58. #define DREG(_WHICH_) &DynRegs[G_ ## _WHICH_ ]
  59.  
  60. static struct {
  61. - Bitu ea,tmpb,tmpd,stack,shift,newesp;
  62. + Bit32u ea,tmpb,tmpd,stack,shift,newesp;
  63. } extra_regs;
  64.  
  65. static void IllegalOption(const char* msg) {
  66. @@ -168,17 +169,37 @@ static struct {
  67. Bit32u readdata;
  68. } core_dyn;
  69.  
  70. +#if defined(X86_DYNFPU_DH_ENABLED)
  71. static struct {
  72. - Bit32u state[32];
  73. + Bit16u cw,host_cw;
  74. + bool state_used;
  75. + // some fields expanded here for alignment purposes
  76. + struct {
  77. + Bit32u cw;
  78. + Bit32u sw;
  79. + Bit32u tag;
  80. + Bit32u ip;
  81. + Bit32u cs;
  82. + Bit32u ea;
  83. + Bit32u ds;
  84. + Bit8u st_reg[8][10];
  85. + } state;
  86. FPU_P_Reg temp,temp2;
  87. Bit32u dh_fpu_enabled;
  88. - Bit32u state_used;
  89. - Bit32u cw,host_cw;
  90. Bit8u temp_state[128];
  91. } dyn_dh_fpu;
  92. +#endif
  93.  
  94. +#define X86 0x01
  95. +#define X86_64 0x02
  96.  
  97. +#if C_TARGETCPU == X86_64
  98. +#include "core_dyn_x86/risc_x64.h"
  99. +#elif C_TARGETCPU == X86
  100. #include "core_dyn_x86/risc_x86.h"
  101. +#else
  102. +#error DYN_X86 core not supported for this CPU target.
  103. +#endif
  104.  
  105. struct DynState {
  106. DynReg regs[G_MAX];
  107. @@ -233,31 +254,19 @@ static void dyn_restoreregister(DynReg * src_reg, DynReg * dst_reg) {
  108.  
  109. #include "core_dyn_x86/decoder.h"
  110.  
  111. -#if defined (_MSC_VER)
  112. -#define DH_FPU_SAVE_REINIT \
  113. -{ \
  114. - __asm { \
  115. - __asm fnsave dyn_dh_fpu.state[0] \
  116. - } \
  117. - dyn_dh_fpu.state_used=false; \
  118. - dyn_dh_fpu.state[0]|=0x3f; \
  119. -}
  120. -#else
  121. -#define DH_FPU_SAVE_REINIT \
  122. -{ \
  123. - __asm__ volatile ( \
  124. - "fnsave %0 \n" \
  125. - : "=m" (dyn_dh_fpu.state[0]) \
  126. - : \
  127. - : "memory" \
  128. - ); \
  129. - dyn_dh_fpu.state_used=false; \
  130. - dyn_dh_fpu.state[0]|=0x3f; \
  131. -}
  132. +Bits CPU_Core_Dyn_X86_Run(void) {
  133. + // helper class to auto-save DH_FPU state on function exit
  134. + class auto_dh_fpu {
  135. + public:
  136. + ~auto_dh_fpu(void) {
  137. +#if defined(X86_DYNFPU_DH_ENABLED)
  138. + if (dyn_dh_fpu.state_used)
  139. + gen_dh_fpu_save();
  140. #endif
  141. + };
  142. + };
  143. + auto_dh_fpu fpu_saver;
  144.  
  145. -
  146. -Bits CPU_Core_Dyn_X86_Run(void) {
  147. /* Determine the linear address of CS:EIP */
  148. restart_core:
  149. PhysPt ip_point=SegPhys(cs)+reg_eip;
  150. @@ -272,7 +281,6 @@ restart_core:
  151. goto restart_core;
  152. }
  153. if (!chandler) {
  154. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  155. return CPU_Core_Normal_Run();
  156. }
  157. /* Find correct Dynamic Block to run */
  158. @@ -281,10 +289,11 @@ restart_core:
  159. if (!chandler->invalidation_map || (chandler->invalidation_map[ip_point&4095]<4)) {
  160. block=CreateCacheBlock(chandler,ip_point,32);
  161. } else {
  162. - Bitu old_cycles=CPU_Cycles;
  163. + Bit32s old_cycles=CPU_Cycles;
  164. CPU_Cycles=1;
  165. + // manually save
  166. + fpu_saver = auto_dh_fpu();
  167. Bits nc_retcode=CPU_Core_Normal_Run();
  168. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  169. if (!nc_retcode) {
  170. CPU_Cycles=old_cycles-1;
  171. goto restart_core;
  172. @@ -304,21 +313,17 @@ run_block:
  173. #if C_DEBUG
  174. #if C_HEAVY_DEBUG
  175. if (DEBUG_HeavyIsBreakpoint()) {
  176. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  177. return debugCallback;
  178. }
  179. #endif
  180. #endif
  181. if (!GETFLAG(TF)) {
  182. if (GETFLAG(IF) && PIC_IRQCheck) {
  183. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  184. return CBRET_NONE;
  185. }
  186. goto restart_core;
  187. }
  188. cpudecoder=CPU_Core_Dyn_X86_Trap_Run;
  189. - if (!dyn_dh_fpu.state_used) return CBRET_NONE;
  190. - DH_FPU_SAVE_REINIT
  191. return CBRET_NONE;
  192. case BR_Normal:
  193. /* Maybe check if we staying in the same page? */
  194. @@ -334,12 +339,8 @@ run_block:
  195. if (DEBUG_HeavyIsBreakpoint()) return debugCallback;
  196. #endif
  197. #endif
  198. - if (!dyn_dh_fpu.state_used) return CBRET_NONE;
  199. - DH_FPU_SAVE_REINIT
  200. return CBRET_NONE;
  201. case BR_CallBack:
  202. - if (!dyn_dh_fpu.state_used) return core_dyn.callback;
  203. - DH_FPU_SAVE_REINIT
  204. return core_dyn.callback;
  205. case BR_SMCBlock:
  206. // LOG_MSG("selfmodification of running block at %x:%x",SegValue(cs),reg_eip);
  207. @@ -348,19 +349,17 @@ run_block:
  208. case BR_Opcode:
  209. CPU_CycleLeft+=CPU_Cycles;
  210. CPU_Cycles=1;
  211. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  212. return CPU_Core_Normal_Run();
  213. #if (C_DEBUG)
  214. case BR_OpcodeFull:
  215. CPU_CycleLeft+=CPU_Cycles;
  216. CPU_Cycles=1;
  217. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  218. return CPU_Core_Full_Run();
  219. #endif
  220. case BR_Link1:
  221. case BR_Link2:
  222. {
  223. - Bitu temp_ip=SegPhys(cs)+reg_eip;
  224. + Bit32u temp_ip=SegPhys(cs)+reg_eip;
  225. CodePageHandler * temp_handler=(CodePageHandler *)get_tlb_readhandler(temp_ip);
  226. if (temp_handler->flags & (cpu.code.big ? PFLAG_HASCODE32:PFLAG_HASCODE16)) {
  227. block=temp_handler->FindCacheBlock(temp_ip & 4095);
  228. @@ -371,12 +370,11 @@ run_block:
  229. }
  230. goto restart_core;
  231. }
  232. - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT
  233. return CBRET_NONE;
  234. }
  235.  
  236. Bits CPU_Core_Dyn_X86_Trap_Run(void) {
  237. - Bits oldCycles = CPU_Cycles;
  238. + Bit32s oldCycles = CPU_Cycles;
  239. CPU_Cycles = 1;
  240. cpu.trap_skip = false;
  241.  
  242. @@ -449,25 +447,15 @@ void CPU_Core_Dyn_X86_Init(void) {
  243. /* Init the generator */
  244. gen_init();
  245.  
  246. +#if defined(X86_DYNFPU_DH_ENABLED)
  247. /* Init the fpu state */
  248. dyn_dh_fpu.dh_fpu_enabled=true;
  249. dyn_dh_fpu.state_used=false;
  250. dyn_dh_fpu.cw=0x37f;
  251. -#if defined (_MSC_VER)
  252. - __asm {
  253. - __asm finit
  254. - __asm fsave dyn_dh_fpu.state[0]
  255. - __asm fstcw dyn_dh_fpu.host_cw
  256. - }
  257. -#else
  258. - __asm__ volatile (
  259. - "finit \n"
  260. - "fsave %0 \n"
  261. - "fstcw %1 \n"
  262. - : "=m" (dyn_dh_fpu.state[0]), "=m" (dyn_dh_fpu.host_cw)
  263. - :
  264. - : "memory"
  265. - );
  266. + // FINIT
  267. + memset(&dyn_dh_fpu.state, 0, sizeof(dyn_dh_fpu.state));
  268. + dyn_dh_fpu.state.cw = 0x37F;
  269. + dyn_dh_fpu.state.tag = 0xFFFF;
  270. #endif
  271.  
  272. return;
  273. @@ -482,62 +470,10 @@ void CPU_Core_Dyn_X86_Cache_Close(void) {
  274. cache_close();
  275. }
  276.  
  277. -void CPU_Core_Dyn_X86_Cache_Reset(void) {
  278. - cache_reset();
  279. -}
  280. -
  281. void CPU_Core_Dyn_X86_SetFPUMode(bool dh_fpu) {
  282. +#if defined(X86_DYNFPU_DH_ENABLED)
  283. dyn_dh_fpu.dh_fpu_enabled=dh_fpu;
  284. -}
  285. -
  286. -Bit32u fpu_state[32];
  287. -
  288. -void CPU_Core_Dyn_X86_SaveDHFPUState(void) {
  289. - if (dyn_dh_fpu.dh_fpu_enabled) {
  290. - if (dyn_dh_fpu.state_used!=0) {
  291. -#if defined (_MSC_VER)
  292. - __asm {
  293. - __asm fsave fpu_state[0]
  294. - __asm finit
  295. - }
  296. -#else
  297. - __asm__ volatile (
  298. - "fsave %0 \n"
  299. - "finit \n"
  300. - : "=m" (fpu_state[0])
  301. - :
  302. - : "memory"
  303. - );
  304. -#endif
  305. - }
  306. - }
  307. -}
  308. -
  309. -void CPU_Core_Dyn_X86_RestoreDHFPUState(void) {
  310. - if (dyn_dh_fpu.dh_fpu_enabled) {
  311. - if (dyn_dh_fpu.state_used!=0) {
  312. -#if defined (_MSC_VER)
  313. - __asm {
  314. - __asm frstor fpu_state[0]
  315. - }
  316. -#else
  317. - __asm__ volatile (
  318. - "frstor %0 \n"
  319. - :
  320. - : "m" (fpu_state[0])
  321. - :
  322. - );
  323. #endif
  324. - }
  325. - }
  326. -}
  327. -
  328. -#else
  329. -
  330. -void CPU_Core_Dyn_X86_SaveDHFPUState(void) {
  331. -}
  332. -
  333. -void CPU_Core_Dyn_X86_RestoreDHFPUState(void) {
  334. }
  335.  
  336. #endif
  337. diff --git a/src/cpu/core_dyn_x86/cache.h b/src/cpu/core_dyn_x86/cache.h
  338. index 08a3526b..6cb38754 100644
  339. --- a/src/cpu/core_dyn_x86/cache.h
  340. +++ b/src/cpu/core_dyn_x86/cache.h
  341. @@ -466,6 +466,10 @@ static INLINE void cache_addd(Bit32u val) {
  342. cache.pos+=4;
  343. }
  344.  
  345. +static INLINE void cache_addq(Bit64u val) {
  346. + *(Bit64u*)cache.pos=val;
  347. + cache.pos+=8;
  348. +}
  349.  
  350. static void gen_return(BlockReturn retcode);
  351.  
  352. @@ -569,75 +573,3 @@ static void cache_close(void) {
  353. cache_code_link_blocks = NULL;
  354. cache_initialized = false; */
  355. }
  356. -
  357. -static void cache_reset(void) {
  358. - if (cache_initialized) {
  359. - for (;;) {
  360. - if (cache.used_pages) {
  361. - CodePageHandler * cpage=cache.used_pages;
  362. - CodePageHandler * npage=cache.used_pages->next;
  363. - cpage->ClearRelease();
  364. - delete cpage;
  365. - cache.used_pages=npage;
  366. - } else break;
  367. - }
  368. -
  369. - if (cache_blocks == NULL) {
  370. - cache_blocks=(CacheBlock*)malloc(CACHE_BLOCKS*sizeof(CacheBlock));
  371. - if(!cache_blocks) E_Exit("Allocating cache_blocks has failed");
  372. - }
  373. - memset(cache_blocks,0,sizeof(CacheBlock)*CACHE_BLOCKS);
  374. - cache.block.free=&cache_blocks[0];
  375. - for (Bits i=0;i<CACHE_BLOCKS-1;i++) {
  376. - cache_blocks[i].link[0].to=(CacheBlock *)1;
  377. - cache_blocks[i].link[1].to=(CacheBlock *)1;
  378. - cache_blocks[i].cache.next=&cache_blocks[i+1];
  379. - }
  380. -
  381. - if (cache_code_start_ptr==NULL) {
  382. -#if defined (WIN32)
  383. - cache_code_start_ptr=(Bit8u*)VirtualAlloc(0,CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP,
  384. - MEM_COMMIT,PAGE_EXECUTE_READWRITE);
  385. - if (!cache_code_start_ptr)
  386. - cache_code_start_ptr=(Bit8u*)malloc(CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP);
  387. -#else
  388. - cache_code_start_ptr=(Bit8u*)malloc(CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP);
  389. -#endif
  390. - if (!cache_code_start_ptr) E_Exit("Allocating dynamic core cache memory failed");
  391. -
  392. - cache_code=(Bit8u*)(((Bitu)cache_code_start_ptr + PAGESIZE_TEMP-1) & ~(PAGESIZE_TEMP-1)); //Bitu is same size as a pointer.
  393. -
  394. - cache_code_link_blocks=cache_code;
  395. - cache_code+=PAGESIZE_TEMP;
  396. -
  397. -#if (C_HAVE_MPROTECT)
  398. - if(mprotect(cache_code_link_blocks,CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP,PROT_WRITE|PROT_READ|PROT_EXEC))
  399. - LOG_MSG("Setting execute permission on the code cache has failed!");
  400. -#endif
  401. - }
  402. -
  403. - CacheBlock * block=cache_getblock();
  404. - cache.block.first=block;
  405. - cache.block.active=block;
  406. - block->cache.start=&cache_code[0];
  407. - block->cache.size=CACHE_TOTAL;
  408. - block->cache.next=0; //Last block in the list
  409. -
  410. - /* Setup the default blocks for block linkage returns */
  411. - cache.pos=&cache_code_link_blocks[0];
  412. - link_blocks[0].cache.start=cache.pos;
  413. - gen_return(BR_Link1);
  414. - cache.pos=&cache_code_link_blocks[32];
  415. - link_blocks[1].cache.start=cache.pos;
  416. - gen_return(BR_Link2);
  417. - cache.free_pages=0;
  418. - cache.last_page=0;
  419. - cache.used_pages=0;
  420. - /* Setup the code pages */
  421. - for (Bitu i=0;i<CACHE_PAGES;i++) {
  422. - CodePageHandler * newpage=new CodePageHandler();
  423. - newpage->next=cache.free_pages;
  424. - cache.free_pages=newpage;
  425. - }
  426. - }
  427. -}
  428. diff --git a/src/cpu/core_dyn_x86/decoder.h b/src/cpu/core_dyn_x86/decoder.h
  429. index 416f10b2..163cd644 100644
  430. --- a/src/cpu/core_dyn_x86/decoder.h
  431. +++ b/src/cpu/core_dyn_x86/decoder.h
  432. @@ -17,7 +17,6 @@
  433. */
  434.  
  435.  
  436. -#define X86_DYNFPU_DH_ENABLED
  437. #define X86_INLINED_MEMACCESS
  438.  
  439.  
  440. @@ -332,7 +331,7 @@ static BlockReturn DynRunException(Bit32u eip_add,Bit32u cycle_sub,Bit32u dflags
  441. }
  442.  
  443. static void dyn_check_bool_exception(DynReg * check) {
  444. - gen_dop_byte(DOP_OR,check,0,check,0);
  445. + gen_dop_byte(DOP_TEST,check,0,check,0);
  446. save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ);
  447. dyn_savestate(&save_info[used_save_info].state);
  448. if (!decode.cycles) decode.cycles++;
  449. @@ -344,7 +343,7 @@ static void dyn_check_bool_exception(DynReg * check) {
  450. }
  451.  
  452. static void dyn_check_bool_exception_al(void) {
  453. - cache_addw(0xc00a); // or al, al
  454. + cache_addw(0xC084); // test al,al
  455. save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ);
  456. dyn_savestate(&save_info[used_save_info].state);
  457. if (!decode.cycles) decode.cycles++;
  458. @@ -359,7 +358,7 @@ static void dyn_check_bool_exception_al(void) {
  459.  
  460. static void dyn_check_irqrequest(void) {
  461. gen_load_host(&PIC_IRQCheck,DREG(TMPB),4);
  462. - gen_dop_word(DOP_OR,true,DREG(TMPB),DREG(TMPB));
  463. + gen_dop_word(DOP_TEST,true,DREG(TMPB),DREG(TMPB));
  464. save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ);
  465. gen_releasereg(DREG(TMPB));
  466. dyn_savestate(&save_info[used_save_info].state);
  467. @@ -403,21 +402,29 @@ static void dyn_fill_blocks(void) {
  468. dyn_save_critical_regs();
  469. gen_return(BR_Cycles);
  470. break;
  471. +#ifdef X86_DYNFPU_DH_ENABLED
  472. case fpu_restore:
  473. dyn_loadstate(&save_info[sct].state);
  474. - gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),4);
  475. - gen_sop_word(SOP_INC,true,DREG(TMPB));
  476. - GenReg * gr1=FindDynReg(DREG(TMPB));
  477. +#if C_TARGETCPU == X86
  478. + cache_addb(0xd9); // FNSTCW fpu.host_cw
  479. + cache_addb(0x3d);
  480. + cache_addd((Bit32u)(&dyn_dh_fpu.host_cw));
  481. cache_addb(0xdd); // FRSTOR fpu.state (fpu_restore)
  482. cache_addb(0x25);
  483. - cache_addd((Bit32u)(&(dyn_dh_fpu.state[0])));
  484. - cache_addb(0x89); // mov fpu.state_used,1
  485. - cache_addb(0x05|(gr1->index<<3));
  486. - cache_addd((Bit32u)(&(dyn_dh_fpu.state_used)));
  487. - gen_releasereg(DREG(TMPB));
  488. + cache_addd((Bit32u)(&dyn_dh_fpu.state));
  489. + cache_addb(0xC6); // mov byte [fpu.state_used], 1
  490. + cache_addb(0x05);
  491. + cache_addd((Bit32u)(&dyn_dh_fpu.state_used));
  492. + cache_addb(1);
  493. +#else // X86_64
  494. + opcode(7).setabsaddr(&dyn_dh_fpu.host_cw).Emit8(0xD9); // FNSTCW [&fpu.host_cw]
  495. + opcode(4).setabsaddr(&dyn_dh_fpu.state).Emit8(0xDD); // FRSTOR [&fpu.state]
  496. + opcode(0).setimm(1,1).setabsaddr(&dyn_dh_fpu.state_used).Emit8(0xC6); // mov byte[], imm8
  497. +#endif
  498. dyn_synchstate(&save_info[sct].state);
  499. gen_create_jump(save_info[sct].return_pos);
  500. break;
  501. +#endif
  502. }
  503. }
  504. used_save_info=0;
  505. @@ -427,7 +434,7 @@ static void dyn_fill_blocks(void) {
  506. #if !defined(X86_INLINED_MEMACCESS)
  507. static void dyn_read_byte(DynReg * addr,DynReg * dst,Bitu high) {
  508. gen_protectflags();
  509. - gen_call_function((void *)&mem_readb_checked,"%Dd%Id",addr,&core_dyn.readdata);
  510. + gen_call_function((void *)&mem_readb_checked,"%Dd%Ip",addr,&core_dyn.readdata);
  511. dyn_check_bool_exception_al();
  512. gen_mov_host(&core_dyn.readdata,dst,1,high);
  513. }
  514. @@ -439,8 +446,8 @@ static void dyn_write_byte(DynReg * addr,DynReg * val,Bitu high) {
  515. }
  516. static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) {
  517. gen_protectflags();
  518. - if (dword) gen_call_function((void *)&mem_readd_checked,"%Dd%Id",addr,&core_dyn.readdata);
  519. - else gen_call_function((void *)&mem_readw_checked,"%Dd%Id",addr,&core_dyn.readdata);
  520. + if (dword) gen_call_function((void *)&mem_readd_checked,"%Dd%Ip",addr,&core_dyn.readdata);
  521. + else gen_call_function((void *)&mem_readw_checked,"%Dd%Ip",addr,&core_dyn.readdata);
  522. dyn_check_bool_exception_al();
  523. gen_mov_host(&core_dyn.readdata,dst,dword?4:2);
  524. }
  525. @@ -452,31 +459,32 @@ static void dyn_write_word(DynReg * addr,DynReg * val,bool dword) {
  526. }
  527. static void dyn_read_byte_release(DynReg * addr,DynReg * dst,Bitu high) {
  528. gen_protectflags();
  529. - gen_call_function((void *)&mem_readb_checked,"%Ddr%Id",addr,&core_dyn.readdata);
  530. + gen_call_function((void *)&mem_readb_checked,"%Drd%Ip",addr,&core_dyn.readdata);
  531. dyn_check_bool_exception_al();
  532. gen_mov_host(&core_dyn.readdata,dst,1,high);
  533. }
  534. static void dyn_write_byte_release(DynReg * addr,DynReg * val,Bitu high) {
  535. gen_protectflags();
  536. - if (high) gen_call_function((void *)&mem_writeb_checked,"%Ddr%Dh",addr,val);
  537. - else gen_call_function((void *)&mem_writeb_checked,"%Ddr%Dd",addr,val);
  538. + if (high) gen_call_function((void *)&mem_writeb_checked,"%Drd%Dh",addr,val);
  539. + else gen_call_function((void *)&mem_writeb_checked,"%Drd%Dd",addr,val);
  540. dyn_check_bool_exception_al();
  541. }
  542. static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) {
  543. gen_protectflags();
  544. - if (dword) gen_call_function((void *)&mem_readd_checked,"%Ddr%Id",addr,&core_dyn.readdata);
  545. - else gen_call_function((void *)&mem_readw_checked,"%Ddr%Id",addr,&core_dyn.readdata);
  546. + if (dword) gen_call_function((void *)&mem_readd_checked,"%Drd%Ip",addr,&core_dyn.readdata);
  547. + else gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",addr,&core_dyn.readdata);
  548. dyn_check_bool_exception_al();
  549. gen_mov_host(&core_dyn.readdata,dst,dword?4:2);
  550. }
  551. static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) {
  552. gen_protectflags();
  553. - if (dword) gen_call_function((void *)&mem_writed_checked,"%Ddr%Dd",addr,val);
  554. - else gen_call_function((void *)&mem_writew_checked,"%Ddr%Dd",addr,val);
  555. + if (dword) gen_call_function((void *)&mem_writed_checked,"%Drd%Dd",addr,val);
  556. + else gen_call_function((void *)&mem_writew_checked,"%Drd%Dd",addr,val);
  557. dyn_check_bool_exception_al();
  558. }
  559.  
  560. #else
  561. +#if C_TARGETCPU == X86
  562.  
  563. static void dyn_read_intro(DynReg * addr,bool release_addr=true) {
  564. gen_protectflags();
  565. @@ -634,7 +642,7 @@ static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) {
  566. gen_fill_jump(jmp_loc);
  567. } else {
  568. gen_protectflags();
  569. - gen_call_function((void *)&mem_readw_checked,"%Dd%Id",addr,&core_dyn.readdata);
  570. + gen_call_function((void *)&mem_readw_checked,"%Dd%Ip",addr,&core_dyn.readdata);
  571. dyn_check_bool_exception_al();
  572. gen_mov_host(&core_dyn.readdata,dst,2);
  573. }
  574. @@ -680,7 +688,7 @@ static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) {
  575. gen_fill_jump(jmp_loc);
  576. } else {
  577. gen_protectflags();
  578. - gen_call_function((void *)&mem_readw_checked,"%Ddr%Id",addr,&core_dyn.readdata);
  579. + gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",addr,&core_dyn.readdata);
  580. dyn_check_bool_exception_al();
  581. gen_mov_host(&core_dyn.readdata,dst,2);
  582. }
  583. @@ -888,11 +896,247 @@ static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) {
  584. gen_fill_jump(jmp_loc);
  585. } else {
  586. gen_protectflags();
  587. - gen_call_function((void *)&mem_writew_checked,"%Ddr%Dd",addr,val);
  588. + gen_call_function((void *)&mem_writew_checked,"%Drd%Dd",addr,val);
  589. dyn_check_bool_exception_al();
  590. }
  591. }
  592. +#else // X86_64
  593. +bool mem_readd_checked_dcx64(PhysPt address, Bit32u* dst) {
  594. + return get_tlb_readhandler(address)->readd_checked(address, dst);
  595. +}
  596. +bool mem_readw_checked_dcx64(PhysPt address, Bit16u* dst) {
  597. + return get_tlb_readhandler(address)->readw_checked(address, dst);
  598. +}
  599. +bool mem_writed_checked_dcx64(PhysPt address, Bitu val) {
  600. + return get_tlb_writehandler(address)->writed_checked(address, val);
  601. +}
  602. +bool mem_writew_checked_dcx64(PhysPt address, Bitu val) {
  603. + return get_tlb_writehandler(address)->writew_checked(address, val);
  604. +}
  605. +bool mem_readb_checked_dcx64(PhysPt address, Bit8u* dst) {
  606. + return get_tlb_readhandler(address)->readb_checked(address, dst);
  607. +}
  608. +bool mem_writeb_checked_dcx64(PhysPt address, Bitu val) {
  609. + return get_tlb_writehandler(address)->writeb_checked(address, val);
  610. +}
  611. +
  612. +static void dyn_read_word_internal(DynReg * addr,DynReg * dst,bool dword,bool release) {
  613. + DynState callstate;
  614. + gen_protectflags();
  615. +
  616. + x64gen.regs[X64_REG_RAX]->Clear();
  617. + x64gen.regs[X64_REG_RAX]->notusable = true;
  618. + GenReg *gensrc = FindDynReg(addr);
  619. + if (dword && release) gen_releasereg(addr);
  620. + GenReg *gendst = FindDynReg(dst,dword);
  621. + if (!dword && release) gen_releasereg(addr);
  622. + x64gen.regs[X64_REG_RAX]->notusable = false;
  623. + dyn_savestate(&callstate);
  624. +
  625. + Bit8u *page_brk;
  626. + opcode(0).set64().setea(gensrc->index,-1,0,dword?3:1).Emit8(0x8D); // lea rax, [dst+(dword?3:1)]
  627. + if (dword) {
  628. + opcode(0).set64().setimm(~0xFFF,4).Emit8Reg(0x25); // and rax, ~0xFFF
  629. + opcode(gensrc->index).set64().setrm(0).Emit8(0x39); // cmp rax,src
  630. + page_brk=gen_create_branch(BR_NBE);
  631. + } else {
  632. + opcode(0,false).setimm(0xFFF,2).Emit8Reg(0xA9); // test ax,0xFFF
  633. + page_brk=gen_create_branch(BR_Z);
  634. + }
  635. +
  636. + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12
  637. + // mov rax, [8*rax+paging.tlb.read(rbp)]
  638. + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.read-(Bits)&cpu_regs).Emit8(0x8B);
  639. + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax
  640. + Bit8u *nomap=gen_create_branch(BR_Z);
  641. + //mov dst, [RAX+src]
  642. + opcode(gendst->index,dword).setea(0,gensrc->index).Emit8(0x8B);
  643. + Bit8u* jmp_loc = gen_create_short_jump();
  644. +
  645. + gen_fill_branch(page_brk);
  646. + gen_load_imm(0, (Bitu)(dword?(void*)mem_unalignedreadd_checked:(void*)mem_unalignedreadw_checked));
  647. + Bit8u* page_jmp = gen_create_short_jump();
  648. + gen_fill_branch(nomap);
  649. + gen_load_imm(0, (Bitu)(dword?(void*)mem_readd_checked_dcx64:(void*)mem_readw_checked_dcx64));
  650. + gen_fill_short_jump(page_jmp);
  651. +
  652. + if (gensrc->index != ARG0_REG) {
  653. + x64gen.regs[reg_args[0]]->Clear();
  654. + opcode(ARG0_REG).setrm(gensrc->index).Emit8(0x8B);
  655. + }
  656. + x64gen.regs[reg_args[1]]->Clear();
  657. + gen_load_imm(ARG1_REG, (Bitu)dst->data);
  658. + gendst->Clear();
  659. + gen_call_ptr();
  660. + dyn_check_bool_exception_al();
  661. +
  662. + dyn_synchstate(&callstate);
  663. + dst->flags |= DYNFLG_CHANGED;
  664. + gen_fill_short_jump(jmp_loc);
  665. +}
  666. +
  667. +static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) {
  668. + dyn_read_word_internal(addr,dst,dword,false);
  669. +}
  670. +static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) {
  671. + dyn_read_word_internal(addr,dst,dword,true);
  672. +}
  673. +static void dyn_read_byte_internal(DynReg * addr,DynReg * dst,bool high,bool release) {
  674. + DynState callstate;
  675. + gen_protectflags();
  676. +
  677. + x64gen.regs[X64_REG_RAX]->Clear();
  678. + x64gen.regs[X64_REG_RAX]->notusable = true;
  679. + GenReg *gensrc = FindDynReg(addr);
  680. + GenReg *gendst = FindDynReg(dst);
  681. + if (release) gen_releasereg(addr);
  682. + x64gen.regs[X64_REG_RAX]->notusable = false;
  683. + dyn_savestate(&callstate);
  684. +
  685. + if (gendst->index>3) IllegalOption("dyn_read_byte");
  686. +
  687. + opcode(0).setrm(gensrc->index).Emit8(0x8B); // mov eax, src
  688. + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12
  689. + // mov rax, [8*rax+paging.tlb.read(rbp)]
  690. + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.read-(Bits)&cpu_regs).Emit8(0x8B);
  691. + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax
  692. + Bit8u *nomap=gen_create_branch(BR_Z);
  693. +
  694. + int src = gensrc->index;
  695. + if (high && src>=8) { // can't use REX prefix with high-byte reg
  696. + opcode(0).set64().setrm(src).Emit8(0x03); // add rax, src
  697. + src = -1;
  698. + }
  699. + // mov dst, byte [rax+src]
  700. + opcode(gendst->index,true,high?4:0).setea(0,src).Emit8(0x8A);
  701. + Bit8u* jmp_loc=gen_create_short_jump();
  702. +
  703. + gen_fill_branch(nomap);
  704. + if (gensrc->index != ARG0_REG) {
  705. + x64gen.regs[reg_args[0]]->Clear();
  706. + opcode(ARG0_REG).setrm(gensrc->index).Emit8(0x8B); // mov ARG0,src
  707. + }
  708. + x64gen.regs[reg_args[1]]->Clear();
  709. + gen_load_imm(ARG1_REG, (Bitu)(high?((Bit8u*)dst->data)+1:dst->data));
  710. + gendst->Clear();
  711. + gen_call_ptr((void*)mem_readb_checked_dcx64);
  712. + dyn_check_bool_exception_al();
  713. +
  714. + dyn_synchstate(&callstate);
  715. + dst->flags |= DYNFLG_CHANGED;
  716. + gen_fill_short_jump(jmp_loc);
  717. +}
  718. +static void dyn_read_byte(DynReg * addr,DynReg * dst,bool high) {
  719. + dyn_read_byte_internal(addr,dst,high,false);
  720. +}
  721. +static void dyn_read_byte_release(DynReg * addr,DynReg * dst,bool high) {
  722. + dyn_read_byte_internal(addr,dst,high,true);
  723. +}
  724. +static void dyn_write_word_internal(DynReg * addr,DynReg * val,bool dword,bool release) {
  725. + DynState callstate;
  726. + gen_protectflags();
  727. +
  728. + x64gen.regs[X64_REG_RAX]->Clear();
  729. + x64gen.regs[X64_REG_RAX]->notusable = true;
  730. + GenReg *gendst = FindDynReg(addr);
  731. + GenReg *genval = FindDynReg(val);
  732. + if (release) gen_releasereg(addr);
  733. + x64gen.regs[X64_REG_RAX]->notusable = false;
  734. + dyn_savestate(&callstate);
  735. +
  736. + Bit8u *page_brk;
  737. + opcode(0).set64().setea(gendst->index,-1,0,dword?3:1).Emit8(0x8D); // lea rax, [dst+(dword?3:1)]
  738. + if (dword) {
  739. + opcode(0).set64().setimm(~0xFFF,4).Emit8Reg(0x25); // and rax, ~0xFFF
  740. + opcode(gendst->index).set64().setrm(0).Emit8(0x39); // cmp rax,dst
  741. + page_brk=gen_create_branch(BR_NBE);
  742. + } else {
  743. + opcode(0,false).setimm(0xFFF,2).Emit8Reg(0xA9); // test ax,0xFFF
  744. + page_brk=gen_create_branch(BR_Z);
  745. + }
  746. +
  747. + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12
  748. + // mov rax, [8*rax+paging.tlb.write(rbp)]
  749. + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.write-(Bits)&cpu_regs).Emit8(0x8B);
  750. + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax
  751. + Bit8u *nomap=gen_create_branch(BR_Z);
  752. + //mov [RAX+src], dst
  753. + opcode(genval->index,dword).setea(0,gendst->index).Emit8(0x89);
  754. + Bit8u* jmp_loc = gen_create_short_jump();
  755. +
  756. + gen_fill_branch(page_brk);
  757. + gen_load_imm(0, (Bitu)(dword?(void*)mem_unalignedwrited_checked:(void*)mem_unalignedwritew_checked));
  758. + Bit8u* page_jmp = gen_create_short_jump();
  759. + gen_fill_branch(nomap);
  760. + gen_load_imm(0, (Bitu)(dword?(void*)mem_writed_checked_dcx64:(void*)mem_writew_checked_dcx64));
  761. + gen_fill_short_jump(page_jmp);
  762. +
  763. + if (gendst->index != ARG0_REG) {
  764. + x64gen.regs[reg_args[0]]->Clear();
  765. + opcode(ARG0_REG).setrm(gendst->index).Emit8(0x8B);
  766. + }
  767. + gen_load_arg_reg(1, val, dword ? "d":"w");
  768. + gen_call_ptr();
  769. + dyn_check_bool_exception_al();
  770. + dyn_synchstate(&callstate);
  771. + gen_fill_short_jump(jmp_loc);
  772. +}
  773. +static void dyn_write_word(DynReg * addr,DynReg * val,bool dword) {
  774. + dyn_write_word_internal(addr, val, dword, false);
  775. +}
  776. +static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) {
  777. + dyn_write_word_internal(addr, val, dword, true);
  778. +}
  779. +static void dyn_write_byte_internal(DynReg * addr,DynReg * val,bool high,bool release) {
  780. + DynState callstate;
  781. + gen_protectflags();
  782. +
  783. + x64gen.regs[X64_REG_RAX]->Clear();
  784. + x64gen.regs[X64_REG_RAX]->notusable = true;
  785. + GenReg *gendst = FindDynReg(addr);
  786. + GenReg *genval = FindDynReg(val);
  787. + if (release) gen_releasereg(addr);
  788. + x64gen.regs[X64_REG_RAX]->notusable = false;
  789. + dyn_savestate(&callstate);
  790. +
  791. + if (genval->index>3) IllegalOption("dyn_write_byte");
  792. +
  793. + opcode(0).setrm(gendst->index).Emit8(0x8B); // mov eax, dst
  794. + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12
  795. + // mov rax, [8*rax+paging.tlb.write(rbp)]
  796. + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.write-(Bits)&cpu_regs).Emit8(0x8B);
  797. + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax
  798. + Bit8u *nomap=gen_create_branch(BR_Z);
  799. +
  800. + int dst = gendst->index;
  801. + if (high && dst>=8) { // can't use REX prefix with high-byte reg
  802. + opcode(0).set64().setrm(dst).Emit8(0x03); // add rax, dst
  803. + dst = -1;
  804. + }
  805. + // mov byte [rax+src], val
  806. + opcode(genval->index,true,high?4:0).setea(0,dst).Emit8(0x88);
  807. +
  808. + Bit8u* jmp_loc=gen_create_short_jump();
  809. + gen_fill_branch(nomap);
  810.  
  811. + if (gendst->index != ARG0_REG) {
  812. + x64gen.regs[reg_args[0]]->Clear();
  813. + opcode(ARG0_REG).setrm(gendst->index).Emit8(0x8B); // mov ARG0,dst
  814. + }
  815. + gen_load_arg_reg(1, val, high ? "h":"l");
  816. + gen_call_ptr((void*)mem_writeb_checked_dcx64);
  817. + dyn_check_bool_exception_al();
  818. +
  819. + dyn_synchstate(&callstate);
  820. + gen_fill_short_jump(jmp_loc);
  821. +}
  822. +static void dyn_write_byte(DynReg * addr,DynReg * src,bool high) {
  823. + dyn_write_byte_internal(addr,src,high,false);
  824. +}
  825. +static void dyn_write_byte_release(DynReg * addr,DynReg * src,bool high) {
  826. + dyn_write_byte_internal(addr,src,high,true);
  827. +}
  828. +#endif
  829. #endif
  830.  
  831.  
  832. @@ -938,9 +1182,9 @@ static void dyn_pop(DynReg * dynreg,bool checked=true) {
  833. gen_dop_word(DOP_ADD,true,DREG(STACK),DREG(SS));
  834. if (checked) {
  835. if (decode.big_op) {
  836. - gen_call_function((void *)&mem_readd_checked,"%Drd%Id",DREG(STACK),&core_dyn.readdata);
  837. + gen_call_function((void *)&mem_readd_checked,"%Drd%Ip",DREG(STACK),&core_dyn.readdata);
  838. } else {
  839. - gen_call_function((void *)&mem_readw_checked,"%Drd%Id",DREG(STACK),&core_dyn.readdata);
  840. + gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",DREG(STACK),&core_dyn.readdata);
  841. }
  842. dyn_check_bool_exception_al();
  843. gen_mov_host(&core_dyn.readdata,dynreg,decode.big_op?4:2);
  844. @@ -1739,8 +1983,8 @@ static void dyn_pop_ev(void) {
  845. if (decode.modrm.mod<3) {
  846. dyn_fill_ea();
  847. // dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op);
  848. - if (decode.big_op) gen_call_function((void *)&mem_writed_inline,"%Ddr%Dd",DREG(EA),DREG(TMPW));
  849. - else gen_call_function((void *)&mem_writew_inline,"%Ddr%Dd",DREG(EA),DREG(TMPW));
  850. + if (decode.big_op) gen_call_function((void *)&mem_writed_inline,"%Drd%Dd",DREG(EA),DREG(TMPW));
  851. + else gen_call_function((void *)&mem_writew_inline,"%Drd%Dd",DREG(EA),DREG(TMPW));
  852. } else {
  853. gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],DREG(TMPW));
  854. }
  855. @@ -1861,7 +2105,7 @@ static void dyn_loop(LoopTypes type) {
  856. branch2=gen_create_branch(BR_Z);
  857. break;
  858. case LOOP_JCXZ:
  859. - gen_dop_word(DOP_OR,decode.big_addr,DREG(ECX),DREG(ECX));
  860. + gen_dop_word(DOP_TEST,decode.big_addr,DREG(ECX),DREG(ECX));
  861. gen_releasereg(DREG(ECX));
  862. branch2=gen_create_branch(BR_NZ);
  863. break;
  864. @@ -1982,8 +2226,8 @@ static void dyn_add_iocheck_var(Bit8u accessed_port,Bitu access_size) {
  865. #define dh_fpu_startup() { \
  866. fpu_used=true; \
  867. gen_protectflags(); \
  868. - gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),4); \
  869. - gen_dop_word_imm(DOP_CMP,true,DREG(TMPB),0); \
  870. + gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),1); \
  871. + gen_dop_byte(DOP_TEST,DREG(TMPB),0,DREG(TMPB),0); \
  872. gen_releasereg(DREG(TMPB)); \
  873. save_info[used_save_info].branch_pos=gen_create_branch_long(BR_Z); \
  874. dyn_savestate(&save_info[used_save_info].state); \
  875. @@ -2009,15 +2253,15 @@ static CacheBlock * CreateCacheBlock(CodePageHandler * codepage,PhysPt start,Bit
  876. decode.block->page.start=decode.page.index;
  877. codepage->AddCacheBlock(decode.block);
  878.  
  879. - gen_save_host_direct(&cache.block.running,(Bit32u)decode.block);
  880. for (i=0;i<G_MAX;i++) {
  881. DynRegs[i].flags&=~(DYNFLG_ACTIVE|DYNFLG_CHANGED);
  882. DynRegs[i].genreg=0;
  883. }
  884. gen_reinit();
  885. + gen_save_host_direct(&cache.block.running,(Bitu)decode.block);
  886. /* Start with the cycles check */
  887. gen_protectflags();
  888. - gen_dop_word_imm(DOP_CMP,true,DREG(CYCLES),0);
  889. + gen_dop_word(DOP_TEST,true,DREG(CYCLES),DREG(CYCLES));
  890. save_info[used_save_info].branch_pos=gen_create_branch_long(BR_LE);
  891. save_info[used_save_info].type=cycle_check;
  892. used_save_info++;
  893. diff --git a/src/cpu/core_dyn_x86/dyn_fpu.h b/src/cpu/core_dyn_x86/dyn_fpu.h
  894. index ec4afc68..9d261c1a 100644
  895. --- a/src/cpu/core_dyn_x86/dyn_fpu.h
  896. +++ b/src/cpu/core_dyn_x86/dyn_fpu.h
  897. @@ -64,29 +64,29 @@ static void dyn_eatree() {
  898. Bitu group=(decode.modrm.val >> 3) & 7;
  899. switch (group){
  900. case 0x00: /* FADD ST,STi */
  901. - gen_call_function((void*)&FPU_FADD_EA,"%Ddr",DREG(TMPB));
  902. + gen_call_function((void*)&FPU_FADD_EA,"%Drd",DREG(TMPB));
  903. break;
  904. case 0x01: /* FMUL ST,STi */
  905. - gen_call_function((void*)&FPU_FMUL_EA,"%Ddr",DREG(TMPB));
  906. + gen_call_function((void*)&FPU_FMUL_EA,"%Drd",DREG(TMPB));
  907. break;
  908. case 0x02: /* FCOM STi */
  909. - gen_call_function((void*)&FPU_FCOM_EA,"%Ddr",DREG(TMPB));
  910. + gen_call_function((void*)&FPU_FCOM_EA,"%Drd",DREG(TMPB));
  911. break;
  912. case 0x03: /* FCOMP STi */
  913. - gen_call_function((void*)&FPU_FCOM_EA,"%Ddr",DREG(TMPB));
  914. + gen_call_function((void*)&FPU_FCOM_EA,"%Drd",DREG(TMPB));
  915. gen_call_function((void*)&FPU_FPOP,"");
  916. break;
  917. case 0x04: /* FSUB ST,STi */
  918. - gen_call_function((void*)&FPU_FSUB_EA,"%Ddr",DREG(TMPB));
  919. + gen_call_function((void*)&FPU_FSUB_EA,"%Drd",DREG(TMPB));
  920. break;
  921. case 0x05: /* FSUBR ST,STi */
  922. - gen_call_function((void*)&FPU_FSUBR_EA,"%Ddr",DREG(TMPB));
  923. + gen_call_function((void*)&FPU_FSUBR_EA,"%Drd",DREG(TMPB));
  924. break;
  925. case 0x06: /* FDIV ST,STi */
  926. - gen_call_function((void*)&FPU_FDIV_EA,"%Ddr",DREG(TMPB));
  927. + gen_call_function((void*)&FPU_FDIV_EA,"%Drd",DREG(TMPB));
  928. break;
  929. case 0x07: /* FDIVR ST,STi */
  930. - gen_call_function((void*)&FPU_FDIVR_EA,"%Ddr",DREG(TMPB));
  931. + gen_call_function((void*)&FPU_FDIVR_EA,"%Drd",DREG(TMPB));
  932. break;
  933. default:
  934. break;
  935. @@ -101,36 +101,36 @@ static void dyn_fpu_esc0(){
  936. Bitu sub=(decode.modrm.val & 7);
  937. switch (group){
  938. case 0x00: //FADD ST,STi /
  939. - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  940. + gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(TMPB),DREG(EA));
  941. break;
  942. case 0x01: // FMUL ST,STi /
  943. - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  944. + gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(TMPB),DREG(EA));
  945. break;
  946. case 0x02: // FCOM STi /
  947. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  948. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  949. break;
  950. case 0x03: // FCOMP STi /
  951. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  952. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  953. gen_call_function((void*)&FPU_FPOP,"");
  954. break;
  955. case 0x04: // FSUB ST,STi /
  956. - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  957. + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(TMPB),DREG(EA));
  958. break;
  959. case 0x05: // FSUBR ST,STi /
  960. - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  961. + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(TMPB),DREG(EA));
  962. break;
  963. case 0x06: // FDIV ST,STi /
  964. - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  965. + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(TMPB),DREG(EA));
  966. break;
  967. case 0x07: // FDIVR ST,STi /
  968. - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  969. + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(TMPB),DREG(EA));
  970. break;
  971. default:
  972. break;
  973. }
  974. } else {
  975. dyn_fill_ea();
  976. - gen_call_function((void*)&FPU_FLD_F32_EA,"%Ddr",DREG(EA));
  977. + gen_call_function((void*)&FPU_FLD_F32_EA,"%Drd",DREG(EA));
  978. gen_load_host(&TOP,DREG(TMPB),4);
  979. dyn_eatree();
  980. }
  981. @@ -149,18 +149,18 @@ static void dyn_fpu_esc1(){
  982. gen_dop_word_imm(DOP_AND,true,DREG(EA),7);
  983. gen_call_function((void*)&FPU_PREP_PUSH,"");
  984. gen_load_host(&TOP,DREG(TMPB),4);
  985. - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  986. + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(EA),DREG(TMPB));
  987. break;
  988. case 0x01: /* FXCH STi */
  989. dyn_fpu_top();
  990. - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  991. + gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA));
  992. break;
  993. case 0x02: /* FNOP */
  994. gen_call_function((void*)&FPU_FNOP,"");
  995. break;
  996. case 0x03: /* FSTP STi */
  997. dyn_fpu_top();
  998. - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  999. + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1000. gen_call_function((void*)&FPU_FPOP,"");
  1001. break;
  1002. case 0x04:
  1003. @@ -290,29 +290,29 @@ static void dyn_fpu_esc1(){
  1004. gen_protectflags();
  1005. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1006. gen_load_host(&TOP,DREG(TMPB),4);
  1007. - gen_call_function((void*)&FPU_FLD_F32,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1008. + gen_call_function((void*)&FPU_FLD_F32,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1009. break;
  1010. case 0x01: /* UNKNOWN */
  1011. LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
  1012. break;
  1013. case 0x02: /* FST float*/
  1014. - gen_call_function((void*)&FPU_FST_F32,"%Ddr",DREG(EA));
  1015. + gen_call_function((void*)&FPU_FST_F32,"%Drd",DREG(EA));
  1016. break;
  1017. case 0x03: /* FSTP float*/
  1018. - gen_call_function((void*)&FPU_FST_F32,"%Ddr",DREG(EA));
  1019. + gen_call_function((void*)&FPU_FST_F32,"%Drd",DREG(EA));
  1020. gen_call_function((void*)&FPU_FPOP,"");
  1021. break;
  1022. case 0x04: /* FLDENV */
  1023. - gen_call_function((void*)&FPU_FLDENV,"%Ddr",DREG(EA));
  1024. + gen_call_function((void*)&FPU_FLDENV,"%Drd",DREG(EA));
  1025. break;
  1026. case 0x05: /* FLDCW */
  1027. - gen_call_function((void *)&FPU_FLDCW,"%Ddr",DREG(EA));
  1028. + gen_call_function((void *)&FPU_FLDCW,"%Drd",DREG(EA));
  1029. break;
  1030. case 0x06: /* FSTENV */
  1031. - gen_call_function((void *)&FPU_FSTENV,"%Ddr",DREG(EA));
  1032. + gen_call_function((void *)&FPU_FSTENV,"%Drd",DREG(EA));
  1033. break;
  1034. case 0x07: /* FNSTCW*/
  1035. - gen_call_function((void *)&FPU_FNSTCW,"%Ddr",DREG(EA));
  1036. + gen_call_function((void *)&FPU_FNSTCW,"%Drd",DREG(EA));
  1037. break;
  1038. default:
  1039. LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
  1040. @@ -335,7 +335,7 @@ static void dyn_fpu_esc2(){
  1041. gen_dop_word_imm(DOP_ADD,true,DREG(EA),1);
  1042. gen_dop_word_imm(DOP_AND,true,DREG(EA),7);
  1043. gen_load_host(&TOP,DREG(TMPB),4);
  1044. - gen_call_function((void *)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1045. + gen_call_function((void *)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1046. gen_call_function((void *)&FPU_FPOP,"");
  1047. gen_call_function((void *)&FPU_FPOP,"");
  1048. break;
  1049. @@ -350,7 +350,7 @@ static void dyn_fpu_esc2(){
  1050. }
  1051. } else {
  1052. dyn_fill_ea();
  1053. - gen_call_function((void*)&FPU_FLD_I32_EA,"%Ddr",DREG(EA));
  1054. + gen_call_function((void*)&FPU_FLD_I32_EA,"%Drd",DREG(EA));
  1055. gen_load_host(&TOP,DREG(TMPB),4);
  1056. dyn_eatree();
  1057. }
  1058. @@ -395,24 +395,24 @@ static void dyn_fpu_esc3(){
  1059. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1060. gen_protectflags();
  1061. gen_load_host(&TOP,DREG(TMPB),4);
  1062. - gen_call_function((void*)&FPU_FLD_I32,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1063. + gen_call_function((void*)&FPU_FLD_I32,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1064. break;
  1065. case 0x01: /* FISTTP */
  1066. LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub);
  1067. break;
  1068. case 0x02: /* FIST */
  1069. - gen_call_function((void*)&FPU_FST_I32,"%Ddr",DREG(EA));
  1070. + gen_call_function((void*)&FPU_FST_I32,"%Drd",DREG(EA));
  1071. break;
  1072. case 0x03: /* FISTP */
  1073. - gen_call_function((void*)&FPU_FST_I32,"%Ddr",DREG(EA));
  1074. + gen_call_function((void*)&FPU_FST_I32,"%Drd",DREG(EA));
  1075. gen_call_function((void*)&FPU_FPOP,"");
  1076. break;
  1077. case 0x05: /* FLD 80 Bits Real */
  1078. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1079. - gen_call_function((void*)&FPU_FLD_F80,"%Ddr",DREG(EA));
  1080. + gen_call_function((void*)&FPU_FLD_F80,"%Drd",DREG(EA));
  1081. break;
  1082. case 0x07: /* FSTP 80 Bits Real */
  1083. - gen_call_function((void*)&FPU_FST_F80,"%Ddr",DREG(EA));
  1084. + gen_call_function((void*)&FPU_FST_F80,"%Drd",DREG(EA));
  1085. gen_call_function((void*)&FPU_FPOP,"");
  1086. break;
  1087. default:
  1088. @@ -429,36 +429,36 @@ static void dyn_fpu_esc4(){
  1089. dyn_fpu_top();
  1090. switch(group){
  1091. case 0x00: /* FADD STi,ST*/
  1092. - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1093. + gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1094. break;
  1095. case 0x01: /* FMUL STi,ST*/
  1096. - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1097. + gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1098. break;
  1099. case 0x02: /* FCOM*/
  1100. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1101. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1102. break;
  1103. case 0x03: /* FCOMP*/
  1104. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1105. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1106. gen_call_function((void*)&FPU_FPOP,"");
  1107. break;
  1108. case 0x04: /* FSUBR STi,ST*/
  1109. - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1110. + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1111. break;
  1112. case 0x05: /* FSUB STi,ST*/
  1113. - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1114. + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1115. break;
  1116. case 0x06: /* FDIVR STi,ST*/
  1117. - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1118. + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1119. break;
  1120. case 0x07: /* FDIV STi,ST*/
  1121. - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1122. + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1123. break;
  1124. default:
  1125. break;
  1126. }
  1127. } else {
  1128. dyn_fill_ea();
  1129. - gen_call_function((void*)&FPU_FLD_F64_EA,"%Ddr",DREG(EA));
  1130. + gen_call_function((void*)&FPU_FLD_F64_EA,"%Drd",DREG(EA));
  1131. gen_load_host(&TOP,DREG(TMPB),4);
  1132. dyn_eatree();
  1133. }
  1134. @@ -472,23 +472,23 @@ static void dyn_fpu_esc5(){
  1135. dyn_fpu_top();
  1136. switch(group){
  1137. case 0x00: /* FFREE STi */
  1138. - gen_call_function((void*)&FPU_FFREE,"%Ddr",DREG(EA));
  1139. + gen_call_function((void*)&FPU_FFREE,"%Drd",DREG(EA));
  1140. break;
  1141. case 0x01: /* FXCH STi*/
  1142. - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1143. + gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1144. break;
  1145. case 0x02: /* FST STi */
  1146. - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1147. + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1148. break;
  1149. case 0x03: /* FSTP STi*/
  1150. - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1151. + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1152. gen_call_function((void*)&FPU_FPOP,"");
  1153. break;
  1154. case 0x04: /* FUCOM STi */
  1155. - gen_call_function((void*)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1156. + gen_call_function((void*)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1157. break;
  1158. case 0x05: /*FUCOMP STi */
  1159. - gen_call_function((void*)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1160. + gen_call_function((void*)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1161. gen_call_function((void*)&FPU_FPOP,"");
  1162. break;
  1163. default:
  1164. @@ -504,30 +504,30 @@ static void dyn_fpu_esc5(){
  1165. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1166. gen_protectflags();
  1167. gen_load_host(&TOP,DREG(TMPB),4);
  1168. - gen_call_function((void*)&FPU_FLD_F64,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1169. + gen_call_function((void*)&FPU_FLD_F64,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1170. break;
  1171. case 0x01: /* FISTTP longint*/
  1172. LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub);
  1173. break;
  1174. case 0x02: /* FST double real*/
  1175. - gen_call_function((void*)&FPU_FST_F64,"%Ddr",DREG(EA));
  1176. + gen_call_function((void*)&FPU_FST_F64,"%Drd",DREG(EA));
  1177. break;
  1178. case 0x03: /* FSTP double real*/
  1179. - gen_call_function((void*)&FPU_FST_F64,"%Ddr",DREG(EA));
  1180. + gen_call_function((void*)&FPU_FST_F64,"%Drd",DREG(EA));
  1181. gen_call_function((void*)&FPU_FPOP,"");
  1182. break;
  1183. case 0x04: /* FRSTOR */
  1184. - gen_call_function((void*)&FPU_FRSTOR,"%Ddr",DREG(EA));
  1185. + gen_call_function((void*)&FPU_FRSTOR,"%Drd",DREG(EA));
  1186. break;
  1187. case 0x06: /* FSAVE */
  1188. - gen_call_function((void*)&FPU_FSAVE,"%Ddr",DREG(EA));
  1189. + gen_call_function((void*)&FPU_FSAVE,"%Drd",DREG(EA));
  1190. break;
  1191. case 0x07: /*FNSTSW */
  1192. gen_protectflags();
  1193. gen_load_host(&TOP,DREG(TMPB),4);
  1194. gen_call_function((void*)&FPU_SET_TOP,"%Dd",DREG(TMPB));
  1195. gen_load_host(&fpu.sw,DREG(TMPB),4);
  1196. - gen_call_function((void*)&mem_writew,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1197. + gen_call_function((void*)&mem_writew,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1198. break;
  1199. default:
  1200. LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub);
  1201. @@ -543,13 +543,13 @@ static void dyn_fpu_esc6(){
  1202. dyn_fpu_top();
  1203. switch(group){
  1204. case 0x00: /*FADDP STi,ST*/
  1205. - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1206. + gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1207. break;
  1208. case 0x01: /* FMULP STi,ST*/
  1209. - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1210. + gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1211. break;
  1212. case 0x02: /* FCOMP5*/
  1213. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1214. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1215. break; /* TODO IS THIS ALLRIGHT ????????? */
  1216. case 0x03: /*FCOMPP*/
  1217. if(sub != 1) {
  1218. @@ -559,20 +559,20 @@ static void dyn_fpu_esc6(){
  1219. gen_load_host(&TOP,DREG(EA),4);
  1220. gen_dop_word_imm(DOP_ADD,true,DREG(EA),1);
  1221. gen_dop_word_imm(DOP_AND,true,DREG(EA),7);
  1222. - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1223. + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1224. gen_call_function((void*)&FPU_FPOP,""); /* extra pop at the bottom*/
  1225. break;
  1226. case 0x04: /* FSUBRP STi,ST*/
  1227. - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1228. + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1229. break;
  1230. case 0x05: /* FSUBP STi,ST*/
  1231. - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1232. + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1233. break;
  1234. case 0x06: /* FDIVRP STi,ST*/
  1235. - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1236. + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1237. break;
  1238. case 0x07: /* FDIVP STi,ST*/
  1239. - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1240. + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1241. break;
  1242. default:
  1243. break;
  1244. @@ -580,7 +580,7 @@ static void dyn_fpu_esc6(){
  1245. gen_call_function((void*)&FPU_FPOP,"");
  1246. } else {
  1247. dyn_fill_ea();
  1248. - gen_call_function((void*)&FPU_FLD_I16_EA,"%Ddr",DREG(EA));
  1249. + gen_call_function((void*)&FPU_FLD_I16_EA,"%Drd",DREG(EA));
  1250. gen_load_host(&TOP,DREG(TMPB),4);
  1251. dyn_eatree();
  1252. }
  1253. @@ -594,24 +594,24 @@ static void dyn_fpu_esc7(){
  1254. switch (group){
  1255. case 0x00: /* FFREEP STi*/
  1256. dyn_fpu_top();
  1257. - gen_call_function((void*)&FPU_FFREE,"%Ddr",DREG(EA));
  1258. + gen_call_function((void*)&FPU_FFREE,"%Drd",DREG(EA));
  1259. gen_call_function((void*)&FPU_FPOP,"");
  1260. break;
  1261. case 0x01: /* FXCH STi*/
  1262. dyn_fpu_top();
  1263. - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1264. + gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1265. break;
  1266. case 0x02: /* FSTP STi*/
  1267. case 0x03: /* FSTP STi*/
  1268. dyn_fpu_top();
  1269. - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA));
  1270. + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA));
  1271. gen_call_function((void*)&FPU_FPOP,"");
  1272. break;
  1273. case 0x04:
  1274. switch(sub){
  1275. case 0x00: /* FNSTSW AX*/
  1276. gen_load_host(&TOP,DREG(TMPB),4);
  1277. - gen_call_function((void*)&FPU_SET_TOP,"%Ddr",DREG(TMPB));
  1278. + gen_call_function((void*)&FPU_SET_TOP,"%Drd",DREG(TMPB));
  1279. gen_mov_host(&fpu.sw,DREG(EAX),2);
  1280. break;
  1281. default:
  1282. @@ -629,34 +629,34 @@ static void dyn_fpu_esc7(){
  1283. case 0x00: /* FILD Bit16s */
  1284. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1285. gen_load_host(&TOP,DREG(TMPB),4);
  1286. - gen_call_function((void*)&FPU_FLD_I16,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1287. + gen_call_function((void*)&FPU_FLD_I16,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1288. break;
  1289. case 0x01:
  1290. LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub);
  1291. break;
  1292. case 0x02: /* FIST Bit16s */
  1293. - gen_call_function((void*)&FPU_FST_I16,"%Ddr",DREG(EA));
  1294. + gen_call_function((void*)&FPU_FST_I16,"%Drd",DREG(EA));
  1295. break;
  1296. case 0x03: /* FISTP Bit16s */
  1297. - gen_call_function((void*)&FPU_FST_I16,"%Ddr",DREG(EA));
  1298. + gen_call_function((void*)&FPU_FST_I16,"%Drd",DREG(EA));
  1299. gen_call_function((void*)&FPU_FPOP,"");
  1300. break;
  1301. case 0x04: /* FBLD packed BCD */
  1302. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1303. gen_load_host(&TOP,DREG(TMPB),4);
  1304. - gen_call_function((void*)&FPU_FBLD,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1305. + gen_call_function((void*)&FPU_FBLD,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1306. break;
  1307. case 0x05: /* FILD Bit64s */
  1308. gen_call_function((void*)&FPU_PREP_PUSH,"");
  1309. gen_load_host(&TOP,DREG(TMPB),4);
  1310. - gen_call_function((void*)&FPU_FLD_I64,"%Ddr%Ddr",DREG(EA),DREG(TMPB));
  1311. + gen_call_function((void*)&FPU_FLD_I64,"%Drd%Drd",DREG(EA),DREG(TMPB));
  1312. break;
  1313. case 0x06: /* FBSTP packed BCD */
  1314. - gen_call_function((void*)&FPU_FBST,"%Ddr",DREG(EA));
  1315. + gen_call_function((void*)&FPU_FBST,"%Drd",DREG(EA));
  1316. gen_call_function((void*)&FPU_FPOP,"");
  1317. break;
  1318. case 0x07: /* FISTP Bit64s */
  1319. - gen_call_function((void*)&FPU_FST_I64,"%Ddr",DREG(EA));
  1320. + gen_call_function((void*)&FPU_FST_I64,"%Drd",DREG(EA));
  1321. gen_call_function((void*)&FPU_FPOP,"");
  1322. break;
  1323. default:
  1324. diff --git a/src/cpu/core_dyn_x86/dyn_fpu_dh.h b/src/cpu/core_dyn_x86/dyn_fpu_dh.h
  1325. index 03b6bd5d..6899d5d8 100644
  1326. --- a/src/cpu/core_dyn_x86/dyn_fpu_dh.h
  1327. +++ b/src/cpu/core_dyn_x86/dyn_fpu_dh.h
  1328. @@ -148,6 +148,16 @@ static void FPU_FRSTOR_DH(PhysPt addr){
  1329. }
  1330. }
  1331.  
  1332. +static void dh_fpu_mem(Bit8u inst, Bitu reg=decode.modrm.reg, void* mem=&dyn_dh_fpu.temp.m1) {
  1333. +#if C_TARGETCPU == X86
  1334. + cache_addb(inst);
  1335. + cache_addb(0x05|(reg<<3));
  1336. + cache_addd((Bit32u)(mem));
  1337. +#else // X86_64
  1338. + opcode(reg).setabsaddr(mem).Emit8(inst);
  1339. +#endif
  1340. +}
  1341. +
  1342. static void dh_fpu_esc0(){
  1343. dyn_get_modrm();
  1344. if (decode.modrm.val >= 0xc0) {
  1345. @@ -155,10 +165,8 @@ static void dh_fpu_esc0(){
  1346. cache_addb(decode.modrm.val);
  1347. } else {
  1348. dyn_fill_ea();
  1349. - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA));
  1350. - cache_addb(0xd8);
  1351. - cache_addb(0x05|(decode.modrm.reg<<3));
  1352. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1353. + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA));
  1354. + dh_fpu_mem(0xd8);
  1355. }
  1356. }
  1357.  
  1358. @@ -173,46 +181,34 @@ static void dh_fpu_esc1(){
  1359. dyn_fill_ea();
  1360. switch(group){
  1361. case 0x00: /* FLD float*/
  1362. - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA));
  1363. - cache_addb(0xd9);
  1364. - cache_addb(0x05|(decode.modrm.reg<<3));
  1365. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1366. + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA));
  1367. + dh_fpu_mem(0xd9);
  1368. break;
  1369. case 0x01: /* UNKNOWN */
  1370. LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
  1371. break;
  1372. case 0x02: /* FST float*/
  1373. - cache_addb(0xd9);
  1374. - cache_addb(0x05|(decode.modrm.reg<<3));
  1375. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1376. - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA));
  1377. + dh_fpu_mem(0xd9);
  1378. + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA));
  1379. break;
  1380. case 0x03: /* FSTP float*/
  1381. - cache_addb(0xd9);
  1382. - cache_addb(0x05|(decode.modrm.reg<<3));
  1383. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1384. - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA));
  1385. + dh_fpu_mem(0xd9);
  1386. + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA));
  1387. break;
  1388. case 0x04: /* FLDENV */
  1389. - gen_call_function((void*)&FPU_FLDENV_DH,"%Ddr",DREG(EA));
  1390. - cache_addb(0xd9);
  1391. - cache_addb(0x05|(decode.modrm.reg<<3));
  1392. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1393. + gen_call_function((void*)&FPU_FLDENV_DH,"%Drd",DREG(EA));
  1394. + dh_fpu_mem(0xd9);
  1395. break;
  1396. case 0x05: /* FLDCW */
  1397. - gen_call_function((void *)&FPU_FLDCW_DH,"%Ddr",DREG(EA));
  1398. - cache_addb(0xd9);
  1399. - cache_addb(0x05|(decode.modrm.reg<<3));
  1400. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1401. + gen_call_function((void *)&FPU_FLDCW_DH,"%Drd",DREG(EA));
  1402. + dh_fpu_mem(0xd9);
  1403. break;
  1404. case 0x06: /* FSTENV */
  1405. - cache_addb(0xd9);
  1406. - cache_addb(0x05|(decode.modrm.reg<<3));
  1407. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1408. - gen_call_function((void*)&FPU_FSTENV_DH,"%Ddr",DREG(EA));
  1409. + dh_fpu_mem(0xd9);
  1410. + gen_call_function((void*)&FPU_FSTENV_DH,"%Drd",DREG(EA));
  1411. break;
  1412. case 0x07: /* FNSTCW*/
  1413. - gen_call_function((void*)&FPU_FNSTCW_DH,"%Ddr",DREG(EA));
  1414. + gen_call_function((void*)&FPU_FNSTCW_DH,"%Drd",DREG(EA));
  1415. break;
  1416. default:
  1417. LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
  1418. @@ -228,10 +224,8 @@ static void dh_fpu_esc2(){
  1419. cache_addb(decode.modrm.val);
  1420. } else {
  1421. dyn_fill_ea();
  1422. - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA));
  1423. - cache_addb(0xda);
  1424. - cache_addb(0x05|(decode.modrm.reg<<3));
  1425. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1426. + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA));
  1427. + dh_fpu_mem(0xda);
  1428. }
  1429. }
  1430.  
  1431. @@ -274,37 +268,27 @@ static void dh_fpu_esc3(){
  1432. dyn_fill_ea();
  1433. switch(group){
  1434. case 0x00: /* FILD */
  1435. - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA));
  1436. - cache_addb(0xdb);
  1437. - cache_addb(0x05|(decode.modrm.reg<<3));
  1438. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1439. + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA));
  1440. + dh_fpu_mem(0xdb);
  1441. break;
  1442. case 0x01: /* FISTTP */
  1443. LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub);
  1444. break;
  1445. case 0x02: /* FIST */
  1446. - cache_addb(0xdb);
  1447. - cache_addb(0x05|(decode.modrm.reg<<3));
  1448. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1449. - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA));
  1450. + dh_fpu_mem(0xdb);
  1451. + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA));
  1452. break;
  1453. case 0x03: /* FISTP */
  1454. - cache_addb(0xdb);
  1455. - cache_addb(0x05|(decode.modrm.reg<<3));
  1456. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1457. - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA));
  1458. + dh_fpu_mem(0xdb);
  1459. + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA));
  1460. break;
  1461. case 0x05: /* FLD 80 Bits Real */
  1462. - gen_call_function((void*)&FPU_FLD_80,"%Ddr",DREG(EA));
  1463. - cache_addb(0xdb);
  1464. - cache_addb(0x05|(decode.modrm.reg<<3));
  1465. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1466. + gen_call_function((void*)&FPU_FLD_80,"%Drd",DREG(EA));
  1467. + dh_fpu_mem(0xdb);
  1468. break;
  1469. case 0x07: /* FSTP 80 Bits Real */
  1470. - cache_addb(0xdb);
  1471. - cache_addb(0x05|(decode.modrm.reg<<3));
  1472. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1473. - gen_call_function((void*)&FPU_FST_80,"%Ddr",DREG(EA));
  1474. + dh_fpu_mem(0xdb);
  1475. + gen_call_function((void*)&FPU_FST_80,"%Drd",DREG(EA));
  1476. break;
  1477. default:
  1478. LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub);
  1479. @@ -321,10 +305,8 @@ static void dh_fpu_esc4(){
  1480. cache_addb(decode.modrm.val);
  1481. } else {
  1482. dyn_fill_ea();
  1483. - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA));
  1484. - cache_addb(0xdc);
  1485. - cache_addb(0x05|(decode.modrm.reg<<3));
  1486. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1487. + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA));
  1488. + dh_fpu_mem(0xdc);
  1489. }
  1490. }
  1491.  
  1492. @@ -339,45 +321,32 @@ static void dh_fpu_esc5(){
  1493. Bitu sub=(decode.modrm.val & 7);
  1494. switch(group){
  1495. case 0x00: /* FLD double real*/
  1496. - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA));
  1497. - cache_addb(0xdd);
  1498. - cache_addb(0x05|(decode.modrm.reg<<3));
  1499. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1500. + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA));
  1501. + dh_fpu_mem(0xdd);
  1502. break;
  1503. case 0x01: /* FISTTP longint*/
  1504. LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub);
  1505. break;
  1506. case 0x02: /* FST double real*/
  1507. - cache_addb(0xdd);
  1508. - cache_addb(0x05|(decode.modrm.reg<<3));
  1509. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1510. - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA));
  1511. + dh_fpu_mem(0xdd);
  1512. + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA));
  1513. break;
  1514. case 0x03: /* FSTP double real*/
  1515. - cache_addb(0xdd);
  1516. - cache_addb(0x05|(decode.modrm.reg<<3));
  1517. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1518. - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA));
  1519. + dh_fpu_mem(0xdd);
  1520. + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA));
  1521. break;
  1522. case 0x04: /* FRSTOR */
  1523. - gen_call_function((void*)&FPU_FRSTOR_DH,"%Ddr",DREG(EA));
  1524. - cache_addb(0xdd);
  1525. - cache_addb(0x05|(decode.modrm.reg<<3));
  1526. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp_state[0])));
  1527. + gen_call_function((void*)&FPU_FRSTOR_DH,"%Drd",DREG(EA));
  1528. + dh_fpu_mem(0xdd, decode.modrm.reg, &(dyn_dh_fpu.temp_state[0]));
  1529. break;
  1530. case 0x06: /* FSAVE */
  1531. - cache_addb(0xdd);
  1532. - cache_addb(0x05|(decode.modrm.reg<<3));
  1533. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp_state[0])));
  1534. - gen_call_function((void*)&FPU_FSAVE_DH,"%Ddr",DREG(EA));
  1535. - cache_addb(0xdb);
  1536. - cache_addb(0xe3);
  1537. + dh_fpu_mem(0xdd, decode.modrm.reg, &(dyn_dh_fpu.temp_state[0]));
  1538. + gen_call_function((void*)&FPU_FSAVE_DH,"%Drd",DREG(EA));
  1539. + cache_addw(0xE3DB);
  1540. break;
  1541. case 0x07: /* FNSTSW */
  1542. - cache_addb(0xdd);
  1543. - cache_addb(0x05|(decode.modrm.reg<<3));
  1544. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1545. - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA));
  1546. + dh_fpu_mem(0xdd);
  1547. + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA));
  1548. break;
  1549. default:
  1550. LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub);
  1551. @@ -394,10 +363,8 @@ static void dh_fpu_esc6(){
  1552. cache_addb(decode.modrm.val);
  1553. } else {
  1554. dyn_fill_ea();
  1555. - gen_call_function((void*)&FPU_FLD_16,"%Ddr",DREG(EA));
  1556. - cache_addb(0xde);
  1557. - cache_addb(0x05|(decode.modrm.reg<<3));
  1558. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1559. + gen_call_function((void*)&FPU_FLD_16,"%Drd",DREG(EA));
  1560. + dh_fpu_mem(0xde);
  1561. }
  1562. }
  1563.  
  1564. @@ -423,9 +390,7 @@ static void dh_fpu_esc7(){
  1565. case 0x04:
  1566. switch(sub){
  1567. case 0x00: /* FNSTSW AX*/
  1568. - cache_addb(0xdd);
  1569. - cache_addb(0x05|(0x07<<3));
  1570. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1571. + dh_fpu_mem(0xdd, 7);
  1572. gen_load_host(&(dyn_dh_fpu.temp.m1),DREG(TMPB),4);
  1573. gen_dop_word(DOP_MOV,false,DREG(EAX),DREG(TMPB));
  1574. gen_releasereg(DREG(TMPB));
  1575. @@ -443,49 +408,35 @@ static void dh_fpu_esc7(){
  1576. dyn_fill_ea();
  1577. switch(group){
  1578. case 0x00: /* FILD Bit16s */
  1579. - gen_call_function((void*)&FPU_FLD_16,"%Ddr",DREG(EA));
  1580. - cache_addb(0xdf);
  1581. - cache_addb(0x05|(decode.modrm.reg<<3));
  1582. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1583. + gen_call_function((void*)&FPU_FLD_16,"%Drd",DREG(EA));
  1584. + dh_fpu_mem(0xdf);
  1585. break;
  1586. case 0x01:
  1587. LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub);
  1588. break;
  1589. case 0x02: /* FIST Bit16s */
  1590. - cache_addb(0xdf);
  1591. - cache_addb(0x05|(decode.modrm.reg<<3));
  1592. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1593. - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA));
  1594. + dh_fpu_mem(0xdf);
  1595. + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA));
  1596. break;
  1597. case 0x03: /* FISTP Bit16s */
  1598. - cache_addb(0xdf);
  1599. - cache_addb(0x05|(decode.modrm.reg<<3));
  1600. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1601. - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA));
  1602. + dh_fpu_mem(0xdf);
  1603. + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA));
  1604. break;
  1605. case 0x04: /* FBLD packed BCD */
  1606. - gen_call_function((void*)&FPU_FLD_80,"%Ddr",DREG(EA));
  1607. - cache_addb(0xdf);
  1608. - cache_addb(0x05|(decode.modrm.reg<<3));
  1609. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1610. + gen_call_function((void*)&FPU_FLD_80,"%Drd",DREG(EA));
  1611. + dh_fpu_mem(0xdf);
  1612. break;
  1613. case 0x05: /* FILD Bit64s */
  1614. - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA));
  1615. - cache_addb(0xdf);
  1616. - cache_addb(0x05|(decode.modrm.reg<<3));
  1617. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1618. + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA));
  1619. + dh_fpu_mem(0xdf);
  1620. break;
  1621. case 0x06: /* FBSTP packed BCD */
  1622. - cache_addb(0xdf);
  1623. - cache_addb(0x05|(decode.modrm.reg<<3));
  1624. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1625. - gen_call_function((void*)&FPU_FST_80,"%Ddr",DREG(EA));
  1626. + dh_fpu_mem(0xdf);
  1627. + gen_call_function((void*)&FPU_FST_80,"%Drd",DREG(EA));
  1628. break;
  1629. case 0x07: /* FISTP Bit64s */
  1630. - cache_addb(0xdf);
  1631. - cache_addb(0x05|(decode.modrm.reg<<3));
  1632. - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1)));
  1633. - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA));
  1634. + dh_fpu_mem(0xdf);
  1635. + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA));
  1636. break;
  1637. default:
  1638. LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub);
  1639. diff --git a/src/cpu/core_dyn_x86/risc_x64.h b/src/cpu/core_dyn_x86/risc_x64.h
  1640. new file mode 100644
  1641. index 00000000..477cf2ef
  1642. --- /dev/null
  1643. +++ b/src/cpu/core_dyn_x86/risc_x64.h
  1644. @@ -0,0 +1,1199 @@
  1645. +/*
  1646. + * Copyright (C) 2002-2019 The DOSBox Team
  1647. + *
  1648. + * This program is free software; you can redistribute it and/or modify
  1649. + * it under the terms of the GNU General Public License as published by
  1650. + * the Free Software Foundation; either version 2 of the License, or
  1651. + * (at your option) any later version.
  1652. + *
  1653. + * This program is distributed in the hope that it will be useful,
  1654. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  1655. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  1656. + * GNU General Public License for more details.
  1657. + *
  1658. + * You should have received a copy of the GNU General Public License along
  1659. + * with this program; if not, write to the Free Software Foundation, Inc.,
  1660. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  1661. + */
  1662. +
  1663. +#if defined(_WIN64)
  1664. +enum {
  1665. + X64_REG_RAX=0,
  1666. + X64_REG_RBX,
  1667. + X64_REG_RCX,
  1668. + X64_REG_RDX,
  1669. + // volatiles
  1670. + X64_REG_R8,
  1671. + X64_REG_R9,
  1672. + X64_REG_R10,
  1673. + X64_REG_R11,
  1674. + // non-volatiles
  1675. + X64_REG_R12,
  1676. + X64_REG_R13,
  1677. + X64_REG_R14,
  1678. + X64_REG_R15,
  1679. + X64_REG_RSI,
  1680. + X64_REG_RDI,
  1681. + X64_REGS
  1682. +};
  1683. +static const int reg_args[4] = {X64_REG_RCX, X64_REG_RDX, X64_REG_R8, X64_REG_R9};
  1684. +#define ARG0_REG 1
  1685. +#define ARG1_REG 2
  1686. +#else
  1687. +enum {
  1688. + // (high)byte-accessible
  1689. + X64_REG_RAX=0,
  1690. + X64_REG_RBX,
  1691. + X64_REG_RCX,
  1692. + X64_REG_RDX,
  1693. + // volatiles
  1694. + X64_REG_RSI,
  1695. + X64_REG_RDI,
  1696. + X64_REG_R8,
  1697. + X64_REG_R9,
  1698. + X64_REG_R10,
  1699. + X64_REG_R11,
  1700. + // non-volatiles
  1701. + X64_REG_R12,
  1702. + X64_REG_R13,
  1703. + X64_REG_R14,
  1704. + X64_REG_R15,
  1705. + // delimiter
  1706. + X64_REGS
  1707. +};
  1708. +static const int reg_args[4] = {X64_REG_RDI, X64_REG_RSI, X64_REG_RDX, X64_REG_RCX};
  1709. +#define ARG0_REG 7
  1710. +#define ARG1_REG 6
  1711. +#endif
  1712. +
  1713. +static struct {
  1714. + bool flagsactive;
  1715. + Bitu last_used;
  1716. + GenReg * regs[X64_REGS];
  1717. +} x64gen;
  1718. +
  1719. +class opcode {
  1720. +public:
  1721. + opcode(void) : is_word(false), imm_size(0), rex(0) {}
  1722. + opcode(int reg,bool dword=true,Bit8u acc=1) : is_word(!dword), imm_size(0), rex(0) {
  1723. + setreg(reg, acc);
  1724. + }
  1725. +
  1726. + opcode& setword() {is_word=true; return *this;}
  1727. + opcode& set64(void) {rex|=0x48;return *this;}
  1728. + opcode& setimm(Bit64u _imm, int size) {imm=_imm;imm_size=size;return *this;}
  1729. +
  1730. + opcode& setreg(int r, Bit8u acc=1); // acc: 0=low byte, 1=word/dword, 4=high byte
  1731. + opcode& setrm(int r, Bit8u acc=1);
  1732. + opcode& setabsaddr(void* addr);
  1733. + opcode& setea(int rbase, int rscale=-1, int scale=0, Bit32s off=0);
  1734. +
  1735. + void Emit8Reg(Bit8u op);
  1736. + void Emit8(Bit8u op);
  1737. + void Emit16(Bit16u op);
  1738. +
  1739. +private:
  1740. + bool is_word;
  1741. + int reg;
  1742. + Bit64u imm;
  1743. + int imm_size;
  1744. +
  1745. + Bit8u rex, modrm, sib;
  1746. + Bits offset;
  1747. +
  1748. + void EmitImm(void) {
  1749. + switch(imm_size) {
  1750. + case 1: cache_addb((Bit8u)imm);break;
  1751. + case 2: cache_addw((Bit16u)imm);break;
  1752. + case 4: cache_addd((Bit32u)imm);break;
  1753. + case 8: cache_addq(imm);break;
  1754. + }
  1755. + }
  1756. +
  1757. + void EmitSibOffImm(void) {
  1758. + if (modrm<0xC0) {
  1759. + if ((modrm&7)==4) cache_addb(sib);
  1760. + switch (modrm>>6) {
  1761. + case 0:
  1762. + if ((modrm&7)==5) {
  1763. + // update offset to be RIP relative
  1764. + offset -= (Bits)cache.pos + 4 + imm_size;
  1765. + if ((Bit32s)offset != offset) IllegalOption("opcode::Emit: bad RIP address");
  1766. + } else if ((modrm&7)!=4 || (sib&7)!=5)
  1767. + break;
  1768. + case 2: cache_addd((Bit32u)offset); break;
  1769. + case 1: cache_addb((Bit8u)offset); break;
  1770. + }
  1771. + }
  1772. + EmitImm();
  1773. + }
  1774. +};
  1775. +
  1776. +void opcode::Emit8Reg(Bit8u op) {
  1777. + if (is_word) cache_addb(0x66);
  1778. + if (reg>=8) rex |= 0x41;
  1779. + if (rex) cache_addb(rex);
  1780. + cache_addb(op|(reg&7));
  1781. + EmitImm();
  1782. +}
  1783. +
  1784. +void opcode::Emit8(Bit8u op) {
  1785. + if (is_word) cache_addb(0x66);
  1786. + if (rex) cache_addb(rex);
  1787. + cache_addw(op+(modrm<<8));
  1788. + EmitSibOffImm();
  1789. +}
  1790. +
  1791. +void opcode::Emit16(Bit16u op) {
  1792. + if (is_word) cache_addb(0x66);
  1793. + if (rex) cache_addb(rex);
  1794. + cache_addw(op);
  1795. + cache_addb(modrm);
  1796. + EmitSibOffImm();
  1797. +}
  1798. +
  1799. +opcode& opcode::setreg(int r, Bit8u acc) {
  1800. + if (acc==4) {
  1801. + if (r>3 || rex) IllegalOption("opcode::setreg: cannot encode high byte");
  1802. + r += 4;
  1803. + }
  1804. + else if (acc==0 && r>3) rex |= 0x40;
  1805. + reg = r;
  1806. + return *this;
  1807. +}
  1808. +
  1809. +opcode& opcode::setrm(int r, Bit8u acc) {
  1810. + if (reg>=8) rex |= 0x44;
  1811. + if (r>=8) rex |= 0x41;
  1812. + if (acc==4) {
  1813. + if (r>3 || rex) IllegalOption("opcode::setrm: cannot encode high byte");
  1814. + r += 4;
  1815. + }
  1816. + else if (acc==0 && r>3) rex |= 0x40;
  1817. + modrm = 0xC0+((reg&7)<<3)+(r&7);
  1818. + return *this;
  1819. +}
  1820. +
  1821. +opcode& opcode::setabsaddr(void* addr) {
  1822. + /* address must be in one of three ranges:
  1823. + * < 0x80000000 or >= 0xFFFFFFFF80000000 (signed 32-bit absolute) enc: modrm+sib+4 bytes
  1824. + * cache.pos +/- 2GB (RIP relative) enc: modrm+4 bytes
  1825. + * &cpu_regs +/- 2GB (RBP relative) enc: modrm+1 or 4 bytes
  1826. + */
  1827. + if (reg>=8) rex |= 0x44;
  1828. + modrm = (reg&7)<<3;
  1829. + offset = (Bits)addr - (Bits)&cpu_regs;
  1830. + if ((Bit32s)offset == offset) { // [RBP+(Bit8s/Bit32s)]
  1831. + if ((Bit8s)offset == offset) modrm += 0x45;
  1832. + else modrm += 0x85;
  1833. + } else {
  1834. + offset = (Bits)addr;
  1835. + if ((Bit32s)offset == offset) { // [abs Bit32s]
  1836. + modrm += 4;
  1837. + sib = 0x25;
  1838. + } else modrm += 5; // [RIP+Bit32s]
  1839. + }
  1840. +
  1841. + return *this;
  1842. +}
  1843. +
  1844. +opcode& opcode::setea(int rbase, int rscale, int scale, Bit32s off) {
  1845. + if (reg>=8) rex |= 0x44;
  1846. + if (rbase>=8) rex |= 0x41, rbase &= 7;
  1847. + if (rscale>=8) rex |= 0x42, rscale &= 7;
  1848. + modrm = (reg&7)<<3;
  1849. + offset = off;
  1850. +
  1851. + if (rbase<0 || rscale>=0 || rbase==4) { // sib required
  1852. + modrm += 4;
  1853. + if (rscale>=0) sib = (scale<<6)+(rscale<<3);
  1854. + else sib = 4<<3;
  1855. + if (rbase>=0) sib += rbase;
  1856. + else sib += 5;
  1857. + } else modrm += rbase;
  1858. +
  1859. + if (rbase==5 || (off && rbase>=0)) {
  1860. + if ((Bit8s)off == off) modrm += 0x40;
  1861. + else modrm += 0x80;
  1862. + }
  1863. +
  1864. + return *this;
  1865. +}
  1866. +
  1867. +
  1868. +class GenReg {
  1869. +public:
  1870. + GenReg(Bit8u _index) : index(_index) {
  1871. + notusable=false;dynreg=0;
  1872. + }
  1873. + DynReg * dynreg;
  1874. + Bitu last_used; //Keeps track of last assigned regs
  1875. + const Bit8u index;
  1876. + bool notusable;
  1877. + void Load(DynReg * _dynreg,bool stale=false) {
  1878. + if (!_dynreg) return;
  1879. + if (GCC_UNLIKELY((Bitu)dynreg)) Clear();
  1880. + dynreg=_dynreg;
  1881. + last_used=x64gen.last_used;
  1882. + dynreg->flags&=~DYNFLG_CHANGED;
  1883. + dynreg->genreg=this;
  1884. + if ((!stale) && (dynreg->flags & (DYNFLG_LOAD|DYNFLG_ACTIVE))) {
  1885. + opcode(index).setabsaddr(dynreg->data).Emit8(0x8B); // mov r32, []
  1886. + }
  1887. + dynreg->flags|=DYNFLG_ACTIVE;
  1888. + }
  1889. + void Save(void) {
  1890. + if (GCC_UNLIKELY(!((Bitu)dynreg))) IllegalOption("GenReg->Save");
  1891. + dynreg->flags&=~DYNFLG_CHANGED;
  1892. + opcode(index).setabsaddr(dynreg->data).Emit8(0x89); // mov [], r32
  1893. + }
  1894. + void Release(void) {
  1895. + if (GCC_UNLIKELY(!((Bitu)dynreg))) return;
  1896. + if (dynreg->flags&DYNFLG_CHANGED && dynreg->flags&DYNFLG_SAVE) {
  1897. + Save();
  1898. + }
  1899. + dynreg->flags&=~(DYNFLG_CHANGED|DYNFLG_ACTIVE);
  1900. + dynreg->genreg=0;dynreg=0;
  1901. + }
  1902. + void Clear(void) {
  1903. + if (!dynreg) return;
  1904. + if (dynreg->flags&DYNFLG_CHANGED) {
  1905. + Save();
  1906. + }
  1907. + dynreg->genreg=0;dynreg=0;
  1908. + }
  1909. +};
  1910. +
  1911. +static BlockReturn gen_runcodeInit(Bit8u *code);
  1912. +static BlockReturn (*gen_runcode)(Bit8u *code) = gen_runcodeInit;
  1913. +
  1914. +static BlockReturn gen_runcodeInit(Bit8u *code) {
  1915. + Bit8u* oldpos = cache.pos;
  1916. + cache.pos = &cache_code_link_blocks[128];
  1917. + gen_runcode = (BlockReturn(*)(Bit8u*))cache.pos;
  1918. +
  1919. + cache_addb(0xA1);cache_addq((Bitu)&reg_flags); // mov eax, [reg_flags]
  1920. + cache_addb(0x55); // push rbp
  1921. + cache_addw(0x5741); // push r15
  1922. + cache_addw(0x5641); // push r14
  1923. + cache_addb(0x25);cache_addd(FMASK_TEST); // and eax, imm32
  1924. + cache_addw(0x5541); // push r13
  1925. + cache_addw(0x5441); // push r12
  1926. + cache_addb(0x53); // push rbx
  1927. +#if defined(_WIN64)
  1928. + cache_addb(0x57); // push rdi
  1929. + cache_addb(0x56); // push rsi
  1930. +#endif
  1931. + cache_addb(0x49);cache_addw(0xE789); // mov r15, rsp
  1932. + cache_addw(0xBD48);cache_addq((Bitu)&cpu_regs); // mov rbp, &cpu_regs
  1933. + cache_addd(0xF0E48348); // and rsp, ~15
  1934. + cache_addd(0x08ec8348); // sub rsp, 8
  1935. + cache_addw(0x5741); // push r15
  1936. +
  1937. + cache_addb(0xe8); // call jmp_block
  1938. + Bit8u *diff = cache.pos;
  1939. + cache_addd(0);
  1940. + // eax = return value, ecx = flags
  1941. + cache_addw(0xC289); // mov edx, eax
  1942. + cache_addb(0xA1);cache_addq((Bitu)&reg_flags); // mov eax, [reg_flags]
  1943. + cache_addw(0xE181);cache_addd(FMASK_TEST); // and ecx, FMASK_TEST
  1944. + cache_addb(0x25);cache_addd(~FMASK_TEST); // and eax, ~FMASK_TEST
  1945. + cache_addw(0xC809); // or eax, ecx
  1946. + cache_addb(0xA3);cache_addq((Bitu)&reg_flags); // mov [reg_flags], eax
  1947. + cache_addw(0xD089); // mov eax, edx
  1948. + cache_addb(0x5c); // pop rsp
  1949. +#if defined(_WIN64)
  1950. + cache_addb(0x5e); // pop rsi
  1951. + cache_addb(0x5f); // pop rdi
  1952. +#endif
  1953. + cache_addb(0x5b); // pop rbx
  1954. + cache_addw(0x5c41); // pop r12
  1955. + cache_addw(0x5d41); // pop r13
  1956. + cache_addw(0x5e41); // pop r14
  1957. + cache_addw(0x5f41); // pop r15
  1958. + cache_addb(0x5d); // pop rbp
  1959. + cache_addb(0xc3); // ret
  1960. + *(Bit32u*)(diff) = (Bit32u)(cache.pos - diff - 4);
  1961. + // jmp_block:
  1962. + cache_addb(0x50); // push rax
  1963. + cache_addb(0xff); // jmp ARG1
  1964. + cache_addb(0xE0+ARG0_REG);
  1965. +
  1966. + cache.pos = oldpos;
  1967. + return gen_runcode(code);
  1968. +}
  1969. +
  1970. +static GenReg * FindDynReg(DynReg * dynreg,bool stale=false) {
  1971. + x64gen.last_used++;
  1972. + if (dynreg->genreg) {
  1973. + dynreg->genreg->last_used=x64gen.last_used;
  1974. + return dynreg->genreg;
  1975. + }
  1976. + /* Find best match for selected global reg */
  1977. + Bits i;
  1978. + Bits first_used,first_index;
  1979. + first_used=-1;
  1980. + if (dynreg->flags & DYNFLG_HAS8) {
  1981. + /* Has to be rax,rbx,rcx,rdx */
  1982. + for (i=first_index=0;i<=3;i++) {
  1983. + GenReg * genreg=x64gen.regs[i];
  1984. + if (genreg->notusable) continue;
  1985. + if (!(genreg->dynreg)) {
  1986. + genreg->Load(dynreg,stale);
  1987. + return genreg;
  1988. + }
  1989. + if (genreg->last_used<(Bitu)first_used) {
  1990. + first_used=genreg->last_used;
  1991. + first_index=i;
  1992. + }
  1993. + }
  1994. + } else {
  1995. + for (i=first_index=X64_REGS-1;i>=0;i--) {
  1996. + GenReg * genreg=x64gen.regs[i];
  1997. + if (genreg->notusable) continue;
  1998. + if (!(genreg->dynreg)) {
  1999. + genreg->Load(dynreg,stale);
  2000. + return genreg;
  2001. + }
  2002. + if (genreg->last_used<(Bitu)first_used) {
  2003. + first_used=genreg->last_used;
  2004. + first_index=i;
  2005. + }
  2006. + }
  2007. + }
  2008. + /* No free register found use earliest assigned one */
  2009. + GenReg * newreg=x64gen.regs[first_index];
  2010. + newreg->Load(dynreg,stale);
  2011. + return newreg;
  2012. +}
  2013. +
  2014. +static void ForceDynReg(GenReg * genreg,DynReg * dynreg) {
  2015. + genreg->last_used = ++x64gen.last_used;
  2016. + if (dynreg->genreg) {
  2017. + if (dynreg->genreg==genreg) return;
  2018. + if (genreg->dynreg) genreg->Clear();
  2019. + // mov dst32, src32
  2020. + opcode(genreg->index).setrm(dynreg->genreg->index).Emit8(0x8B);
  2021. + dynreg->genreg->dynreg=0;
  2022. + dynreg->genreg=genreg;
  2023. + genreg->dynreg=dynreg;
  2024. + } else genreg->Load(dynreg);
  2025. +}
  2026. +
  2027. +static void gen_preloadreg(DynReg * dynreg) {
  2028. + FindDynReg(dynreg);
  2029. +}
  2030. +
  2031. +static void gen_releasereg(DynReg * dynreg) {
  2032. + GenReg * genreg=dynreg->genreg;
  2033. + if (genreg) genreg->Release();
  2034. + else dynreg->flags&=~(DYNFLG_ACTIVE|DYNFLG_CHANGED);
  2035. +}
  2036. +
  2037. +static void gen_setupreg(DynReg * dnew,DynReg * dsetup) {
  2038. + dnew->flags=dsetup->flags;
  2039. + if (dnew->genreg==dsetup->genreg) return;
  2040. + /* Not the same genreg must be wrong */
  2041. + if (dnew->genreg) {
  2042. + /* Check if the genreg i'm changing is actually linked to me */
  2043. + if (dnew->genreg->dynreg==dnew) dnew->genreg->dynreg=0;
  2044. + }
  2045. + dnew->genreg=dsetup->genreg;
  2046. + if (dnew->genreg) dnew->genreg->dynreg=dnew;
  2047. +}
  2048. +
  2049. +static void gen_synchreg(DynReg * dnew,DynReg * dsynch) {
  2050. + /* First make sure the registers match */
  2051. + if (dnew->genreg!=dsynch->genreg) {
  2052. + if (dnew->genreg) dnew->genreg->Clear();
  2053. + if (dsynch->genreg) {
  2054. + dsynch->genreg->Load(dnew);
  2055. + }
  2056. + }
  2057. + /* Always use the loadonce flag from either state */
  2058. + dnew->flags|=(dsynch->flags & dnew->flags&DYNFLG_ACTIVE);
  2059. + if ((dnew->flags ^ dsynch->flags) & DYNFLG_CHANGED) {
  2060. + /* Ensure the changed value gets saved */
  2061. + if (dnew->flags & DYNFLG_CHANGED) {
  2062. + dnew->genreg->Save();
  2063. + } else dnew->flags|=DYNFLG_CHANGED;
  2064. + }
  2065. +}
  2066. +
  2067. +static void gen_needflags(void) {
  2068. + if (!x64gen.flagsactive) {
  2069. + x64gen.flagsactive=true;
  2070. + cache_addb(0x9d); //POPFQ
  2071. + }
  2072. +}
  2073. +
  2074. +static void gen_protectflags(void) {
  2075. + if (x64gen.flagsactive) {
  2076. + x64gen.flagsactive=false;
  2077. + cache_addb(0x9c); //PUSHFQ
  2078. + }
  2079. +}
  2080. +
  2081. +static void gen_discardflags(void) {
  2082. + if (!x64gen.flagsactive) {
  2083. + x64gen.flagsactive=true;
  2084. + cache_addd(0x08C48348); //ADD RSP,8
  2085. + }
  2086. +}
  2087. +
  2088. +static void gen_needcarry(void) {
  2089. + gen_needflags();
  2090. +}
  2091. +
  2092. +static void gen_setzeroflag(void) {
  2093. + if (x64gen.flagsactive) IllegalOption("gen_setzeroflag");
  2094. + cache_addd(0x40240C83); //OR DWORD [RSP],0x40
  2095. +}
  2096. +
  2097. +static void gen_clearzeroflag(void) {
  2098. + if (x64gen.flagsactive) IllegalOption("gen_clearzeroflag");
  2099. + cache_addd(0xBF242483); //AND DWORD [RSP],~0x40
  2100. +}
  2101. +
  2102. +static bool skip_flags=false;
  2103. +
  2104. +static void set_skipflags(bool state) {
  2105. + if (!state) gen_discardflags();
  2106. + skip_flags=state;
  2107. +}
  2108. +
  2109. +static void gen_reinit(void) {
  2110. + x64gen.last_used=0;
  2111. + x64gen.flagsactive=false;
  2112. + for (Bitu i=0;i<X64_REGS;i++) {
  2113. + x64gen.regs[i]->dynreg=0;
  2114. + }
  2115. +}
  2116. +
  2117. +static void gen_load_host(void * data,DynReg * dr1,Bitu size) {
  2118. + opcode op = opcode(FindDynReg(dr1,true)->index).setabsaddr(data);
  2119. + switch (size) {
  2120. + case 1: // movzx r32, byte[]
  2121. + op.Emit16(0xB60F);
  2122. + break;
  2123. + case 2: // movzx r32, word[]
  2124. + op.Emit16(0xB70F);
  2125. + break;
  2126. + case 4: // mov r32, []
  2127. + op.Emit8(0x8B);
  2128. + break;
  2129. + default:
  2130. + IllegalOption("gen_load_host");
  2131. + }
  2132. + dr1->flags|=DYNFLG_CHANGED;
  2133. +}
  2134. +
  2135. +static void gen_mov_host(void * data,DynReg * dr1,Bitu size,Bit8u di1=0) {
  2136. + int idx = FindDynReg(dr1,size==4)->index;
  2137. + opcode op;
  2138. + Bit8u tmp;
  2139. + switch (size) {
  2140. + case 1:
  2141. + op.setreg(idx,di1);
  2142. + tmp = 0x8A; // mov r8, []
  2143. + break;
  2144. + case 2: op.setword(); // mov r16, []
  2145. + case 4: op.setreg(idx);
  2146. + tmp = 0x8B; // mov r32, []
  2147. + break;
  2148. + default:
  2149. + IllegalOption("gen_mov_host");
  2150. + }
  2151. + op.setabsaddr(data).Emit8(tmp);
  2152. + dr1->flags|=DYNFLG_CHANGED;
  2153. +}
  2154. +
  2155. +static void gen_load_arg_reg(int argno,DynReg *dr,const char *s) {
  2156. + bool release;
  2157. + GenReg *gen = x64gen.regs[reg_args[argno]];
  2158. + GenReg *src = dr->genreg;
  2159. + opcode op(gen->index);
  2160. +
  2161. + if (*s=='r') {
  2162. + s++;
  2163. + release = true;
  2164. + } else release = false;
  2165. +
  2166. + gen->Clear();
  2167. +
  2168. + switch (*s) {
  2169. + case 'h':
  2170. + if (src) {
  2171. + if (src->index>3 || gen->index>3) {
  2172. + // shld r32,r32,24
  2173. + opcode(src->index).setimm(24,1).setrm(gen->index).Emit16(0xA40F);
  2174. + op.setrm(gen->index,0);
  2175. + } else op.setrm(src->index,4);
  2176. + } else op.setabsaddr(((Bit8u*)dr->data)+1);
  2177. + op.Emit16(0xB60F); // movzx r32, r/m8
  2178. + break;
  2179. + case 'l':
  2180. + if (src) op.setrm(src->index,0);
  2181. + else op.setabsaddr(dr->data);
  2182. + op.Emit16(0xB60F); // movzx r32, r/m8
  2183. + break;
  2184. + case 'w':
  2185. + if (src) op.setrm(src->index);
  2186. + else op.setabsaddr(dr->data);
  2187. + op.Emit16(0xB70F); // movzx r32, r/m16
  2188. + break;
  2189. + case 'd':
  2190. + if (src) {
  2191. + if (src != gen) op.setrm(src->index).Emit8(0x8B);
  2192. + } else op.setabsaddr(dr->data).Emit8(0x8B);
  2193. + break;
  2194. + default:
  2195. + IllegalOption("gen_call_function param:DREG");
  2196. + }
  2197. + if (release) gen_releasereg(dr);
  2198. +}
  2199. +static void gen_load_imm(int index,Bitu imm) {
  2200. + if (imm==0)
  2201. + opcode(index).setrm(index).Emit8(0x33); // xor r32,r32
  2202. + else if ((Bit32u)imm==imm)
  2203. + opcode(index).setimm(imm,4).Emit8Reg(0xB8); // MOV r32, imm32
  2204. + else if ((Bit32s)imm==imm)
  2205. + opcode(0).set64().setimm(imm,4).setrm(index).Emit8(0xC7); // mov r64, simm32
  2206. + else
  2207. + opcode(index).set64().setabsaddr((void*)imm).Emit8(0x8D); // lea r64, [imm]
  2208. +}
  2209. +
  2210. +static void gen_dop_byte(DualOps op,DynReg * dr1,Bit8u di1,DynReg * dr2,Bit8u di2) {
  2211. + Bit8u tmp;
  2212. + opcode i(FindDynReg(dr1)->index,true,di1);
  2213. + i.setrm(FindDynReg(dr2)->index,di2);
  2214. +
  2215. + switch (op) {
  2216. + case DOP_ADD: tmp=0x02; break;
  2217. + case DOP_ADC: tmp=0x12; break;
  2218. + case DOP_SUB: tmp=0x2a; break;
  2219. + case DOP_SBB: tmp=0x1a; break;
  2220. + case DOP_CMP: tmp=0x3a; goto nochange;
  2221. + case DOP_XOR: tmp=0x32; break;
  2222. + case DOP_AND: tmp=0x22; if ((dr1==dr2) && (di1==di2)) goto nochange; break;
  2223. + case DOP_OR: tmp=0x0a; if ((dr1==dr2) && (di1==di2)) goto nochange; break;
  2224. + case DOP_TEST: tmp=0x84; goto nochange;
  2225. + case DOP_MOV: if ((dr1==dr2) && (di1==di2)) return; tmp=0x8a; break;
  2226. + case DOP_XCHG: tmp=0x86; dr2->flags|=DYNFLG_CHANGED; break;
  2227. + default:
  2228. + IllegalOption("gen_dop_byte");
  2229. + }
  2230. + dr1->flags|=DYNFLG_CHANGED;
  2231. +nochange:
  2232. + i.Emit8(tmp);
  2233. +}
  2234. +
  2235. +static void gen_dop_byte_imm(DualOps op,DynReg * dr1,Bit8u di1,Bitu imm) {
  2236. + Bit8u tmp=0x80;
  2237. + int dst = FindDynReg(dr1)->index;
  2238. + opcode i;
  2239. + i.setimm(imm,1);
  2240. + imm &= 0xff;
  2241. +
  2242. + switch (op) {
  2243. + case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break;
  2244. + case DOP_ADC: i.setreg(2); break;
  2245. + case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break;
  2246. + case DOP_SBB: i.setreg(3); break;
  2247. + case DOP_CMP: i.setreg(7); goto nochange; //Doesn't change
  2248. + case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break;
  2249. + case DOP_AND: i.setreg(4); if (imm==255) goto nochange; break;
  2250. + case DOP_OR: i.setreg(1); if (!imm) goto nochange; break;
  2251. + case DOP_TEST: i.setreg(0);tmp=0xF6;goto nochange;
  2252. + case DOP_MOV: i.setreg(dst,di1).Emit8Reg(0xB0);
  2253. + dr1->flags|=DYNFLG_CHANGED;
  2254. + return;
  2255. + default:
  2256. + IllegalOption("gen_dop_byte_imm");
  2257. + }
  2258. + dr1->flags|=DYNFLG_CHANGED;
  2259. +nochange:
  2260. + i.setrm(dst,di1).Emit8(tmp);
  2261. +}
  2262. +
  2263. +static void gen_dop_byte_imm_mem(DualOps op,DynReg * dr1,Bit8u di1,void* data) {
  2264. + opcode i = opcode(FindDynReg(dr1)->index,true,di1).setabsaddr(data);
  2265. + Bit8u tmp;
  2266. + switch (op) {
  2267. + case DOP_ADD: tmp=0x02; break;
  2268. + case DOP_ADC: tmp=0x12; break;
  2269. + case DOP_SUB: tmp=0x2a; break;
  2270. + case DOP_SBB: tmp=0x1a; break;
  2271. + case DOP_CMP: tmp=0x3a; goto nochange; //Doesn't change
  2272. + case DOP_XOR: tmp=0x32; break;
  2273. + case DOP_AND: tmp=0x22; break;
  2274. + case DOP_OR: tmp=0x0a; break;
  2275. + case DOP_TEST: tmp=0x84; goto nochange; //Doesn't change
  2276. + case DOP_MOV: tmp=0x85; break;
  2277. + default:
  2278. + IllegalOption("gen_dop_byte_imm_mem");
  2279. + }
  2280. + dr1->flags|=DYNFLG_CHANGED;
  2281. +nochange:
  2282. + i.Emit8(tmp);
  2283. +}
  2284. +
  2285. +static void gen_sop_byte(SingleOps op,DynReg * dr1,Bit8u di1) {
  2286. + Bit8u tmp;
  2287. + int dst = FindDynReg(dr1)->index;
  2288. + opcode i;
  2289. +
  2290. + switch (op) {
  2291. + case SOP_INC: i.setreg(0);tmp=0xFE; break;
  2292. + case SOP_DEC: i.setreg(1);tmp=0xFE; break;
  2293. + case SOP_NOT: i.setreg(2);tmp=0xF6; break;
  2294. + case SOP_NEG: i.setreg(3);tmp=0xF6; break;
  2295. + default:
  2296. + IllegalOption("gen_sop_byte");
  2297. + }
  2298. + i.setrm(dst,di1).Emit8(tmp);
  2299. + dr1->flags|=DYNFLG_CHANGED;
  2300. +}
  2301. +
  2302. +static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) {
  2303. + int src = FindDynReg(dsr)->index;
  2304. + opcode(FindDynReg(ddr,true)->index).setrm(src).Emit16(sign ? 0xBF0F:0xB70F);
  2305. + ddr->flags|=DYNFLG_CHANGED;
  2306. +}
  2307. +
  2308. +static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bit8u dsi) {
  2309. + int src = FindDynReg(dsr)->index;
  2310. + int dst = FindDynReg(ddr,dword)->index;
  2311. + if (dsi && (src>3 || dst>=8)) { // high-byte + REX = extra work required
  2312. + // high-byte + REX prefix = extra work required:
  2313. + // move source high-byte to dest low-byte then extend dest
  2314. + gen_protectflags(); // shld changes flags, movzx/movsx does not
  2315. +
  2316. + // shld r32, r32, 24
  2317. + opcode(src).setimm(24,1).setrm(dst).Emit16(0xA40F);
  2318. + src = dst;
  2319. + dsi = 0;
  2320. + }
  2321. + opcode(dst,dword).setrm(src,dsi).Emit16(sign ? 0xBE0F:0xB60F);
  2322. + ddr->flags|=DYNFLG_CHANGED;
  2323. +}
  2324. +
  2325. +static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bit32s imm) {
  2326. + GenReg * gdr=FindDynReg(ddr,ddr!=dsr1 && ddr!=dsr2);
  2327. +
  2328. + if (!dsr1 && dsr2 && scale<2) {
  2329. + // change [2*reg] to [reg+reg]
  2330. + // or [0+1*reg] to [reg+0*reg]
  2331. + // (SIB with no base requires 32-bit offset)
  2332. + dsr1 = dsr2;
  2333. + if (!scale) dsr2 = NULL;
  2334. + else scale = 0;
  2335. + }
  2336. +
  2337. + int idx1 = dsr1 ? FindDynReg(dsr1)->index : -1;
  2338. + int idx2 = dsr2 ? FindDynReg(dsr2)->index : -1;
  2339. +
  2340. + if (!scale && !imm && dsr2 && idx1==13 && idx2!=13) {
  2341. + // avoid emitting displacement for r13 base (swap base/index)
  2342. + int s = idx1;
  2343. + idx1 = idx2;
  2344. + idx2 = s;
  2345. + }
  2346. +
  2347. + opcode(gdr->index).setea(idx1, idx2, scale, imm).Emit8(0x8D);
  2348. + ddr->flags|=DYNFLG_CHANGED;
  2349. +}
  2350. +
  2351. +static void gen_lea_imm_mem(DynReg * ddr,DynReg * dsr,void* data) {
  2352. + gen_load_host(data, ddr, 4);
  2353. + gen_lea(ddr, ddr, dsr, 0, 0);
  2354. +}
  2355. +
  2356. +static void gen_dop_word(DualOps op,bool dword,DynReg * dr1,DynReg * dr2) {
  2357. + Bit8u tmp;
  2358. + GenReg *gr2 = FindDynReg(dr2);
  2359. + GenReg *gr1 = FindDynReg(dr1,dword && op==DOP_MOV);
  2360. +
  2361. + switch (op) {
  2362. + case DOP_ADD: tmp=0x03; break;
  2363. + case DOP_ADC: tmp=0x13; break;
  2364. + case DOP_SUB: tmp=0x2b; break;
  2365. + case DOP_SBB: tmp=0x1b; break;
  2366. + case DOP_CMP: tmp=0x3b; goto nochange;
  2367. + case DOP_XOR: tmp=0x33; break;
  2368. + case DOP_AND: tmp=0x23; if (dr1==dr2) goto nochange; break;
  2369. + case DOP_OR: tmp=0x0b; if (dr1==dr2) goto nochange; break;
  2370. + case DOP_TEST: tmp=0x85; goto nochange;
  2371. + case DOP_MOV: if (dr1==dr2) return; tmp=0x8b; break;
  2372. + case DOP_XCHG:
  2373. + dr2->flags|=DYNFLG_CHANGED;
  2374. + if (dword && !((dr1->flags&DYNFLG_HAS8) ^ (dr2->flags&DYNFLG_HAS8))) {
  2375. + dr1->genreg=gr2;gr2->dynreg=dr1;
  2376. + dr2->genreg=gr1;gr1->dynreg=dr2;
  2377. + dr1->flags|=DYNFLG_CHANGED;
  2378. + return;
  2379. + }
  2380. + tmp=0x87;
  2381. + break;
  2382. + default:
  2383. + IllegalOption("gen_dop_word");
  2384. + }
  2385. + dr1->flags|=DYNFLG_CHANGED;
  2386. +nochange:
  2387. + opcode(gr1->index,dword).setrm(gr2->index).Emit8(tmp);
  2388. +}
  2389. +
  2390. +static void gen_dop_word_imm(DualOps op,bool dword,DynReg * dr1,Bits imm) {
  2391. + Bit8u tmp=0x81;
  2392. + int dst = FindDynReg(dr1,dword && op==DOP_MOV)->index;
  2393. + opcode i;
  2394. + if (!dword) i.setword();
  2395. + if (op <= DOP_OR && (Bit8s)imm==imm) {
  2396. + i.setimm(imm, 1);
  2397. + tmp = 0x83;
  2398. + } else i.setimm(imm, dword?4:2);
  2399. +
  2400. + switch (op) {
  2401. + case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break;
  2402. + case DOP_ADC: i.setreg(2); break;
  2403. + case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break;
  2404. + case DOP_SBB: i.setreg(3); break;
  2405. + case DOP_CMP: i.setreg(7); goto nochange; //Doesn't change
  2406. + case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break;
  2407. + case DOP_AND: i.setreg(4); if (imm==-1) goto nochange; break;
  2408. + case DOP_OR: i.setreg(1); if (!imm) goto nochange; break;
  2409. + case DOP_TEST: i.setreg(0);tmp=0xF7; goto nochange; //Doesn't change
  2410. + case DOP_MOV: i.setreg(dst).Emit8Reg(0xB8); dr1->flags|=DYNFLG_CHANGED; return;
  2411. + default:
  2412. + IllegalOption("gen_dop_word_imm");
  2413. + }
  2414. + dr1->flags|=DYNFLG_CHANGED;
  2415. +nochange:
  2416. + i.setrm(dst).Emit8(tmp);
  2417. +}
  2418. +
/* Word/dword ALU operation between dr1 and a host memory operand:
 * dr1 = dr1 <op> *(data), using RIP/abs addressing via setabsaddr.
 * CMP and TEST do not modify dr1 and skip DYNFLG_CHANGED.
 * Note: DOP_XCHG (0x87) also writes back to *data. */
static void gen_dop_word_imm_mem(DualOps op,bool dword,DynReg * dr1,void* data) {
	// for a dword MOV the old register value is dead and need not be loaded
	opcode i = opcode(FindDynReg(dr1,dword && op==DOP_MOV)->index,dword).setabsaddr(data);
	Bit8u tmp;
	switch (op) {
	case DOP_ADD:	tmp=0x03; break;
	case DOP_ADC:	tmp=0x13; break;
	case DOP_SUB:	tmp=0x2b; break;
	case DOP_SBB:	tmp=0x1b; break;
	case DOP_CMP:	tmp=0x3b; goto nochange;	//Doesn't change
	case DOP_XOR:	tmp=0x33; break;
	case DOP_AND:	tmp=0x23; break;
	case DOP_OR:	tmp=0x0b; break;
	case DOP_TEST:	tmp=0x85; goto nochange;	//Doesn't change
	case DOP_MOV:	tmp=0x8b; break;
	case DOP_XCHG:	tmp=0x87; break;
	default:
		IllegalOption("gen_dop_word_imm_mem");
	}
	dr1->flags|=DYNFLG_CHANGED;
nochange:
	i.Emit8(tmp);
}
  2441. +
/* ALU op between dr1 and a host variable at address drd.
 * Thin alias of gen_dop_word_imm_mem — presumably kept so this backend
 * exposes the same entry points as the other risc_* backends. */
static void gen_dop_word_var(DualOps op,bool dword,DynReg * dr1,void* drd) {
	gen_dop_word_imm_mem(op,dword,dr1,drd);
}
  2445. +
  2446. +static void gen_imul_word(bool dword,DynReg * dr1,DynReg * dr2) {
  2447. + // imul reg, reg
  2448. + opcode(FindDynReg(dr1)->index,dword).setrm(FindDynReg(dr2)->index).Emit16(0xAF0F);
  2449. + dr1->flags|=DYNFLG_CHANGED;
  2450. +}
  2451. +
  2452. +static void gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm) {
  2453. + opcode op = opcode(FindDynReg(dr1)->index,dword).setrm(FindDynReg(dr2)->index);
  2454. + if ((Bit8s)imm==imm) op.setimm(imm,1).Emit8(0x6B);
  2455. + else op.setimm(imm,dword?4:2).Emit8(0x69);
  2456. + dr1->flags|=DYNFLG_CHANGED;
  2457. +}
  2458. +
  2459. +static void gen_sop_word(SingleOps op,bool dword,DynReg * dr1) {
  2460. + opcode i;
  2461. + Bit8u tmp;
  2462. + if (!dword) i.setword();
  2463. + switch (op) {
  2464. + case SOP_INC: i.setreg(0);tmp=0xFF;break;
  2465. + case SOP_DEC: i.setreg(1);tmp=0xFF;break;
  2466. + case SOP_NOT: i.setreg(2);tmp=0xF7;break;
  2467. + case SOP_NEG: i.setreg(3);tmp=0xF7;break;
  2468. + default:
  2469. + IllegalOption("gen_sop_word");
  2470. + }
  2471. + i.setrm(FindDynReg(dr1)->index).Emit8(tmp);
  2472. + dr1->flags|=DYNFLG_CHANGED;
  2473. +}
  2474. +
  2475. +static void gen_shift_byte_cl(Bitu op,DynReg * dr1,Bit8u di1,DynReg * drecx) {
  2476. + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
  2477. + opcode(op).setrm(FindDynReg(dr1)->index,di1).Emit8(0xD2);
  2478. + dr1->flags|=DYNFLG_CHANGED;
  2479. +}
  2480. +
  2481. +static void gen_shift_byte_imm(Bitu op,DynReg * dr1,Bit8u di1,Bit8u imm) {
  2482. + opcode(op).setimm(imm,1).setrm(FindDynReg(dr1)->index,di1).Emit8(0xC0);
  2483. + dr1->flags|=DYNFLG_CHANGED;
  2484. +}
  2485. +
  2486. +static void gen_shift_word_cl(Bitu op,bool dword,DynReg * dr1,DynReg * drecx) {
  2487. + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
  2488. + opcode(op,dword).setrm(FindDynReg(dr1)->index).Emit8(0xD3);
  2489. + dr1->flags|=DYNFLG_CHANGED;
  2490. +}
  2491. +
  2492. +static void gen_shift_word_imm(Bitu op,bool dword,DynReg * dr1,Bit8u imm) {
  2493. + opcode(op,dword).setimm(imm,1).setrm(FindDynReg(dr1)->index).Emit8(0xC1);
  2494. + dr1->flags|=DYNFLG_CHANGED;
  2495. +}
  2496. +
  2497. +static void gen_cbw(bool dword,DynReg * dyn_ax) {
  2498. + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
  2499. + dyn_ax->flags|=DYNFLG_CHANGED;
  2500. + if (!dword) cache_addw(0x9866);
  2501. + cache_addb(0x98);
  2502. +}
  2503. +
  2504. +static void gen_cwd(bool dword,DynReg * dyn_ax,DynReg * dyn_dx) {
  2505. + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
  2506. + ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx);
  2507. + dyn_ax->flags|=DYNFLG_CHANGED;
  2508. + dyn_dx->flags|=DYNFLG_CHANGED;
  2509. + if (!dword) cache_addw(0x9966);
  2510. + else cache_addb(0x99);
  2511. +}
  2512. +
  2513. +static void gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bit8u di1) {
  2514. + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
  2515. + opcode(imul?5:4).setrm(FindDynReg(dr1)->index,di1).Emit8(0xF6);
  2516. + dyn_ax->flags|=DYNFLG_CHANGED;
  2517. +}
  2518. +
  2519. +static void gen_mul_word(bool imul,DynReg * dyn_ax,DynReg * dyn_dx,bool dword,DynReg * dr1) {
  2520. + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
  2521. + ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx);
  2522. + opcode(imul?5:4,dword).setrm(FindDynReg(dr1)->index).Emit8(0xF7);
  2523. + dyn_ax->flags|=DYNFLG_CHANGED;
  2524. + dyn_dx->flags|=DYNFLG_CHANGED;
  2525. +}
  2526. +
  2527. +static void gen_dshift_imm(bool dword,bool left,DynReg * dr1,DynReg * dr2,Bitu imm) {
  2528. + // shld/shrd imm
  2529. + opcode(FindDynReg(dr2)->index,dword).setimm(imm,1).setrm(FindDynReg(dr1)->index).Emit16(left ? 0xA40F:0xAC0F);
  2530. + dr1->flags|=DYNFLG_CHANGED;
  2531. +}
  2532. +
  2533. +static void gen_dshift_cl(bool dword,bool left,DynReg * dr1,DynReg * dr2,DynReg * drecx) {
  2534. + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
  2535. + // shld/shrd cl
  2536. + opcode(FindDynReg(dr2)->index,dword).setrm(FindDynReg(dr1)->index).Emit16(left ? 0xA50F:0xAD0F);
  2537. + dr1->flags|=DYNFLG_CHANGED;
  2538. +}
  2539. +
/* Emit a call to host code at 'func' (NULL = target already in RAX, emit
 * "call rax"). All volatile (caller-saved) host registers are flushed first;
 * RSI/RDI are only volatile on Win64, hence the #if. On Win64 the mandatory
 * 32-byte shadow space is reserved around the call. If release_flags is set,
 * the saved lazy-flags slot is popped together with the stack fixup after
 * the call, keeping RSP aligned at the call itself. */
static void gen_call_ptr(void *func=NULL, bool release_flags=false) {
	x64gen.regs[X64_REG_RAX]->Clear();
	x64gen.regs[X64_REG_RCX]->Clear();
	x64gen.regs[X64_REG_RDX]->Clear();
#if !defined(_WIN64)
	x64gen.regs[X64_REG_RSI]->Clear();
	x64gen.regs[X64_REG_RDI]->Clear();
#endif
	x64gen.regs[X64_REG_R8]->Clear();
	x64gen.regs[X64_REG_R9]->Clear();
	x64gen.regs[X64_REG_R10]->Clear();
	x64gen.regs[X64_REG_R11]->Clear();

#if defined(_WIN64)
	cache_addd(0x20EC8348);		// sub rsp, 32
#endif
	/* Do the actual call to the procedure */
	if (func==NULL) cache_addw(0xD0FF);	// call rax
	else {
		// use rel32 call when the target is within +/-2GB of the cache
		Bits diff = (Bits)func - (Bits)cache.pos - 5;
		if ((Bit32s)diff == diff) opcode(0).setimm(diff,4).Emit8Reg(0xE8); // call rel32
		else {
			// too far: load the absolute address into RAX and call through it
			gen_load_imm(0, (Bitu)func);
			cache_addw(0xD0FF);
		}
	}
	// release flags only after call is done (stack must be aligned)
#if defined(_WIN64)
	if (release_flags) cache_addd(0x28C48348);	// add RSP,40
	else cache_addd(0x20C48348);	// add RSP,32
#else
	if (release_flags) cache_addd(0x08C48348);	// add RSP,8
#endif
}
  2574. +
/* Emit a call to host function 'func', marshalling arguments described by
 * the printf-like string 'ops'. Each "%X" consumes one vararg:
 *   %I  immediate (truncated to 32 bits unless followed by 'p' = pointer)
 *   %D  DynReg argument (suffix chars select width, handled by
 *       gen_load_arg_reg)
 *   %R  DynReg that receives the return value (suffix: d/w/l/h = width)
 *   %F  pass the saved lazy flags (read from [rsp]) and release them
 * Arguments are placed in the ABI registers listed in reg_args[]. */
static void gen_call_function(void * func,char const* ops,...) {
	Bits paramcount=0;
	bool release_flags=false;
	struct ParamInfo {
		const char * line;
		Bitu value;
	} pinfo[32];
	ParamInfo * retparam=0;
	Bitu f = (Bitu)func;	// NOTE(review): appears unused in this function

	/* Clear the EAX Genreg for usage */
	x64gen.regs[X64_REG_RAX]->Clear();
	x64gen.regs[X64_REG_RAX]->notusable=true;
	/* Save the flags */
	if (GCC_UNLIKELY(!skip_flags)) gen_protectflags();
	/* Scan for the amount of params */
	if (ops) {
		va_list params;
		va_start(params,ops);
		Bits pindex,pcount=0;
		// first pass: collect every %-spec and its vararg value
		while (*ops) {
			if (*ops=='%') {
				pinfo[pcount].line=ops+1;
				pinfo[pcount].value=va_arg(params,Bitu);
				pcount++;
			}
			ops++;
		}
		va_end(params);

		// second pass: load each argument into its ABI register
		paramcount=0;
		for (pindex=0;pindex<pcount;pindex++) {
			GenReg *gen;
			Bitu imm;
			const char * scan=pinfo[pindex].line;
			switch (*scan++) {
			case 'I':	/* immediate value */
				gen = x64gen.regs[reg_args[paramcount++]];
				gen->Clear();
				gen->notusable = true;
				imm = pinfo[pindex].value;
				if (*scan!='p') imm=(Bit32u)imm;	// non-pointer imms are 32-bit
				gen_load_imm(gen->index,imm);
				break;
			case 'D':	/* Dynamic register */
				x64gen.regs[reg_args[paramcount]]->notusable=true;
				gen_load_arg_reg(paramcount++, (DynReg*)pinfo[pindex].value, scan);
				break;
			case 'R':	/* Dynamic register to get the return value */
				retparam =&pinfo[pindex];
				pinfo[pindex].line=scan;
				break;
			case 'F':	/* Release flags from stack */
				release_flags=true;
				gen = x64gen.regs[reg_args[paramcount++]];
				gen->Clear();
				gen->notusable = true;
				opcode(gen->index).setea(4).Emit8(0x8B);	// mov reg, [rsp]
				break;
			default:
				IllegalOption("gen_call_function unknown param");
			}
		}
	}

	gen_call_ptr(func, release_flags);

	// argument registers become allocatable again
	while (paramcount>0)
		x64gen.regs[reg_args[--paramcount]]->notusable = false;

	/* Save the return value in correct register */
	if (retparam) {
		GenReg * genreg;
		DynReg * dynreg=(DynReg *)retparam->value;
		if (*retparam->line == 'd') {
			// dword result: bind dynreg directly to RAX instead of copying
			genreg=x64gen.regs[X64_REG_RAX];
			if (dynreg->genreg) dynreg->genreg->dynreg=0;
			genreg->Load(dynreg,true);
		} else
			genreg = FindDynReg(dynreg);
		if (*retparam->line == 'h') {
			// mov reg8h, al
			opcode(0,true,0).setrm(genreg->index,4).Emit8(0x88);
		} else if (genreg->index) { // test for (e)ax/al
			opcode op(0); // src=eax/al
			switch (*retparam->line) {
			case 'w':
				op.setword();
				/* fallthrough */
			case 'd':
				// mov r/m32, eax
				op.setrm(genreg->index).Emit8(0x89);
				break;
			case 'l':
				// mov r/m8, al
				op.setrm(genreg->index,0).Emit8(0x88);
				break;
			}
		}
		dynreg->flags|=DYNFLG_CHANGED;
	}
	/* Restore EAX registers to be used again */
	x64gen.regs[X64_REG_RAX]->notusable=false;
}
  2678. +
/* Emit a checked guest-memory write of constant 'val' (1/2/4 bytes) to the
 * address held in dr, by calling mem_write?_checked(addr, val).
 * The address goes in argument register 0 (as a dword), the immediate
 * value in argument register 1. */
static void gen_call_write(DynReg * dr,Bit32u val,Bitu write_size) {
	void *func;
	gen_protectflags();
	gen_load_arg_reg(0,dr,"rd");

	switch (write_size) {
	case 1: func = (void*)mem_writeb_checked; break;
	case 2: func = (void*)mem_writew_checked; break;
	case 4: func = (void*)mem_writed_checked; break;
	default: IllegalOption("gen_call_write");
	}

	x64gen.regs[reg_args[1]]->Clear();
	// ARG1_REG is the second parameter register (argument index 1)
	opcode(ARG1_REG).setimm(val,4).Emit8Reg(0xB8);	// mov ARG2, imm32
	gen_call_ptr(func);
}
  2695. +
  2696. +static Bit8u * gen_create_branch(BranchTypes type) {
  2697. + /* First free all registers */
  2698. + cache_addw(0x70+type);
  2699. + return (cache.pos-1);
  2700. +}
  2701. +
  2702. +static void gen_fill_branch(Bit8u * data,Bit8u * from=cache.pos) {
  2703. +#if C_DEBUG
  2704. + Bits len=from-data-1;
  2705. + if (len<0) len=-len-1;
  2706. + if (len>127)
  2707. + LOG_MSG("Big jump %d",len);
  2708. +#endif
  2709. + *data=(from-data-1);
  2710. +}
  2711. +
  2712. +static Bit8u * gen_create_branch_long(BranchTypes type) {
  2713. + cache_addw(0x800f+(type<<8));
  2714. + cache_addd(0);
  2715. + return (cache.pos-4);
  2716. +}
  2717. +
  2718. +static void gen_fill_branch_long(Bit8u * data,Bit8u * from=cache.pos) {
  2719. + *(Bit32u*)data=(from-data-4);
  2720. +}
  2721. +
  2722. +static Bit8u * gen_create_jump(Bit8u * to=0) {
  2723. + /* First free all registers */
  2724. + cache_addb(0xe9);
  2725. + cache_addd(to-(cache.pos+4));
  2726. + return (cache.pos-4);
  2727. +}
  2728. +
  2729. +static void gen_fill_jump(Bit8u * data,Bit8u * to=cache.pos) {
  2730. + *(Bit32u*)data=(to-data-4);
  2731. +}
  2732. +
  2733. +static Bit8u * gen_create_short_jump(void) {
  2734. + cache_addw(0x00EB);
  2735. + return cache.pos-1;
  2736. +}
  2737. +
  2738. +static void gen_fill_short_jump(Bit8u * data, Bit8u * to=cache.pos) {
  2739. +#if C_DEBUG
  2740. + Bits len=to-data-1;
  2741. + if (len<0) len=-len-1;
  2742. + if (len>127)
  2743. + LOG_MSG("Big jump %d",len);
  2744. +#endif
  2745. + data[0] = to-data-1;
  2746. +}
  2747. +
/* Indirect jump through a host pointer variable:
 *   RAX = *(void**)ptr;  jmp [RAX+imm]
 * i.e. jump to the address stored at offset 'imm' inside the object that
 * *ptr points to. */
static void gen_jmp_ptr(void * ptr,Bit32s imm=0) {
	opcode(0).set64().setabsaddr(ptr).Emit8(0x8B);	// mov RAX, [ptr]
	opcode(4).setea(0,-1,0,imm).Emit8(0xFF);	// jmp [rax+imm]
}
  2752. +
  2753. +static void gen_save_flags(DynReg * dynreg) {
  2754. + if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_save_flags");
  2755. + opcode(FindDynReg(dynreg)->index).setea(4).Emit8(0x8B); // mov reg32, [rsp]
  2756. + dynreg->flags|=DYNFLG_CHANGED;
  2757. +}
  2758. +
  2759. +static void gen_load_flags(DynReg * dynreg) {
  2760. + if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_load_flags");
  2761. + opcode(FindDynReg(dynreg)->index).setea(4).Emit8(0x89); // mov [rsp],reg32
  2762. +}
  2763. +
/* Store the 64-bit immediate 'imm' into a qword host variable at 'data'.
 * "mov qword [mem], imm32" sign-extends its immediate, so values that do
 * not round-trip through Bit32s are written as two separate dword stores
 * (low dword first, then the high dword at data+4). */
static void gen_save_host_direct(void *data,Bitu imm) {
	if ((Bit32s)imm != imm) {
		opcode(0).setimm(imm,4).setabsaddr(data).Emit8(0xC7);	// mov dword[], imm32 (low dword)
		opcode(0).setimm(imm>>32,4).setabsaddr((Bit8u*)data+4).Emit8(0xC7);	// high dword
	} else
		opcode(0).set64().setimm(imm,4).setabsaddr(data).Emit8(0xC7);	// mov qword[], Bit32s
}
  2771. +
  2772. +static void gen_return(BlockReturn retcode) {
  2773. + gen_protectflags();
  2774. + cache_addb(0x59); //POP RCX, the flags
  2775. + if (retcode==0) cache_addw(0xc033); //MOV EAX, 0
  2776. + else {
  2777. + cache_addb(0xb8); //MOV EAX, retcode
  2778. + cache_addd(retcode);
  2779. + }
  2780. + cache_addb(0xc3); //RET
  2781. +}
  2782. +
/* Fast return to the dispatcher: reload ECX straight from cpu_regs.flags
 * instead of the stack slot (the dispatcher receives flags in ECX, matching
 * gen_return's pop rcx). With ret_exception the stack fixup and EAX value
 * are left to code emitted by the caller; only the RET is appended. */
static void gen_return_fast(BlockReturn retcode,bool ret_exception=false) {
	if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_return_fast");
	opcode(1).setabsaddr(&cpu_regs.flags).Emit8(0x8B);	// mov ECX, [cpu_regs.flags]
	if (!ret_exception) {
		cache_addb(0x5A);	// pop rdx (shorter version of "add rsp,8")
		if (retcode==0) cache_addw(0xc033);	// xor eax,eax (return 0)
		else {
			cache_addb(0xb8);	//MOV EAX, retcode
			cache_addd(retcode);
		}
	}
	cache_addb(0xc3);	//RET
}
  2796. +
  2797. +static void gen_init(void) {
  2798. + x64gen.regs[X64_REG_RAX]=new GenReg(0);
  2799. + x64gen.regs[X64_REG_RCX]=new GenReg(1);
  2800. + x64gen.regs[X64_REG_RDX]=new GenReg(2);
  2801. + x64gen.regs[X64_REG_RBX]=new GenReg(3);
  2802. + x64gen.regs[X64_REG_RSI]=new GenReg(6);
  2803. + x64gen.regs[X64_REG_RDI]=new GenReg(7);
  2804. + x64gen.regs[X64_REG_R8]=new GenReg(8);
  2805. + x64gen.regs[X64_REG_R9]=new GenReg(9);
  2806. + x64gen.regs[X64_REG_R10]=new GenReg(10);
  2807. + x64gen.regs[X64_REG_R11]=new GenReg(11);
  2808. + x64gen.regs[X64_REG_R12]=new GenReg(12);
  2809. + x64gen.regs[X64_REG_R13]=new GenReg(13);
  2810. + x64gen.regs[X64_REG_R14]=new GenReg(14);
  2811. + x64gen.regs[X64_REG_R15]=new GenReg(15);
  2812. +}
  2813. +
#if defined(X86_DYNFPU_DH_ENABLED)
static void gen_dh_fpu_saveInit(void);
// Self-replacing function pointer: the first call assembles the real save
// stub into the link-block area and repoints this at it.
static void (*gen_dh_fpu_save)(void) = gen_dh_fpu_saveInit;

// DO NOT USE opcode::setabsaddr IN THIS FUNCTION (RBP unavailable at execution time)
/* Generates (and immediately runs) a host stub that saves the dynamic FPU
 * state: fnsave into dyn_dh_fpu.state, reload the host control word, clear
 * state_used, and mask all exceptions in the saved cw (or 0x3F).
 * All members are addressed as [RAX+disp8] — assumes every field lies
 * within 127 bytes of &dyn_dh_fpu (TODO confirm for the struct layout). */
static void gen_dh_fpu_saveInit(void) {
	Bit8u* oldpos = cache.pos;
	cache.pos = &cache_code_link_blocks[64];
	gen_dh_fpu_save = (void(*)(void))cache.pos;

	Bits addr = (Bits)&dyn_dh_fpu;
	// mov RAX, &dyn_dh_fpu
	if ((Bit32u)addr == addr) opcode(0).setimm(addr,4).Emit8Reg(0xB8);
	else opcode(0).set64().setimm(addr,8).Emit8Reg(0xB8);

	// fnsave [RAX+offs8]
	cache_addw(0x70DD);cache_addb((Bits)&dyn_dh_fpu.state-addr);
	// fldcw [RAX+offs8]
	cache_addw(0x68D9);cache_addb((Bits)&dyn_dh_fpu.host_cw-addr);
	// mov byte [RAX+offs8], 0  (the word's high 0x00 byte doubles as the imm8)
	cache_addw(0x40C6);cache_addw((Bit8u)((Bits)&dyn_dh_fpu.state_used-addr));
	// or byte [RAX+offs8], 0x3F
	cache_addw(0x4880);cache_addb((Bits)&dyn_dh_fpu.state.cw-addr);cache_addb(0x3F);
	cache_addb(0xC3);	// RET

	cache.pos = oldpos;
	gen_dh_fpu_save();	// run the freshly generated stub once
}
#endif
  2843. +
  2844. diff --git a/src/cpu/core_dyn_x86/risc_x86.h b/src/cpu/core_dyn_x86/risc_x86.h
  2845. index ca502d8f..df734f70 100644
  2846. --- a/src/cpu/core_dyn_x86/risc_x86.h
  2847. +++ b/src/cpu/core_dyn_x86/risc_x86.h
  2848. @@ -1069,4 +1069,28 @@ static void gen_init(void) {
  2849. x86gen.regs[X86_REG_EDI]=new GenReg(7);
  2850. }
  2851.  
  2852. -
#if defined(X86_DYNFPU_DH_ENABLED)
/* Save the host FPU state for the dynamic-FPU (DH) core: fnsave the current
 * state into dyn_dh_fpu.state, restore the host control word, mark the state
 * as unused, and mask all exception bits (or 0x3F) in the saved control
 * word. Two bodies: MSVC inline-asm syntax vs GCC extended asm. */
static void gen_dh_fpu_save(void)
#if defined (_MSC_VER)
{
	__asm {
	__asm	fnsave	dyn_dh_fpu.state
	__asm	fldcw	dyn_dh_fpu.host_cw
	}
	dyn_dh_fpu.state_used=false;
	dyn_dh_fpu.state.cw|=0x3f;	// mask all FPU exceptions in the saved cw
}
#else
{
	__asm__ volatile (
		"fnsave		%0			\n"
		"fldcw		%1			\n"
		:	"=m" (dyn_dh_fpu.state)
		:	"m" (dyn_dh_fpu.host_cw)
		:	"memory"
	);
	dyn_dh_fpu.state_used=false;
	dyn_dh_fpu.state.cw|=0x3f;	// mask all FPU exceptions in the saved cw
}
#endif
#endif
  2878. diff --git a/src/cpu/core_dyn_x86/string.h b/src/cpu/core_dyn_x86/string.h
  2879. index 60cfc8e0..8b27672b 100644
  2880. --- a/src/cpu/core_dyn_x86/string.h
  2881. +++ b/src/cpu/core_dyn_x86/string.h
  2882. @@ -82,7 +82,7 @@ static void dyn_string(STRING_OP op) {
  2883. Bit8u * rep_ecx_jmp;
  2884. /* Check if ECX!=zero */
  2885. if (decode.rep) {
  2886. - gen_dop_word(DOP_OR,decode.big_addr,DREG(ECX),DREG(ECX));
  2887. + gen_dop_word(DOP_TEST,decode.big_addr,DREG(ECX),DREG(ECX));
  2888. rep_ecx_jmp=gen_create_branch_long(BR_Z);
  2889. }
  2890. if (usesi) {
  2891. @@ -99,11 +99,11 @@ static void dyn_string(STRING_OP op) {
  2892. }
  2893. switch (op) {
  2894. case STR_OUTSB:
  2895. - gen_call_function((void*)&IO_WriteB,"%Id%Dl",DREG(EDX),tmp_reg);break;
  2896. + gen_call_function((void*)&IO_WriteB,"%Dw%Dl",DREG(EDX),tmp_reg);break;
  2897. case STR_OUTSW:
  2898. - gen_call_function((void*)&IO_WriteW,"%Id%Dw",DREG(EDX),tmp_reg);break;
  2899. + gen_call_function((void*)&IO_WriteW,"%Dw%Dw",DREG(EDX),tmp_reg);break;
  2900. case STR_OUTSD:
  2901. - gen_call_function((void*)&IO_WriteD,"%Id%Dd",DREG(EDX),tmp_reg);break;
  2902. + gen_call_function((void*)&IO_WriteD,"%Dw%Dd",DREG(EDX),tmp_reg);break;
  2903. }
  2904. }
  2905. if (usedi) {
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement