Advertisement
Guest User

Untitled

a guest
Dec 31st, 2012
166
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 52.34 KB | None | 0 0
  1. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/iokit/Kernel/IOCatalogue.cpp xnu-2050.7.9-sinetek/iokit/Kernel/IOCatalogue.cpp
  2. --- xnu-2050.7.9/iokit/Kernel/IOCatalogue.cpp   2012-02-15 20:11:52.000000000 -0500
  3. +++ xnu-2050.7.9-sinetek/iokit/Kernel/IOCatalogue.cpp   2012-12-31 10:38:11.000000000 -0500
  4. @@ -38,6 +38,21 @@
  5.   * Version 2.0.
  6.   */
  7.  
  8. +/* Sinetek: 0-terminated array of blacklisted kext bundle identifiers; personalities whose
  9. + * module identifier matches an entry are skipped. Should be moved somewhere convenient?
  10. + */
  11. +const char *blak [] = {
  12. +   "com.apple.driver.AppleIntelMeromProfile",
  13. +   "com.apple.driver.AppleIntelNehalemProfile",
  14. +   "com.apple.driver.AppleIntelPenrynProfile",
  15. +   "com.apple.driver.AppleIntelYonahProfile",
  16. +   "com.apple.driver.AppleIntelCPUPowerManagement",
  17. +   "com.apple.iokit.CHUDKernLib",
  18. +   "com.apple.iokit.CHUDProf",
  19. +   "com.apple.iokit.CHUDUtils",
  20. +   0, // terminator: lookup loops stop at this entry
  21. +};
  22. +
  23.  extern "C" {
  24.  #include <machine/machine_routines.h>
  25.  #include <libkern/kernel_mach_header.h>
  26. @@ -332,6 +347,30 @@
  27.              break;
  28.          }
  29.  
  30. +   /* Sinetek: skip personalities whose kext identifier is listed in blak[]. TODO make this a function */
  31. +   boolean_t blacklistEnabled = TRUE;
  32. +   printf("BLACKLIST %s\n", blacklistEnabled? "enabled" : "disabled");
  33. +   if(blacklistEnabled) {
  34. +       OSString *moduleName = OSDynamicCast(OSString, personality->getObject(gIOModuleIdentifierKey));
  35. +       const char *cName = NULL;
  36. +       if(moduleName) cName = moduleName->getCStringNoCopy(); /* cast is NULL when the key is absent or not an OSString */
  37. +       boolean_t blackPersonality = FALSE;
  38. +
  39. +       if(cName) {
  40. +           for(int i = 0; blak[i] != NULL; ++i) {
  41. +               /* blak[] is 0-terminated; one match is enough, so stop scanning. */
  42. +               if(!strcmp(blak[i], cName)) {
  43. +                   printf("Skipping personality %s\n", cName);
  44. +                   blackPersonality = TRUE;
  45. +                   break;
  46. +               }
  47. +           }
  48. +       }
  49. +
  50. +       if(blackPersonality) continue;
  51. +   }
  52. +   /* end Sinetek */
  53. +
  54.          OSKext::uniquePersonalityProperties(personality);
  55.  
  56.          // Add driver personality to catalogue.
  57. @@ -840,6 +879,31 @@
  58.            (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx));
  59.            newIdx++)
  60.       {
  61. +   /* Sinetek: skip personalities whose kext identifier is listed in blak[]. TODO make this a function */
  62. +   boolean_t blacklistEnabled = TRUE;
  63. +   printf("BLACKLIST %s\n", blacklistEnabled? "enabled" : "disabled");
  64. +   if(blacklistEnabled) {
  65. +       OSString *moduleName = OSDynamicCast(OSString, thisNewPersonality->getObject(gIOModuleIdentifierKey));
  66. +       const char *cName = NULL;
  67. +       if(moduleName) cName = moduleName->getCStringNoCopy(); /* cast is NULL when the key is absent or not an OSString */
  68. +       boolean_t blackPersonality = FALSE;
  69. +
  70. +       if(cName) {
  71. +           for(int i = 0; blak[i] != NULL; ++i) {
  72. +               /* blak[] is 0-terminated; one match is enough, so stop scanning. */
  73. +               if(!strcmp(blak[i], cName)) {
  74. +                   printf("Skipping personality %s\n", cName);
  75. +                   blackPersonality = TRUE;
  76. +                   break;
  77. +               }
  78. +           }
  79. +       }
  80. +
  81. +       if(blackPersonality) continue;
  82. +   }
  83. +   /* end Sinetek */
  84. +  
  85. +
  86.           OSKext::uniquePersonalityProperties(thisNewPersonality);
  87.           addPersonality(thisNewPersonality);
  88.           matchSet->setObject(thisNewPersonality);
  89. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/makedefs/MakeInc.def xnu-2050.7.9-sinetek/makedefs/MakeInc.def
  90. --- xnu-2050.7.9/makedefs/MakeInc.def   2012-07-24 11:57:49.000000000 -0400
  91. +++ xnu-2050.7.9-sinetek/makedefs/MakeInc.def   2012-12-31 10:38:11.000000000 -0500
  92. @@ -243,11 +243,11 @@
  93.  # Compiler warning flags
  94.  #
  95.  
  96. -CWARNFLAGS_STD = \
  97. -   -Wall -Werror -Wno-format-y2k -Wextra -Wstrict-prototypes \
  98. -   -Wmissing-prototypes -Wpointer-arith -Wreturn-type -Wcast-qual \
  99. -   -Wwrite-strings -Wswitch -Wshadow -Wcast-align -Wchar-subscripts \
  100. -   -Winline -Wnested-externs -Wredundant-decls -Wextra-tokens
  101. +#CWARNFLAGS_STD = \
  102. +#  -Wall -Werror -Wno-format-y2k -Wextra -Wstrict-prototypes \
  103. +#  -Wmissing-prototypes -Wpointer-arith -Wreturn-type -Wcast-qual \
  104. +#  -Wwrite-strings -Wswitch -Wshadow -Wcast-align -Wchar-subscripts \
  105. +#  -Winline -Wnested-externs -Wredundant-decls -Wextra-tokens
  106.  
  107.  # Certain warnings are non-fatal (8474835)
  108.  CWARNFLAGS_STD += -Wno-error=cast-align
  109. @@ -259,10 +259,10 @@
  110.  $(1)_CWARNFLAGS_ADD += $2
  111.  endef
  112.  
  113. -CXXWARNFLAGS_STD = \
  114. -   -Wall -Werror -Wno-format-y2k -Wextra -Wpointer-arith -Wreturn-type \
  115. -   -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \
  116. -   -Wredundant-decls -Wextra-tokens
  117. +#CXXWARNFLAGS_STD = \
  118. +#  -Wall -Werror -Wno-format-y2k -Wextra -Wpointer-arith -Wreturn-type \
  119. +#  -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \
  120. +#  -Wredundant-decls -Wextra-tokens
  121.  
  122.  # Certain warnings are non-fatal (8474835, 9000888)
  123.  CXXWARNFLAGS_STD += -Wno-error=cast-align -Wno-error=overloaded-virtual
  124. @@ -337,13 +337,13 @@
  125.  endif
  126.  
  127.  
  128. -export CFLAGS_RELEASEI386 = -O2
  129. -export CFLAGS_DEVELOPMENTI386 = -O2
  130. +export CFLAGS_RELEASEI386 = -O2 -march=k8
  131. +export CFLAGS_DEVELOPMENTI386 = -O2 -march=k8
  132.  export CFLAGS_DEBUGI386 = -O0
  133.  export CFLAGS_PROFILEI386 = -O2
  134.  
  135. -export CFLAGS_RELEASEX86_64 = -O2
  136. -export CFLAGS_DEVELOPMENTX86_64 = -O2
  137. +export CFLAGS_RELEASEX86_64 = -O3 -march=k8
  138. +export CFLAGS_DEVELOPMENTX86_64 = -O2 -march=k8
  139.  # No space optimization for the DEBUG kernel for the benefit of gdb:
  140.  export CFLAGS_DEBUGX86_64 = -O0
  141.  export CFLAGS_PROFILEX86_64 = -O2
  142. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/conf/files xnu-2050.7.9-sinetek/osfmk/conf/files
  143. --- xnu-2050.7.9/osfmk/conf/files   2012-04-06 20:50:07.000000000 -0400
  144. +++ xnu-2050.7.9-sinetek/osfmk/conf/files   2012-12-31 10:38:11.000000000 -0500
  145. @@ -275,3 +275,5 @@
  146.  osfmk/kperf/timetrigger.c               optional kperf
  147.  
  148.  osfmk/console/serial_general.c standard
  149. +
  150. +osfmk/kern/opemu.c     standard
  151. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/commpage/commpage.c xnu-2050.7.9-sinetek/osfmk/i386/commpage/commpage.c
  152. --- xnu-2050.7.9/osfmk/i386/commpage/commpage.c 2012-02-25 01:42:08.000000000 -0500
  153. +++ xnu-2050.7.9-sinetek/osfmk/i386/commpage/commpage.c 2012-12-31 11:24:46.000000000 -0500
  154. @@ -226,7 +226,7 @@
  155.             bits |= kHasSupplementalSSE3;
  156.             /* fall thru */
  157.         case 5:
  158. -           bits |= kHasSSE3;
  159. +           bits |= kHasSSE3 | kHasSupplementalSSE3 | kHasSSE4_2;
  160.             /* fall thru */
  161.         case 4:
  162.             bits |= kHasSSE2;
  163. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/cpuid.c xnu-2050.7.9-sinetek/osfmk/i386/cpuid.c
  164. --- xnu-2050.7.9/osfmk/i386/cpuid.c 2012-07-24 11:57:43.000000000 -0400
  165. +++ xnu-2050.7.9-sinetek/osfmk/i386/cpuid.c 2012-12-31 11:18:06.000000000 -0500
  166. @@ -260,6 +260,135 @@
  167.     "Lnone", "L1I", "L1D", "L2U", "L3U"
  168.  };
  169.  
  170. +/* Sinetek: reimplemented, based on AnV, mercurySquad, thanks go to them.
  171. + * Function is AMD-specific: fills core counts and L1D/L1I/L2U/L3U cache info in *info_p from CPUID 0x80000005/6/8.
  172. + */
  173. +static void
  174. +cpuid_set_AMDcache_info( i386_cpu_info_t * info_p )
  175. +{
  176. +   uint32_t    reg[4];
  177. +   uint32_t    linesizes[LCACHE_MAX];
  178. +   cache_type_t    type;
  179. +   uint32_t    colors;
  180. +
  181. +   /* linesizes[] is recorded per cache level below but not read back in this function */
  182. +   bzero( linesizes, sizeof(linesizes) );
  183. +
  184. +   /* get number of cores in processor */
  185. +   /* No HT on AMD so logicals = cores */
  186. +   cpuid_fn(0x80000008, reg);
  187. +   info_p->cpuid_cores_per_package = bitfield32(reg[ecx], 7, 0) + 1;
  188. +   info_p->cpuid_logical_per_package = info_p->cpuid_cores_per_package;
  189. +
  190. +
  191. +   /* L1 Data */
  192. +   {
  193. +       type = L1D;
  194. +       cpuid_fn(0x80000005, reg);
  195. +       uint32_t cpuid_c_linesize   = bitfield32(reg[ecx], 7,  0);
  196. +       uint32_t cpuid_c_partitions = bitfield32(reg[ecx], 15, 8);
  197. +       uint32_t cpuid_c_associativity  = bitfield32(reg[ecx], 23, 16);
  198. +       uint32_t cpuid_c_size       = bitfield32(reg[ecx], 31, 24);
  199. +
  200. +       uint32_t cache_associativity    = cpuid_c_associativity;
  201. +
  202. +       // size reported in KB.
  203. +       info_p->cache_size[type]    = cpuid_c_size * 1024;
  204. +       info_p->cache_sharing[type]     = 1;
  205. +       info_p->cache_partitions[type]  = cpuid_c_partitions;
  206. +
  207. +       linesizes[type] = cpuid_c_linesize;
  208. +       uint32_t cache_sets = info_p->cache_size[type] / (cpuid_c_partitions * cpuid_c_linesize * cache_associativity);
  209. +
  210. +       colors = ( cpuid_c_linesize * cache_sets ) >> 12;
  211. +       if ( colors > vm_cache_geometry_colors )
  212. +           vm_cache_geometry_colors = colors;
  213. +   }
  214. +   /* L1 Instruction */
  215. +   {
  216. +       type = L1I;
  217. +       cpuid_fn(0x80000005, reg);
  218. +       uint32_t cpuid_c_linesize   = bitfield32(reg[edx], 7,  0);
  219. +       uint32_t cpuid_c_partitions = bitfield32(reg[edx], 15, 8);
  220. +       uint32_t cpuid_c_associativity  = bitfield32(reg[edx], 23, 16);
  221. +       uint32_t cpuid_c_size       = bitfield32(reg[edx], 31, 24);
  222. +
  223. +       uint32_t cache_associativity    = cpuid_c_associativity;
  224. +
  225. +       // size reported in KB.
  226. +       info_p->cache_size[type]    = cpuid_c_size * 1024;
  227. +       info_p->cache_sharing[type]     = 1;
  228. +       info_p->cache_partitions[type]  = cpuid_c_partitions;
  229. +
  230. +       linesizes[type] = cpuid_c_linesize;
  231. +       uint32_t cache_sets = info_p->cache_size[type] / (cpuid_c_partitions * cpuid_c_linesize * cache_associativity);
  232. +
  233. +       colors = ( cpuid_c_linesize * cache_sets ) >> 12;
  234. +       if ( colors > vm_cache_geometry_colors )
  235. +           vm_cache_geometry_colors = colors;
  236. +   }
  237. +   /* L2 Unified */
  238. +   {
  239. +       type = L2U; /* was L1D, which overwrote the L1 data cache entry */
  240. +       cpuid_fn(0x80000006, reg);
  241. +       uint32_t cpuid_c_linesize   = bitfield32(reg[ecx], 7,  0);
  242. +       uint32_t cpuid_c_partitions = bitfield32(reg[ecx], 11, 8);
  243. +       uint32_t cpuid_c_associativity  = bitfield32(reg[ecx], 15, 12);
  244. +       uint32_t cpuid_c_size       = bitfield32(reg[ecx], 31, 16);
  245. +
  246. +       // Special formula for associativity:  2^(assoc / 2)
  247. +       uint32_t cache_associativity    = 1ul << (cpuid_c_associativity / 2);
  248. +
  249. +       // size reported in KB.
  250. +       info_p->cache_size[type]    = cpuid_c_size * 1024;
  251. +       info_p->cache_sharing[type]     = 1;
  252. +       info_p->cache_partitions[type]  = cpuid_c_partitions;
  253. +
  254. +       linesizes[type] = cpuid_c_linesize;
  255. +       uint32_t cache_sets = info_p->cache_size[type] / (cpuid_c_partitions * cpuid_c_linesize * cache_associativity);
  256. +
  257. +       colors = ( cpuid_c_linesize * cache_sets ) >> 12;
  258. +       if ( colors > vm_cache_geometry_colors )
  259. +           vm_cache_geometry_colors = colors;
  260. +
  261. +       // use for cache size etc.
  262. +       info_p->cpuid_cache_L2_associativity = cache_associativity;
  263. +       info_p->cpuid_cache_size    = info_p->cache_size[type];
  264. +       info_p->cache_linesize      = cpuid_c_linesize;
  265. +   }
  266. +   /* L3 Unified */
  267. +   {
  268. +       type = L3U; /* was L1D, which overwrote (or zeroed) the L1 data cache entry */
  269. +       cpuid_fn(0x80000006, reg);
  270. +       uint32_t cpuid_c_linesize   = bitfield32(reg[edx], 7,  0);
  271. +       uint32_t cpuid_c_partitions = bitfield32(reg[edx], 11, 8);
  272. +       uint32_t cpuid_c_associativity  = bitfield32(reg[edx], 15, 12);
  273. +       uint32_t cpuid_c_size       = bitfield32(reg[edx], 31, 18);
  274. +
  275. +       // Special formula for associativity:  2^(assoc / 2)
  276. +       uint32_t cache_associativity    = 1ul << (cpuid_c_associativity / 2);
  277. +
  278. +       if(cpuid_c_size == 0) {
  279. +           // no L3
  280. +           info_p->cache_size[type]    = 0;
  281. +           info_p->cache_sharing[type]     = 0;
  282. +           info_p->cache_partitions[type]  = 0;
  283. +       } else {
  284. +           // size reported in 512 KB packs, so scale by 512 * 1024 to get bytes.
  285. +           info_p->cache_size[type]    = cpuid_c_size * 512 * 1024;
  286. +           info_p->cache_sharing[type]     = 1;
  287. +           info_p->cache_partitions[type]  = cpuid_c_partitions;
  288. +
  289. +           linesizes[type] = cpuid_c_linesize;
  290. +           uint32_t cache_sets = info_p->cache_size[type] / (cpuid_c_partitions * cpuid_c_linesize * cache_associativity);
  291. +
  292. +           colors = ( cpuid_c_linesize * cache_sets ) >> 12;
  293. +           if ( colors > vm_cache_geometry_colors )
  294. +               vm_cache_geometry_colors = colors;
  295. +       }
  296. +   }
  297. +}
  298. +
  299.  /* this function is Intel-specific */
  300.  static void
  301.  cpuid_set_cache_info( i386_cpu_info_t * info_p )
  302. @@ -572,10 +701,10 @@
  303.      * and bracket this with the approved procedure for reading the
  304.      * the microcode version number a.k.a. signature a.k.a. BIOS ID
  305.      */
  306. -   wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
  307. +   //wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
  308.     cpuid_fn(1, reg);
  309. -   info_p->cpuid_microcode_version =
  310. -       (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
  311. +   //info_p->cpuid_microcode_version =
  312. +   //  (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
  313.     info_p->cpuid_signature = reg[eax];
  314.     info_p->cpuid_stepping  = bitfield32(reg[eax],  3,  0);
  315.     info_p->cpuid_model     = bitfield32(reg[eax],  7,  4);
  316. @@ -587,7 +716,7 @@
  317.     info_p->cpuid_features  = quad(reg[ecx], reg[edx]);
  318.  
  319.     /* Get "processor flag"; necessary for microcode update matching */
  320. -   info_p->cpuid_processor_flag = (rdmsr64(MSR_IA32_PLATFORM_ID)>> 50) & 3;
  321. +   //info_p->cpuid_processor_flag = (rdmsr64(MSR_IA32_PLATFORM_ID)>> 50) & 3;
  322.  
  323.     /* Fold extensions into family/model */
  324.     if (info_p->cpuid_family == 0x0f)
  325. @@ -604,7 +733,8 @@
  326.     if (info_p->cpuid_max_ext >= 0x80000001) {
  327.         cpuid_fn(0x80000001, reg);
  328.         info_p->cpuid_extfeatures =
  329. -               quad(reg[ecx], reg[edx]);
  330. +               quad(reg[ecx], reg[edx]) & ~CPUID_EXTFEATURE_XD;
  331. +       /* Sinetek: AMD doesn't like the XD bit. */
  332.     }
  333.  
  334.     DBG(" max_basic           : %d\n", info_p->cpuid_max_basic);
  335. @@ -801,16 +931,22 @@
  336.     cpuid_set_generic_info(info_p);
  337.  
  338.     /* verify we are running on a supported CPU */
  339. -   if ((strncmp(CPUID_VID_INTEL, info_p->cpuid_vendor,
  340. +   /*if ((strncmp(CPUID_VID_INTEL, info_p->cpuid_vendor,
  341.              min(strlen(CPUID_STRING_UNKNOWN) + 1,
  342.              sizeof(info_p->cpuid_vendor)))) ||
  343.        (cpuid_set_cpufamily(info_p) == CPUFAMILY_UNKNOWN))
  344. -       panic("Unsupported CPU");
  345. +       panic("Unsupported CPU");*/
  346. +   cpuid_set_cpufamily(info_p);
  347.  
  348.     info_p->cpuid_cpu_type = CPU_TYPE_X86;
  349.     info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1;
  350.     /* Must be invoked after set_generic_info */
  351. -   cpuid_set_cache_info(&cpuid_cpu_info);
  352. +   /* check if running on AMD, call right cache info function */
  353. +   if(!strncmp(CPUID_VID_AMD, info_p->cpuid_vendor,
  354. +            min(strlen(CPUID_STRING_UNKNOWN) + 1,
  355. +            sizeof(info_p->cpuid_vendor)))) {
  356. +       cpuid_set_AMDcache_info(&cpuid_cpu_info);
  357. +   } else cpuid_set_cache_info(&cpuid_cpu_info);
  358.  
  359.     /*
  360.      * Find the number of enabled cores and threads
  361. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/cpuid.h xnu-2050.7.9-sinetek/osfmk/i386/cpuid.h
  362. --- xnu-2050.7.9/osfmk/i386/cpuid.h 2012-07-24 11:57:43.000000000 -0400
  363. +++ xnu-2050.7.9-sinetek/osfmk/i386/cpuid.h 2012-12-31 10:46:08.000000000 -0500
  364. @@ -167,6 +167,14 @@
  365.  #define CPUID_MODEL_JAKETOWN   0x2D
  366.  #define CPUID_MODEL_IVYBRIDGE  0x3A
  367.  
  368. +/* kaitek: the following definitions are needed by tsc.c and kern_mib.c */
  369. +#define CPU_FAMILY_PENTIUM_M   (0x6)
  370. +#define CPU_FAMILY_PENTIUM_4   (0xF)
  371. +#define CPU_FAMILY_AMD_PHENOM  (0x10)
  372. +#define CPU_FAMILY_AMD_SHANGHAI    (0x11)
  373. +#define CPU_FAMILY_I5      (0x1E)
  374. +#define CPU_FAMILY_I9      (0x2C)
  375. +#define CPU_FAMILY_SANDY   (0x2A)
  376.  
  377.  #define CPUID_VMM_FAMILY_UNKNOWN   0x0
  378.  #define CPUID_VMM_FAMILY_VMWARE        0x1
  379. @@ -358,6 +366,8 @@
  380.  #ifdef __cplusplus
  381.  extern "C" {
  382.  #endif
  383. +extern boolean_t   IsAmdCPU(void);
  384. +extern boolean_t   IsIntelCPU(void);
  385.  
  386.  /*
  387.   * External declarations
  388. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/lapic_native.c xnu-2050.7.9-sinetek/osfmk/i386/lapic_native.c
  389. --- xnu-2050.7.9/osfmk/i386/lapic_native.c  2011-09-09 15:23:12.000000000 -0400
  390. +++ xnu-2050.7.9-sinetek/osfmk/i386/lapic_native.c  2012-12-31 10:38:11.000000000 -0500
  391. @@ -218,7 +218,7 @@
  392.  
  393.     lapic_ops->init();
  394.  
  395. -   if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
  396. +   if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x10) {
  397.         panic("Local APIC version 0x%x, 0x14 or more expected\n",
  398.             (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
  399.     }
  400. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/start.s xnu-2050.7.9-sinetek/osfmk/i386/start.s
  401. --- xnu-2050.7.9/osfmk/i386/start.s 2011-12-01 20:25:12.000000000 -0500
  402. +++ xnu-2050.7.9-sinetek/osfmk/i386/start.s 2012-12-31 10:38:11.000000000 -0500
  403. @@ -302,10 +302,6 @@
  404.     orl     $(CR4_PAE),%eax
  405.     movl    %eax,%cr4               /* enable page size extensions */
  406.  
  407. -   movl    $(MSR_IA32_EFER), %ecx          /* MSR number in ecx */
  408. -   rdmsr                       /* MSR value return in edx: eax */
  409. -   orl $(MSR_IA32_EFER_NXE), %eax      /* Set NXE bit in low 32-bits */
  410. -   wrmsr                       /* Update Extended Feature Enable reg */
  411.  
  412.     movl    %cr0, %eax
  413.     orl $(CR0_PG|CR0_WP), %eax
  414. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/trap.c xnu-2050.7.9-sinetek/osfmk/i386/trap.c
  415. --- xnu-2050.7.9/osfmk/i386/trap.c  2012-02-25 01:42:08.000000000 -0500
  416. +++ xnu-2050.7.9-sinetek/osfmk/i386/trap.c  2012-12-31 10:38:11.000000000 -0500
  417. @@ -1059,6 +1059,9 @@
  418.         break;
  419.  
  420.         case T_INVALID_OPCODE:
  421. +       /* Sinetek: we'll handle this. */
  422. +       opemu_trap(saved_state);
  423. +
  424.         exc = EXC_BAD_INSTRUCTION;
  425.         code = EXC_I386_INVOP;
  426.         break;
  427. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/tsc.c xnu-2050.7.9-sinetek/osfmk/i386/tsc.c
  428. --- xnu-2050.7.9/osfmk/i386/tsc.c   2012-03-21 20:24:34.000000000 -0400
  429. +++ xnu-2050.7.9-sinetek/osfmk/i386/tsc.c   2012-12-31 10:48:06.000000000 -0500
  430. @@ -72,6 +72,7 @@
  431.  uint64_t   tscGranularity = 0;
  432.  uint64_t   bus2tsc = 0;
  433.  uint64_t   busFreq = 0;
  434. +uint32_t   kTscPanicOn = 0;
  435.  uint32_t   flex_ratio = 0;
  436.  uint32_t   flex_ratio_min = 0;
  437.  uint32_t   flex_ratio_max = 0;
  438. @@ -88,9 +89,31 @@
  439.  #define Tera (kilo * Giga)
  440.  #define Peta (kilo * Tera)
  441.  
  442. -#define CPU_FAMILY_PENTIUM_M   (0x6)
  443. +/* mercurysquad: The following enum specifies one of the bus ratio calc paths to take */
  444. +typedef enum {
  445. +   BUSRATIO_BOOTFLAG,
  446. +   BUSRATIO_ATHLON,
  447. +   BUSRATIO_EFI,
  448. +   BUSRATIO_PHENOM_SHANGHAI,
  449. +   BUSRATIO_INTEL_MSR,
  450. +   BUSRATIO_AUTODETECT,
  451. +   BUSRATIO_PENTIUM4_MSR, // P4 model 2+ have an MSR too
  452. +   BUSRATIO_TIMER
  453. +} busratio_path_t;
  454. +
  455. +static const char* busRatioPathNames[] = { /* indexed by busratio_path_t value: keep entries in enum order */
  456. +   "Boot-time argument",
  457. +   "AMD Athlon",
  458. +   "Pentium 4 (via EFI)",
  459. +   "AMD Phenom",
  460. +   "Intel / Apple",
  461. +   "Autodetect",
  462. +   "Pentium 4 (via MSR)",
  463. +   "Time the TSC"
  464. +};
  465.  
  466.  static const char  FSB_Frequency_prop[] = "FSBFrequency";
  467. +static const char  FSB_CPUFrequency_prop[] = "CPUFrequency";
  468.  /*
  469.   * This routine extracts the bus frequency in Hz from the device tree.
  470.   */
  471. @@ -124,6 +147,87 @@
  472.     }
  473.     return frequency;
  474.  }
  475. +/* mercurysquad:
  476. + * This routine extracts the cpu frequency ("CPUFrequency" property) from the /efi/platform device tree node.
  477. + * The value should be set by a custom EFI bootloader (only needed on CPUs which
  478. + * don't report the bus ratio in one of the MSRs.) Returns 0 if it is missing, mis-sized or out of range.
  479. + */
  480. +static uint64_t
  481. +EFI_CPU_Frequency(void)
  482. +{
  483. +   uint64_t    frequency = 0;
  484. +   DTEntry     entry;
  485. +   void        *value;
  486. +   unsigned int    size;
  487. +  
  488. +   if (DTLookupEntry(0, "/efi/platform", &entry) != kSuccess) {
  489. +       kprintf("EFI_CPU_Frequency: didn't find /efi/platform\n");
  490. +       return 0;
  491. +   }
  492. +   if (DTGetProperty(entry,FSB_CPUFrequency_prop,&value,&size) != kSuccess) {
  493. +       kprintf("EFI_CPU_Frequency: property %s not found\n",
  494. +           FSB_CPUFrequency_prop); /* log the property actually looked up, not FSBFrequency */
  495. +       return 0;
  496. +   }
  497. +   if (size == sizeof(uint64_t)) {
  498. +       frequency = *(uint64_t *) value;
  499. +       kprintf("EFI_CPU_Frequency: read %s value: %llu\n",
  500. +           FSB_CPUFrequency_prop, frequency);
  501. +       if (!(10*Mega < frequency && frequency < 50*Giga)) {
  502. +           kprintf("EFI_CPU_Frequency: value out of range\n");
  503. +           frequency = 0;
  504. +       }
  505. +   } else {
  506. +       kprintf("EFI_CPU_Frequency: unexpected size %d\n", size);
  507. +   }
  508. +   return frequency;
  509. +}
  510. +
  511. +/*
  512. + * Convert the cpu frequency info into a 'fake' MSR198h in Intel format: multiplier (frequency / bFreq) in bits 44:40, bit 46 set for a half step. Returns 0 for unusable input.
  513. + */
  514. +static uint64_t
  515. +getFakeMSR(uint64_t frequency, uint64_t bFreq) {
  516. +   uint64_t fakeMSR = 0ull;
  517. +   uint64_t multi = 0;
  518. +  
  519. +   if (frequency == 0 || bFreq < 1000) // bFreq / 1000 below would be 0 => division by zero
  520. +       return 0;
  521. +  
  522. +   multi = frequency / (bFreq / 1000); // = multi*1000
  523. +   // divide by 1000, rounding up if it was x.75 or more
  524. +   // Example: 12900 will get rounded to 13150/1000 = 13
  525. +   //          but 12480 will be 12730/1000 = 12
  526. +   fakeMSR = (multi + 250) / 1000;
  527. +   fakeMSR <<= 40; // push multiplier into bits 44 to 40
  528. +  
  529. +   // If fractional part was within (0.25, 0.75), set N/2
  530. +   if ((multi % 1000 > 250) && (multi % 1000 < 750))
  531. +       fakeMSR |= (1ull << 46);
  532. +
  533. +   return fakeMSR;
  534. +}
  535. +
  536. +int ForceAmdCpu = 0;
  537. +
  538. +/* Handy functions to check what platform we're on. IsAmdCPU: TRUE when ForceAmdCpu is set or CPUID leaf 0 returns the vendor string "AuthenticAMD". */
  539. +boolean_t IsAmdCPU(void) {
  540. +   if (ForceAmdCpu) return TRUE;
  541. +  
  542. +   uint32_t ourcpuid[4];
  543. +   do_cpuid(0, ourcpuid);
  544. +   if (ourcpuid[ebx] == 0x68747541 && /* "Auth" */
  545. +       ourcpuid[ecx] == 0x444D4163 && /* "cAMD" */
  546. +       ourcpuid[edx] == 0x69746E65)   /* "enti" */
  547. +       return TRUE;
  548. +   else
  549. +       return FALSE;
  550. +}
  551. +
  552. +boolean_t IsIntelCPU(void) {
  553. +   return IsAmdCPU() ? FALSE : TRUE; /* dirty hack: every CPU that is not AMD is reported as Intel */
  554. +}
  555. +
  556.  
  557.  /*
  558.   * Initialize the various conversion factors needed by code referencing
  559. @@ -192,22 +296,197 @@
  560.         if (busFreq == 0)
  561.             busFreq = BASE_NHM_CLOCK_SOURCE;
  562.  
  563. -       break;
  564.              }
  565. +       break;
  566.     default: {
  567. -       uint64_t    prfsts;
  568. +   /*
  569. +    * mercurysquad: The bus ratio is crucial to setting the proper rtc increment.
  570. +    * There are several methods so we first check any boot flags. If none is specified, we choose
  571. +    * based on the CPU type.
  572. +    */
  573. +   uint64_t cpuFreq = 0, prfsts = 0, boot_arg = 0;
  574. +   busratio_path_t busRatioPath = BUSRATIO_AUTODETECT;
  575. +  
  576. +   if (PE_parse_boot_argn("busratiopath", &boot_arg, sizeof(boot_arg)))
  577. +       busRatioPath = (busratio_path_t) boot_arg;
  578. +   else
  579. +       busRatioPath = BUSRATIO_AUTODETECT;
  580. +  
  581. +   if (PE_parse_boot_argn("busratio", &tscGranularity, sizeof(tscGranularity)))
  582. +       busRatioPath = BUSRATIO_BOOTFLAG;
  583. +  
  584. +   if (busRatioPath == BUSRATIO_AUTODETECT) {
  585. +       /* This happens if no bootflag above was specified.
  586. +        * We'll choose based on CPU type */
  587. +       switch (cpuid_info()->cpuid_family) {
  588. +           case CPU_FAMILY_PENTIUM_4:
  589. +               /* This could be AMD Athlon or Intel P4 as both have family Fh */
  590. +               if (IsAmdCPU())
  591. +                   busRatioPath = BUSRATIO_ATHLON;
  592. +               else if (cpuid_info()->cpuid_model < 2 )
  593. +                   /* These models don't implement proper MSR 198h or 2Ch */
  594. +                   busRatioPath = BUSRATIO_TIMER;
  595. +               else if (cpuid_info()->cpuid_model == 2)
  596. +                   /* This model has an MSR we can use */
  597. +                   busRatioPath = BUSRATIO_PENTIUM4_MSR;
  598. +               else /* 3 or higher */
  599. +                   /* Other models should implement MSR 198h */
  600. +                   busRatioPath = BUSRATIO_INTEL_MSR;
  601. +               break;
  602. +           case CPU_FAMILY_PENTIUM_M:
  603. +               if (cpuid_info()->cpuid_model >= 0xD)
  604. +                   /* Pentium M or Core and above can use Apple method*/
  605. +                   busRatioPath = BUSRATIO_INTEL_MSR;
  606. +               else
  607. +                   /* Other Pentium class CPU, use safest option */
  608. +                   busRatioPath = BUSRATIO_TIMER;
  609. +               break;
  610. +           case CPU_FAMILY_AMD_PHENOM:
  611. +           case CPU_FAMILY_AMD_SHANGHAI:
  612. +               /* These have almost the same method, with a minor difference */
  613. +               busRatioPath = BUSRATIO_PHENOM_SHANGHAI;
  614. +               break;
  615. +           default:
  616. +               /* Fall back to safest method */
  617. +               busRatioPath = BUSRATIO_TIMER;
  618. +       };
  619. +   }
  620. +  
  621. +   /*
  622. +    * Now that we have elected a bus ratio path, we can proceed to calculate it.
  623. +    */
  624. +   printf("rtclock_init: Taking bus ratio path %d (%s)\n",
  625. +          busRatioPath, busRatioPathNames[busRatioPath]);
  626. +   switch (busRatioPath) {
  627. +       case BUSRATIO_BOOTFLAG:
  628. +           /* tscGranularity was already set. However, check for N/2. N/2 is specified by
  629. +            * giving a busratio of 10 times what it is (so last digit is 5). We set a cutoff
  630. +            * of 30 before deciding it's n/2. TODO: find a better way */
  631. +           if (tscGranularity == 0) tscGranularity = 1; // avoid div by zero
  632. +           N_by_2_bus_ratio = (tscGranularity > 30) && ((tscGranularity % 10) != 0);
  633. +           if (N_by_2_bus_ratio) tscGranularity /= 10; /* Scale it back to normal */
  634. +           break;
  635. +#ifndef __i386__ //AnV: in case of x86_64 boot default for busratio timer to EFI value
  636. +       case BUSRATIO_TIMER:
  637. +#endif
  638. +       case BUSRATIO_EFI:
  639. +           /* This uses the CPU frequency exported into EFI by the bootloader */
  640. +           cpuFreq = EFI_CPU_Frequency();
  641. +           prfsts  = getFakeMSR(cpuFreq, busFreq);
  642. +           tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
  643. +           N_by_2_bus_ratio = prfsts & bit(46);
  644. +           break;
  645. +       case BUSRATIO_INTEL_MSR:
  646. +           /* This will read the performance status MSR on intel systems (Apple method) */
  647. +           prfsts = rdmsr64(IA32_PERF_STS);
  648. +           tscGranularity  = (uint32_t)bitfield(prfsts, 44, 40);
  649. +           N_by_2_bus_ratio= prfsts & bit(46);
  650. +           break;
  651. +       case BUSRATIO_ATHLON:
  652. +           /* Athlons specify the bus ratio directly in an MSR using a simple formula */
  653. +           prfsts      = rdmsr64(AMD_PERF_STS);
  654. +           tscGranularity  = 4 + bitfield(prfsts, 5, 1);
  655. +           N_by_2_bus_ratio= prfsts & bit(0); /* FIXME: This is experimental! */
  656. +           break;
  657. +       case BUSRATIO_PENTIUM4_MSR:
  658. +           prfsts      = rdmsr64(0x2C); // TODO: Add to header
  659. +           tscGranularity  = bitfield(prfsts, 31, 24);
  660. +           break;
  661. +       case BUSRATIO_PHENOM_SHANGHAI:
  662. +           /* Phenoms and Shanghai processors have a different MSR to read the frequency
  663. +            * multiplier and divisor, from which the cpu frequency can be calculated.
  664. +            * This can then be used to construct the fake MSR. */
  665. +           prfsts      = rdmsr64(AMD_COFVID_STS);
  666. +           printf("rtclock_init: Phenom MSR 0x%x returned: 0x%llx\n", AMD_COFVID_STS, prfsts);
  667. +           uint64_t cpuFid = bitfield(prfsts, 5, 0);
  668. +           uint64_t cpuDid = bitfield(prfsts, 8, 6);
  669. +           /* The base for Fid could be either 8 or 16 depending on the cpu family */
  670. +           if (cpuid_info()->cpuid_family == CPU_FAMILY_AMD_PHENOM)
  671. +               cpuFreq = (100 * Mega * (cpuFid + 0x10)) >> cpuDid;
  672. +           else /* shanghai */
  673. +               cpuFreq = (100 * Mega * (cpuFid + 0x08)) >> cpuDid;
  674. +           prfsts = getFakeMSR(cpuFreq, busFreq);
  675. +           tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
  676. +           N_by_2_bus_ratio = prfsts & bit(46);
  677. +           break;
  678. +#ifdef __i386__ //qoopz: no get_PIT2 for x86_64
  679. +       case BUSRATIO_TIMER:
  680. +           /* Fun fun fun. :-|  */
  681. +           cpuFreq = timeRDTSC() * 20;
  682. +           prfsts = getFakeMSR(cpuFreq, busFreq);
  683. +           tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
  684. +           N_by_2_bus_ratio = prfsts & bit(46);
  685. +           break;
  686. +#endif
  687. +       case BUSRATIO_AUTODETECT:
  688. +       default:
  689. +           kTscPanicOn = 1; /* see sanity check below */
  690. +   };
  691. +
  692. +#ifdef __i386__
  693. +   /* Verify */
  694. +   if (!PE_parse_boot_argn("-notscverify", &boot_arg, sizeof(boot_arg))) {
  695. +       uint64_t realCpuFreq = timeRDTSC() * 20;
  696. +       cpuFreq = tscGranularity * busFreq;
  697. +       if (N_by_2_bus_ratio) cpuFreq += (busFreq / 2);
  698. +       uint64_t difference = 0;
  699. +       if (realCpuFreq > cpuFreq)
  700. +           difference = realCpuFreq - cpuFreq;
  701. +       else
  702. +           difference = cpuFreq - realCpuFreq;
  703. +      
  704. +       if (difference >= 4*Mega) {
  705. +           // Shouldn't have more than 4MHz difference. This is about 2-3% of most FSBs.
  706. +           // Fall back to using measured speed and correct the busFreq
  707. +           // Note that the tscGran was read from CPU so should be correct.
  708. +           // Only on Phenom the tscGran is calculated by dividing by busFreq.
  709. +           printf("TSC: Reported FSB: %4d.%04dMHz, ", (uint32_t)(busFreq / Mega), (uint32_t)(busFreq % Mega));
  710. +           if (N_by_2_bus_ratio)
  711. +               busFreq = (realCpuFreq * 2) / (1 + 2*tscGranularity);
  712. +           else
  713. +               busFreq = realCpuFreq / tscGranularity;
  714. +           printf("corrected FSB: %4d.%04dMHz\n", (uint32_t)(busFreq / Mega), (uint32_t)(busFreq % Mega));
  715. +           // Reset the busCvt factors
  716. +           busFCvtt2n = ((1 * Giga) << 32) / busFreq;
  717. +           busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
  718. +           busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq);
  719. +           printf("TSC: Verification of clock speed failed. "
  720. +                  "Fallback correction was performed. Please upgrade bootloader.\n");
  721. +       } else {
  722. +           printf("TSC: Verification of clock speed PASSED.\n");
  723. +       }
  724. +   }
  725. +#else
  726. +   printf("TSC: Verification of clock speed not available in x86_64.\n");
  727. +#endif
  728. +  
  729. +   /* Do a sanity check of the granularity */
  730. +   if ((tscGranularity == 0) ||
  731. +       (tscGranularity > 30) ||
  732. +       (busFreq < 50*Mega) ||
  733. +       (busFreq > 1*Giga) ||
  734. +       /* The following is useful to force a panic to print diagnostic info */
  735. +       PE_parse_boot_argn("-tscpanic", &boot_arg, sizeof(boot_arg)))
  736. +   {
  737. +       printf("\n\n");
  738. +       printf(" >>> The real-time clock was not properly initialized on your system!\n");
  739. +       printf("     Contact Voodoo Software for further information.\n");
  740. +       kTscPanicOn = 1; /* Later when the console is initialized, this will show up, and we'll halt */
  741. +       if (tscGranularity == 0) tscGranularity = 1; /* to avoid divide-by-zero in the following few lines */
  742. +   }
  743.  
  744. -       prfsts = rdmsr64(IA32_PERF_STS);
  745. -       tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
  746. -       N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
  747.         }
  748. +       break;
  749.     }
  750.  
  751.     if (busFreq != 0) {
  752.         busFCvtt2n = ((1 * Giga) << 32) / busFreq;
  753.         busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
  754.     } else {
  755. -       panic("tsc_init: EFI not supported!\n");
  756. +       /* Instead of panicking, set a default FSB frequency */
  757. +       busFreq = 133*Mega;
  758. +       kprintf("rtclock_init: Setting fsb to %u MHz\n", (uint32_t) (busFreq/Mega));
  759. +  
  760.     }
  761.  
  762.     kprintf(" BUS: Frequency = %6d.%06dMHz, "
  763. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/i386/tsc.h xnu-2050.7.9-sinetek/osfmk/i386/tsc.h
  764. --- xnu-2050.7.9/osfmk/i386/tsc.h   2010-04-21 21:25:23.000000000 -0400
  765. +++ xnu-2050.7.9-sinetek/osfmk/i386/tsc.h   2012-12-31 10:39:54.000000000 -0500
  766. @@ -44,6 +44,11 @@
  767.  #define IA32_PERF_STS      0x198
  768.  #define    SLOW_TSC_THRESHOLD  1000067800  /* TSC is too slow for regular nanotime() algorithm */
  769.  
  770. +/* mercurysquad: MSRs for AMD support (getting bus ratio) */
  771. +#define AMD_PERF_STS   0xC0010042  /* AMD's version of the MSR */
  772. +#define AMD_PSTATE0_STS    0xC0010064  /* K10/phenom class AMD cpus */
  773. +#define AMD_COFVID_STS 0xC0010071  /* This might be a better MSR for K10? */
  774. +
  775.  #ifndef ASSEMBLER
  776.  extern uint64_t    busFCvtt2n;
  777.  extern uint64_t    busFCvtn2t;
  778. @@ -53,6 +58,7 @@
  779.  extern uint64_t tscGranularity;
  780.  extern uint64_t bus2tsc;
  781.  extern uint64_t busFreq;
  782. +extern uint32_t kTscPanicOn;
  783.  extern uint32_t    flex_ratio;
  784.  extern uint32_t    flex_ratio_min;
  785.  extern uint32_t    flex_ratio_max;
  786. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/kern/opemu.c xnu-2050.7.9-sinetek/osfmk/kern/opemu.c
  787. --- xnu-2050.7.9/osfmk/kern/opemu.c 1969-12-31 19:00:00.000000000 -0500
  788. +++ xnu-2050.7.9-sinetek/osfmk/kern/opemu.c 2012-12-31 10:38:11.000000000 -0500
  789. @@ -0,0 +1,742 @@
  790. +#include <stdint.h>
  791. +#include "opemu.h"
  792. +/*   ** SINETEK **
  793. +* This is an emulator for the SSSE3 instruction set.
  794. +* It is executed as a part of the XNU kernel as a trap.
  795. +*
  796. +* Information about SSSE3: there are 32 instructions.
  797. +* A few of these use MMX registers and are therefore just like XMM.
  798. +*/
  799. +
  800. +#ifndef TESTCASE
  801. +#include <kern/sched_prim.h>
  802. +#endif
  803. +
  804. +#if 0
  805. +#define printf(...)
  806. +#endif
  807. +
  808. +#ifndef TESTCASE
  809. +void print_buffer(uint8_t *buffer);
  810. +void print_buffer(uint8_t *buffer)
  811. +{
  812. +    int i;
  813. +    printf("DEBUG: emu: buffer data ");
  814. +    for(i = 0; i < 15; ++i) {
  815. +       kprintf("%02x ", buffer[i]);
  816. +    }
  817. +    kprintf("\n");
  818. +}
  819. +
  820. +void print_debug(uint8_t *buffer, x86_saved_state_t *saved_state);
  821. +void print_debug(uint8_t *buffer, x86_saved_state_t *saved_state)
  822. +{
  823. +    if(is_saved_state64(saved_state)) {
  824. +       x86_saved_state64_t     *regs;
  825. +       regs = saved_state64(saved_state);
  826. +       kprintf("DEBUG: emu64:  eip=%016llx\n", regs->isf.rip);
  827. +    } else {
  828. +       x86_saved_state32_t     *regs;
  829. +       regs = saved_state32(saved_state);
  830. +       kprintf("DEBUG: emu32:  eip=%08x\n", regs->eip);
  831. +    }
  832. +    print_buffer(buffer);
  833. +}
  834. +
  835. +void opemu_trap(
  836. +    x86_saved_state_t *saved_state)
  837. +{
  838. +    /* instructions are at most 15ish bytes
  839. +     * can we just dereference something instead?
  840. +     */
  841. +    uint8_t buffered_code[15];
  842. +
  843. +    if(is_saved_state64(saved_state)) {
  844. +       x86_saved_state64_t     *regs;
  845. +       regs = saved_state64(saved_state);
  846. +
  847. +   //kprintf("DEBUG: emu64:  eip=%016llx\n", regs->isf.rip);
  848. +       user_addr_t pc = regs->isf.rip;
  849. +       copyin(pc, (char*)buffered_code, 15);
  850. +          //print_buffer(buffered_code);          
  851. +       regs->isf.rip += opemu(buffered_code, saved_state);
  852. +   //kprintf("returning from opemu %eip=%08x\n", regs->isf.rip);
  853. +    } else {
  854. +       x86_saved_state32_t     *regs;
  855. +       regs = saved_state32(saved_state);
  856. +
  857. +       //kprintf("DEBUG: emu32:  eip=%08x\n", regs->eip);
  858. +       user_addr_t pc = regs->eip;
  859. +       copyin(pc, (char*)buffered_code, 15);
  860. +          //print_buffer(buffered_code);          
  861. +       int opsize = (int) opemu(buffered_code, saved_state);          
  862. +       regs->eip += opsize;
  863. +   //kprintf("returning from opemu %eip=%08x\n", regs->eip);
  864. +    }
  865. +
  866. +    thread_exception_return();
  867. +    /* NOTREACHED */
  868. +}
  869. +
  870. +// forward declaration for sysenter handler of mach;
  871. +void mach_call_munger(x86_saved_state_t *state);
  872. +void unix_syscall(x86_saved_state_t *);
  873. +uint64_t opemu(uint8_t *code, x86_saved_state_t *saved_state)
  874. +{
  875. +    // size of the instruction in bytes. will serve for adjusting
  876. +    // the return address;
  877. +    int ins_size = 1;
  878. +    XMM Xsrc, Xdst, Xres;
  879. +    MM Msrc, Mdst, Mres;
  880. +
  881. +    if(code[0] == 0x0F && code[1] == 0x34) {
  882. +       // sysenter, TODO remove redundancy regs load
  883. +       //   edx        return address
  884. +       //   ecx        return stack
  885. +       x86_saved_state32_t     *regs;
  886. +       regs = saved_state32(saved_state);
  887. +       regs->eip = regs->edx;
  888. +       regs->uesp = regs->ecx;
  889. +
  890. +       if((signed int)regs->eax < 0) {
  891. +       //      printf("mach call\n");
  892. +           mach_call_munger(saved_state);
  893. +       } else {
  894. +       //      printf("unix call\n");
  895. +           unix_syscall(saved_state);
  896. +       }
  897. +       /* NEVER REACHES */
  898. +    } else if(code[0] == 0xFF) {
  899. +       // Instruction 0xFFFF, used as a debugging aid. (2-byte NOP)
  900. +       if(code[1] != 0xFF) goto invalid;
  901. +       ins_size = 2;
  902. +    } else if(
  903. +       (code[0] == 0x66 && code[1] == 0x0F && code[2] == 0x38) ||
  904. +       ((code[0] == 0x66 && code[2] == 0x0F && code[3] == 0x38)) ) {
  905. +       // Instruction would be of type XMM (128-bit).
  906. +       XMM *src, *dst, *rs;
  907. +       unsigned int NSRC, NDST;
  908. +       src = &Xsrc;
  909. +       dst = &Xdst;
  910. +       rs = &Xres;
  911. +       ins_size = 5;
  912. +       uint8_t opcode = code[3];
  913. +
  914. +       /* In long mode, there is a possible 0x40->0x4f prefix
  915. +        * used to handle the higher xmm registers.
  916. +        */
  917. +      
  918. +       InterpretSSSE3Operands(&code[4], &NSRC, &NDST);
  919. +       if(code[1] & 0x40) {
  920. +           if(code[1] & 0x44) NDST += 8;
  921. +           if(code[1] & 0x41) NSRC += 8;
  922. +           opcode = code[4];
  923. +           ins_size += 1;
  924. +       }
  925. +       getxmm(src, NSRC);
  926. +       getxmm(dst, NDST);
  927. +
  928. +       switch(opcode) {
  929. +       case 0x00: pshufb128(src,dst,rs); break;
  930. +       case 0x01: phaddw128(src,dst,rs); break;
  931. +       case 0x02: phaddd128(src,dst,rs); break;
  932. +       case 0x03: phaddsw128(src,dst,rs); break;
  933. +       case 0x04: pmaddubsw128(src,dst,rs); break;
  934. +       case 0x05: phsubw128(src,dst,rs); break;
  935. +       case 0x06: phsubd128(src,dst,rs); break;
  936. +       case 0x07: phsubsw128(src,dst,rs); break;
  937. +       case 0x08: psignb128(src,dst,rs); break;
  938. +       case 0x09: psignw128(src,dst,rs); break;
  939. +       case 0x0A: psignd128(src,dst,rs); break;
  940. +       case 0x0B: pmulhrsw128(src,dst,rs); break;
  941. +       case 0x1C: pabsb128(src,rs); break;
  942. +       case 0x1D: pabsw128(src,rs); break;
  943. +       case 0x1E: pabsd128(src,rs); break;
  944. +       default: goto invalid; break;
  945. +       }
  946. +       movxmm(rs, NDST);
  947. +
  948. +    } else if(code[0] == 0x0F && code[1] == 0x38) {
  949. +       // Instruction would be of type MMX (64-bit).
  950. +       MM *src, *dst, *rs;
  951. +       unsigned int NSRC, NDST; // reg 0 to 7 possible.
  952. +       src = &Msrc;
  953. +       dst = &Mdst;
  954. +       rs = &Mres;
  955. +       ins_size = 4;
  956. +
  957. +       InterpretSSSE3Operands(&code[3], &NSRC, &NDST);
  958. +       getmm(src, NSRC);
  959. +       getmm(dst, NDST);
  960. +
  961. +       switch(code[2]) {
  962. +       case 0x00: pshufb64(src,dst,rs); break;
  963. +       case 0x01: phaddw64(src,dst,rs); break;
  964. +       case 0x02: phaddd64(src,dst,rs); break;
  965. +       case 0x03: phaddsw64(src,dst,rs); break;
  966. +       case 0x04: pmaddubsw64(src,dst,rs); break;
  967. +       case 0x05: phsubw64(src,dst,rs); break;
  968. +       case 0x06: phsubd64(src,dst,rs); break;
  969. +       case 0x07: phsubsw64(src,dst,rs); break;
  970. +       case 0x08: psignb64(src,dst,rs); break;
  971. +       case 0x09: psignw64(src,dst,rs); break;
  972. +       case 0x0A: psignd64(src,dst,rs); break;
  973. +       case 0x0B: pmulhrsw64(src,dst,rs); break;
  974. +       case 0x1C: pabsb64(src,rs); break;
  975. +       case 0x1D: pabsw64(src,rs); break;
  976. +       case 0x1E: pabsd64(src,rs); break;
  977. +       default: goto invalid; break;
  978. +       }
  979. +       movmm(rs, NDST);
  980. +
  981. +    } else if(code[0] == 0x0F && code[1] == 0x3A &&
  982. +         code[2] == 0x0F) {
  983. +       // Not groupable with the other ones. (64-bit).
  984. +       MM *src, *dst, *rs;
  985. +       unsigned int NSRC, NDST; // reg 0 to 7 possible.
  986. +       src = &Msrc;
  987. +       dst = &Mdst;
  988. +       rs = &Mres;
  989. +       ins_size = 5;
  990. +
  991. +       InterpretSSSE3Operands(&code[3], &NSRC, &NDST);
  992. +       getmm(src, NSRC);
  993. +       getmm(dst, NDST);              
  994. +       palignr64(src,dst,rs,code[4]);
  995. +       movmm(rs, NDST);
  996. +
  997. +    } else if(
  998. +   (code[0] == 0x66 && code[1] == 0x0F &&
  999. +   code[2] == 0x3A && code[3] == 0x0F) ||
  1000. +   ((code[0] == 0x66 && code[2] == 0x0F &&
  1001. +   code[3] == 0x3A && code[4] == 0x0F) )
  1002. +   ) {
  1003. +       // Not groupable with the other ones. (128-bit).
  1004. +       XMM *src, *dst, *rs;
  1005. +       unsigned int NSRC, NDST;
  1006. +       src = &Xsrc;
  1007. +       dst = &Xdst;
  1008. +       rs = &Xres;
  1009. +       ins_size = 6;
  1010. +       uint8_t operand = code[5];
  1011. +       uint8_t modrm = code[4];
  1012. +
  1013. +       /* In long mode, there is a possible 0x40->0x4f prefix
  1014. +        * used to handle the higher xmm registers.
  1015. +        */
  1016. +      
  1017. +       InterpretSSSE3Operands(&code[4], &NSRC, &NDST);
  1018. +       if(code[1] & 0x40) {
  1019. +           if(code[1] & 0x44) NDST += 8;
  1020. +           if(code[1] & 0x41) NSRC += 8;
  1021. +           modrm = code[5];
  1022. +           operand = code[6];
  1023. +           ins_size += 1;
  1024. +       }
  1025. +
  1026. +       InterpretSSSE3Operands(&modrm, &NSRC, &NDST);
  1027. +       getxmm(src, NSRC);
  1028. +       getxmm(dst, NDST);
  1029. +       palignr128(src,dst,rs,operand);
  1030. +       movxmm(rs, NDST);
  1031. +
  1032. +    } else {
  1033. +invalid:
  1034. +       // Invalid opcode, report it
  1035. +   kprintf("EMU: invop\n");
  1036. +   printf("EMU: invop\n");
  1037. +       print_debug(code, saved_state);
  1038. +    }
  1039. +
  1040. +
  1041. +    return ins_size;
  1042. +}
  1043. +#endif
  1044. +
  1045. +/** interpret ModRM byte
  1046. +* [2:0] -- source operand
  1047. +* [5:4] -- destination operand
  1048. +*/
  1049. +inline void InterpretSSSE3Operands(uint8_t *ModRM, unsigned int *src,
  1050. +    unsigned int *dst)
  1051. +{
  1052. +    *src = *ModRM & 0x7;
  1053. +    *dst = (*ModRM >> 3) & 0x7;
  1054. +}
  1055. +
  1056. +/* get value from the xmm register i */
  1057. +inline void getxmm(XMM *v, unsigned int i)
  1058. +{
  1059. +    switch(i) {
  1060. +    case 0:
  1061. +      asm __volatile__ ("movdqu %%xmm0, %0" : "=m" (*v->a8));
  1062. +    break;
  1063. +    case 1:
  1064. +      asm __volatile__ ("movdqu %%xmm1, %0" : "=m" (*v->a8));
  1065. +    break;
  1066. +    case 2:
  1067. +      asm __volatile__ ("movdqu %%xmm2, %0" : "=m" (*v->a8));
  1068. +    break;
  1069. +    case 3:
  1070. +      asm __volatile__ ("movdqu %%xmm3, %0" : "=m" (*v->a8));
  1071. +    break;
  1072. +    case 4:
  1073. +      asm __volatile__ ("movdqu %%xmm4, %0" : "=m" (*v->a8));
  1074. +    break;
  1075. +    case 5:
  1076. +      asm __volatile__ ("movdqu %%xmm5, %0" : "=m" (*v->a8));
  1077. +    break;
  1078. +    case 6:
  1079. +      asm __volatile__ ("movdqu %%xmm6, %0" : "=m" (*v->a8));
  1080. +    break;
  1081. +    case 7:
  1082. +      asm __volatile__ ("movdqu %%xmm7, %0" : "=m" (*v->a8));
  1083. +    break;
  1084. +    case 8:
  1085. +      asm __volatile__ ("movdqu %%xmm8, %0" : "=m" (*v->a8));
  1086. +    break;
  1087. +    case 9:
  1088. +      asm __volatile__ ("movdqu %%xmm9, %0" : "=m" (*v->a8));
  1089. +    break;
  1090. +    case 10:
  1091. +      asm __volatile__ ("movdqu %%xmm10, %0" : "=m" (*v->a8));
  1092. +    break;
  1093. +    case 11:
  1094. +      asm __volatile__ ("movdqu %%xmm11, %0" : "=m" (*v->a8));
  1095. +    break;
  1096. +    case 12:
  1097. +      asm __volatile__ ("movdqu %%xmm12, %0" : "=m" (*v->a8));
  1098. +    break;
  1099. +    case 13:
  1100. +      asm __volatile__ ("movdqu %%xmm13, %0" : "=m" (*v->a8));
  1101. +    break;
  1102. +    case 14:
  1103. +      asm __volatile__ ("movdqu %%xmm14, %0" : "=m" (*v->a8));
  1104. +    break;
  1105. +    case 15:
  1106. +      asm __volatile__ ("movdqu %%xmm15, %0" : "=m" (*v->a8));
  1107. +    break;
  1108. +    }
  1109. +}
  1110. +
  1111. +/* get value from the mm register i  */
  1112. +inline void getmm(MM *v, unsigned int i)
  1113. +{
  1114. +    switch(i) {
  1115. +    case 0:
  1116. +      asm __volatile__ ("movq %%mm0, %0" : "=m" (*v->a8));
  1117. +    break;
  1118. +    case 1:
  1119. +      asm __volatile__ ("movq %%mm1, %0" : "=m" (*v->a8));
  1120. +    break;
  1121. +    case 2:
  1122. +      asm __volatile__ ("movq %%mm2, %0" : "=m" (*v->a8));
  1123. +    break;
  1124. +    case 3:
  1125. +      asm __volatile__ ("movq %%mm3, %0" : "=m" (*v->a8));
  1126. +    break;
  1127. +    case 4:
  1128. +      asm __volatile__ ("movq %%mm4, %0" : "=m" (*v->a8));
  1129. +    break;
  1130. +    case 5:
  1131. +      asm __volatile__ ("movq %%mm5, %0" : "=m" (*v->a8));
  1132. +    break;
  1133. +    case 6:
  1134. +      asm __volatile__ ("movq %%mm6, %0" : "=m" (*v->a8));
  1135. +    break;
  1136. +    case 7:
  1137. +      asm __volatile__ ("movq %%mm7, %0" : "=m" (*v->a8));
  1138. +    break;
  1139. +    }
  1140. +}
  1141. +
  1142. +/* move value over to xmm register i */
  1143. +inline void movxmm(XMM *v, unsigned int i)
  1144. +{
  1145. +    switch(i) {
  1146. +    case 0:
  1147. +      asm __volatile__ ("movdqu %0, %%xmm0" :: "m" (*v->a8) );
  1148. +    break;
  1149. +    case 1:
  1150. +      asm __volatile__ ("movdqu %0, %%xmm1" :: "m" (*v->a8) );
  1151. +    break;
  1152. +    case 2:
  1153. +      asm __volatile__ ("movdqu %0, %%xmm2" :: "m" (*v->a8) );
  1154. +    break;
  1155. +    case 3:
  1156. +      asm __volatile__ ("movdqu %0, %%xmm3" :: "m" (*v->a8) );
  1157. +    break;
  1158. +    case 4:
  1159. +      asm __volatile__ ("movdqu %0, %%xmm4" :: "m" (*v->a8) );
  1160. +    break;
  1161. +    case 5:
  1162. +      asm __volatile__ ("movdqu %0, %%xmm5" :: "m" (*v->a8) );
  1163. +    break;
  1164. +    case 6:
  1165. +      asm __volatile__ ("movdqu %0, %%xmm6" :: "m" (*v->a8) );
  1166. +    break;
  1167. +    case 7:
  1168. +      asm __volatile__ ("movdqu %0, %%xmm7" :: "m" (*v->a8) );
  1169. +    break;
  1170. +    case 8:
  1171. +      asm __volatile__ ("movdqu %0, %%xmm8" :: "m" (*v->a8) );
  1172. +    break;
  1173. +    case 9:
  1174. +      asm __volatile__ ("movdqu %0, %%xmm9" :: "m" (*v->a8) );
  1175. +    break;
  1176. +    case 10:
  1177. +      asm __volatile__ ("movdqu %0, %%xmm10" :: "m" (*v->a8) );
  1178. +    break;
  1179. +    case 11:
  1180. +      asm __volatile__ ("movdqu %0, %%xmm11" :: "m" (*v->a8) );
  1181. +    break;
  1182. +    case 12:
  1183. +      asm __volatile__ ("movdqu %0, %%xmm12" :: "m" (*v->a8) );
  1184. +    break;
  1185. +    case 13:
  1186. +      asm __volatile__ ("movdqu %0, %%xmm13" :: "m" (*v->a8) );
  1187. +    break;
  1188. +    case 14:
  1189. +      asm __volatile__ ("movdqu %0, %%xmm14" :: "m" (*v->a8) );
  1190. +    break;
  1191. +    case 15:
  1192. +      asm __volatile__ ("movdqu %0, %%xmm15" :: "m" (*v->a8) );
  1193. +    break;
  1194. +    }
  1195. +}
  1196. +
  1197. +/* move value over to mm register i */
  1198. +inline void movmm(MM *v, unsigned int i)
  1199. +{
  1200. +    switch(i) {
  1201. +    case 0:
  1202. +      asm __volatile__ ("movq %0, %%mm0" :: "m" (*v->a8) );
  1203. +    break;
  1204. +    case 1:
  1205. +      asm __volatile__ ("movq %0, %%mm1" :: "m" (*v->a8) );
  1206. +    break;
  1207. +    case 2:
  1208. +      asm __volatile__ ("movq %0, %%mm2" :: "m" (*v->a8) );
  1209. +    break;
  1210. +    case 3:
  1211. +      asm __volatile__ ("movq %0, %%mm3" :: "m" (*v->a8) );
  1212. +    break;
  1213. +    case 4:
  1214. +      asm __volatile__ ("movq %0, %%mm4" :: "m" (*v->a8) );
  1215. +    break;
  1216. +    case 5:
  1217. +      asm __volatile__ ("movq %0, %%mm5" :: "m" (*v->a8) );
  1218. +    break;
  1219. +    case 6:
  1220. +      asm __volatile__ ("movq %0, %%mm6" :: "m" (*v->a8) );
  1221. +    break;
  1222. +    case 7:
  1223. +      asm __volatile__ ("movq %0, %%mm7" :: "m" (*v->a8) );
  1224. +    break;
  1225. +    }
  1226. +}
  1227. +
  1228. +/***************************************/
  1229. +/** SSSE3 instructions implementation **/
  1230. +/***************************************/
  1231. +
  1232. +#define SATSW(x) ((x > 32767)? 32767 : ((x < -32768)? -32768 : x) )
  1233. +
  1234. +
  1235. +
  1236. +/** complex byte shuffle **/
  1237. +void pshufb128(XMM *src, XMM *dst, XMM *res)
  1238. +{
  1239. +    int i;
  1240. +    for(i = 0; i < 16; ++i)
  1241. +       res->a8[i] = (src->a8[i] < 0) ? 0 :
  1242. +           dst->a8[src->a8[i] & 0xF];
  1243. +}
  1244. +
  1245. +void pshufb64(MM *src, MM *dst, MM *res)
  1246. +{
  1247. +    int i;
  1248. +    for(i = 0; i < 8; ++i)
  1249. +       res->a8[i] = (src->a8[i] < 0) ? 0 :
  1250. +           dst->a8[src->a8[i] & 0x7];
  1251. +}
  1252. +
  1253. +/** packed horizontal add word **/
  1254. +void phaddw128(XMM *src, XMM *dst, XMM *res)
  1255. +{
  1256. +    int i;
  1257. +    for(i = 0; i < 4; ++i)
  1258. +       res->a16[i] = dst->a16[2*i] + dst->a16[2*i+1];
  1259. +    for(i = 0; i < 4; ++i)
  1260. +       res->a16[i+4] = src->a16[2*i] + src->a16[2*i+1];
  1261. +}
  1262. +
  1263. +void phaddw64(MM *src, MM *dst, MM *res)
  1264. +{
  1265. +    res->a16[0] = dst->a16[0] + dst->a16[1];
  1266. +    res->a16[1] = dst->a16[2] + dst->a16[3];
  1267. +    res->a16[2] = src->a16[0] + src->a16[1];
  1268. +    res->a16[3] = src->a16[2] + src->a16[3];
  1269. +}
  1270. +
  1271. +/** packed horizontal add double **/
  1272. +void phaddd128(XMM *src, XMM *dst, XMM *res)
  1273. +{
  1274. +    int i;
  1275. +    for(i = 0; i < 2; ++i) {
  1276. +       res->a32[i  ] = dst->a32[2*i] + dst->a32[2*i+1];
  1277. +    }
  1278. +    for(i = 0; i < 2; ++i)
  1279. +       res->a32[i+2] = src->a32[2*i] + src->a32[2*i+1];
  1280. +}
  1281. +
  1282. +void phaddd64(MM *src, MM *dst, MM *res)
  1283. +{
  1284. +    res->a32[0] = dst->a32[0] + dst->a32[1];
  1285. +    res->a32[1] = src->a32[0] + src->a32[1];
  1286. +}
  1287. +
  1288. +/** packed horizontal add and saturate word **/
  1289. +void phaddsw128(XMM *src, XMM *dst, XMM *res)
  1290. +{
  1291. +    int i;
  1292. +    for(i = 0; i < 4; ++i)
  1293. +       res->a16[i] = SATSW( dst->a16[2*i] + dst->a16[2*i+1] );
  1294. +    for(i = 0; i < 4; ++i)
  1295. +       res->a16[i+4] = SATSW( src->a16[2*i] + src->a16[2*i+1] );
  1296. +}
  1297. +
  1298. +void phaddsw64(MM *src, MM *dst, MM *res)
  1299. +{
  1300. +    res->a16[0] = SATSW( dst->a16[0] + dst->a16[1] );
  1301. +    res->a16[1] = SATSW( dst->a16[2] + dst->a16[3] );
  1302. +    res->a16[2] = SATSW( src->a16[0] + src->a16[1] );
  1303. +    res->a16[3] = SATSW( src->a16[2] + src->a16[3] );
  1304. +}
  1305. +
  1306. +/** multiply and add packed signed and unsigned bytes **/
  1307. +void pmaddubsw128(XMM *src, XMM *dst, XMM *res)
  1308. +{
  1309. +    int i;
  1310. +    int16_t tmp[16];
  1311. +    for(i=0; i<16; ++i) {
  1312. +       tmp[i] = src->a8[i] * dst->ua8[i];
  1313. +    }
  1314. +    for(i=0; i<8; ++i) {
  1315. +       res->a16[i] = SATSW( tmp[2*i] + tmp[2*i+1] );
  1316. +    }
  1317. +}
  1318. +
  1319. +void pmaddubsw64(MM *src, MM *dst, MM *res)
  1320. +{
  1321. +    int i;
  1322. +    int16_t tmp[8];
  1323. +    for(i=0; i<8; ++i) {
  1324. +       tmp[i] = src->a8[i] * dst->ua8[i];
  1325. +    }
  1326. +    for(i=0; i<4; ++i) {
  1327. +       res->a16[i] = SATSW( tmp[2*i] + tmp[2*i+1] );
  1328. +    }
  1329. +}
  1330. +
  1331. +/** packed horizontal subtract word **/
  1332. +void phsubw128(XMM *src, XMM *dst, XMM *res)
  1333. +{
  1334. +    int i;
  1335. +    for(i = 0; i < 4; ++i)
  1336. +       res->a16[i] = dst->a16[2*i] - dst->a16[2*i+1];
  1337. +    for(i = 0; i < 4; ++i)
  1338. +       res->a16[i+4] = src->a16[2*i] - src->a16[2*i+1];
  1339. +}
  1340. +
  1341. +void phsubw64(MM *src, MM *dst, MM *res)
  1342. +{
  1343. +    res->a16[0] = dst->a16[0] - dst->a16[1];
  1344. +    res->a16[1] = dst->a16[2] - dst->a16[3];
  1345. +    res->a16[2] = src->a16[0] - src->a16[1];
  1346. +    res->a16[3] = src->a16[2] - src->a16[3];
  1347. +}
  1348. +
  1349. +/** packed horizontal subtract double **/
  1350. +void phsubd128(XMM *src, XMM *dst, XMM *res)
  1351. +{
  1352. +    int i;
  1353. +    for(i = 0; i < 2; ++i)
  1354. +       res->a32[i  ] = dst->a32[2*i] - dst->a32[2*i+1];
  1355. +    for(i = 0; i < 2; ++i)
  1356. +       res->a32[i+2] = src->a32[2*i] - src->a32[2*i+1];
  1357. +}
  1358. +
  1359. +void phsubd64(MM *src, MM *dst, MM *res)
  1360. +{
  1361. +    res->a32[0] = dst->a32[0] - dst->a32[1];
  1362. +    res->a32[1] = src->a32[0] - src->a32[1];
  1363. +}
  1364. +
  1365. +/** packed horizontal subtract and saturate word **/
  1366. +void phsubsw128(XMM *src, XMM *dst, XMM *res)
  1367. +{
  1368. +    int i;
  1369. +    for(i = 0; i < 4; ++i)
  1370. +       res->a16[i] = SATSW( dst->a16[2*i] - dst->a16[2*i+1] );
  1371. +    for(i = 0; i < 4; ++i)
  1372. +       res->a16[i+4] = SATSW( src->a16[2*i] - src->a16[2*i+1] );
  1373. +}
  1374. +
  1375. +void phsubsw64(MM *src, MM *dst, MM *res)
  1376. +{
  1377. +    res->a16[0] = SATSW( dst->a16[0] - dst->a16[1] );
  1378. +    res->a16[1] = SATSW( dst->a16[2] - dst->a16[3] );
  1379. +    res->a16[2] = SATSW( src->a16[0] - src->a16[1] );
  1380. +    res->a16[3] = SATSW( src->a16[2] - src->a16[3] );
  1381. +}
  1382. +
  1383. +/** packed sign byte **/
  1384. +void psignb128(XMM *src, XMM *dst, XMM *res)
  1385. +{
  1386. +    int i;
  1387. +    for(i = 0; i < 16; ++i) {
  1388. +       if(src->a8[i] < 0) res->a8[i] = -dst->a8[i];
  1389. +       else if(src->a8[i] == 0) res->a8[i] = 0;
  1390. +       else res->a8[i] = dst->a8[i];
  1391. +    }
  1392. +}
  1393. +
  1394. +void psignb64(MM *src, MM *dst, MM *res)
  1395. +{
  1396. +    int i;
  1397. +    for(i = 0; i < 8; ++i) {
  1398. +       if(src->a8[i] < 0) res->a8[i] = -dst->a8[i];
  1399. +       else if(src->a8[i] == 0) res->a8[i] = 0;
  1400. +       else res->a8[i] = dst->a8[i];
  1401. +    }
  1402. +}
  1403. +
  1404. +/** packed sign word **/
  1405. +void psignw128(XMM *src, XMM *dst, XMM *res)
  1406. +{
  1407. +    int i;
  1408. +    for(i = 0; i < 8; ++i) {
  1409. +       if(src->a16[i] < 0) res->a16[i] = -dst->a16[i];
  1410. +       else if(src->a16[i] == 0) res->a16[i] = 0;
  1411. +       else res->a16[i] = dst->a16[i];
  1412. +    }
  1413. +}
  1414. +
  1415. +void psignw64(MM *src, MM *dst, MM *res)
  1416. +{
  1417. +    int i;
  1418. +    for(i = 0; i < 4; ++i) {
  1419. +       if(src->a16[i] < 0) res->a16[i] = -dst->a16[i];
  1420. +       else if(src->a16[i] == 0) res->a16[i] = 0;
  1421. +       else res->a16[i] = dst->a16[i];
  1422. +    }
  1423. +}
  1424. +
  1425. +/** packed sign double **/
  1426. +void psignd128(XMM *src, XMM *dst, XMM *res)
  1427. +{
  1428. +    int i;
  1429. +    for(i = 0; i < 4; ++i) {
  1430. +       if(src->a32[i] < 0) res->a32[i] = -dst->a32[i];
  1431. +       else if(src->a32[i] == 0) res->a32[i] = 0;
  1432. +       else res->a32[i] = dst->a32[i];
  1433. +    }
  1434. +}
  1435. +
  1436. +void psignd64(MM *src, MM *dst, MM *res)
  1437. +{
  1438. +    int i;
  1439. +    for(i = 0; i < 2; ++i) {
  1440. +       if(src->a32[i] < 0) res->a32[i] = -dst->a32[i];
  1441. +       else if(src->a32[i] == 0) res->a32[i] = 0;
  1442. +       else res->a32[i] = dst->a32[i];
  1443. +    }
  1444. +}
  1445. +
  1446. +/** packed multiply high with round and scale word **/
  1447. +void pmulhrsw128(XMM *src, XMM *dst, XMM *res)
  1448. +{
  1449. +    int i;
  1450. +    for(i = 0; i < 8; ++i)
  1451. +       res->a16[i] = (((dst->a16[i] * src->a16[i] >> 14) + 1) >> 1);
  1452. +}
  1453. +
  1454. +void pmulhrsw64(MM *src, MM *dst, MM *res)
  1455. +{
  1456. +    int i;
  1457. +    for(i = 0; i < 4; ++i)
  1458. +       res->a16[i] = (((dst->a16[i] * src->a16[i] >> 14) + 1) >> 1);
  1459. +}
  1460. +
  1461. +/** packed absolute value byte **/
  1462. +void pabsb128(XMM *src, XMM *res)
  1463. +{
  1464. +    int i;
  1465. +    for(i = 0; i < 16; ++i)
  1466. +       if(src->a8[i] < 0) res->a8[i] = -src->a8[i];
  1467. +       else res->a8[i] = src->a8[i];
  1468. +}
  1469. +
  1470. +void pabsb64(MM *src, MM *res)
  1471. +{
  1472. +    int i;
  1473. +    for(i = 0; i < 8; ++i)
  1474. +       if(src->a8[i] < 0) res->a8[i] = -src->a8[i];
  1475. +       else res->a8[i] = src->a8[i];
  1476. +}
  1477. +
  1478. +/** packed absolute value word **/
  1479. +void pabsw128(XMM *src, XMM *res)
  1480. +{
  1481. +    int i;
  1482. +    for(i = 0; i < 8; ++i)
  1483. +       if(src->a16[i] < 0) res->a16[i] = -src->a16[i];
  1484. +       else res->a16[i] = src->a16[i];
  1485. +}
  1486. +
  1487. +void pabsw64(MM *src, MM *res)
  1488. +{
  1489. +    int i;
  1490. +    for(i = 0; i < 4; ++i)
  1491. +       if(src->a16[i] < 0) res->a16[i] = -src->a16[i];
  1492. +       else res->a16[i] = src->a16[i];
  1493. +}
  1494. +
  1495. +/** packed absolute value double **/
  1496. +void pabsd128(XMM *src, XMM *res)
  1497. +{
  1498. +    int i;
  1499. +    for(i = 0; i < 4; ++i)
  1500. +       if(src->a32[i] < 0) res->a32[i] = -src->a32[i];
  1501. +       else res->a32[i] = src->a32[i];
  1502. +}
  1503. +
  1504. +void pabsd64(MM *src, MM *res)
  1505. +{
  1506. +    int i;
  1507. +    for(i = 0; i < 2; ++i)
  1508. +       if(src->a32[i] < 0) res->a32[i] = -src->a32[i];
  1509. +       else res->a32[i] = src->a32[i];
  1510. +}
  1511. +
  1512. +/** packed align right **/
  1513. +void palignr128(XMM *src, XMM *dst, XMM *res, uint8_t IMM)
  1514. +{
  1515. +   int n = IMM * 8;
  1516. +   __uint128_t low, high;
  1517. +   low = src->ua128;
  1518. +   high = dst->ua128;
  1519. +   res->ua128 = (low >> n) + (high << (128-n));
  1520. +}
  1521. +
  1522. +void palignr64(MM *src, MM *dst, MM *res, uint8_t IMM)
  1523. +{
  1524. +   int n = IMM * 8;
  1525. +   __uint128_t t;
  1526. +   t = src->ua64 | ((__uint128_t)dst->ua64 << 64);
  1527. +   t >>= n;
  1528. +   res->ua64 = t;
  1529. +}
  1530. +
  1531. +
  1532. diff -Naur -x BUILD -x .DS_Store -x '*.orig' -x '*.swp' -x '*.rej' xnu-2050.7.9/osfmk/kern/opemu.h xnu-2050.7.9-sinetek/osfmk/kern/opemu.h
  1533. --- xnu-2050.7.9/osfmk/kern/opemu.h 1969-12-31 19:00:00.000000000 -0500
  1534. +++ xnu-2050.7.9-sinetek/osfmk/kern/opemu.h 2012-12-31 10:38:11.000000000 -0500
  1535. @@ -0,0 +1,81 @@
  1536. +#ifndef OPEMU_H
  1537. +#define OPEMU_H
  1538. +#include <stdint.h>
  1539. +
  1540. +#ifndef TESTCASE
  1541. +#include <mach/thread_status.h>
  1542. +#endif
  1543. +
  1544. +union XMM_u { /* 16 bytes of storage overlaid as signed (aN) and unsigned (uaN) lanes of N bits each */
  1545. +int8_t a8[16];
  1546. +int16_t a16[8];
  1547. +int32_t a32[4];
  1548. +int64_t a64[2];
  1549. +__int128_t a128; /* the whole value as one signed integer */
  1550. +uint8_t ua8[16];
  1551. +uint16_t ua16[8];
  1552. +uint32_t ua32[4];
  1553. +uint64_t ua64[2];
  1554. +__uint128_t ua128; /* the whole value as one unsigned integer */
  1555. +};
  1556. +typedef union XMM_u XMM; /* operand type taken by the *128 routines declared below */
  1557. +
  1558. +union MM_u { /* 8 bytes of storage overlaid as signed (aN) and unsigned (uaN) lanes of N bits each */
  1559. +int8_t a8[8];
  1560. +int16_t a16[4];
  1561. +int32_t a32[2];
  1562. +int64_t a64; /* the whole value as one signed integer */
  1563. +uint8_t ua8[8];
  1564. +uint16_t ua16[4];
  1565. +uint32_t ua32[2];
  1566. +uint64_t ua64; /* the whole value as one unsigned integer */
  1567. +};
  1568. +typedef union MM_u MM; /* operand type taken by the *64 routines declared below */
  1569. +
  1570. +#ifndef TESTCASE /* the two entry points below take x86_saved_state_t, so they are omitted when TESTCASE is defined */
  1571. +void opemu_trap(x86_saved_state_t *saved_state);
  1572. +uint64_t opemu(uint8_t *code, x86_saved_state_t *saved_state);
  1573. +#endif
  1574. +inline void getxmm(XMM *v, unsigned int i); /* NOTE(review): presumably copies register xmm<i> into *v; defined outside this header, confirm */
  1575. +inline void getmm(MM *v, unsigned int i); /* NOTE(review): presumably the MMX counterpart of getxmm; confirm */
  1576. +inline void movxmm(XMM *v, unsigned int i); /* NOTE(review): presumably writes *v back to register xmm<i>; confirm */
  1577. +inline void movmm(MM *v, unsigned int i); /* NOTE(review): presumably the MMX counterpart of movxmm; confirm */
  1578. +inline void InterpretSSSE3Operands(uint8_t *ModRM,
  1579. +   unsigned int *src, unsigned int *dst); /* src and dst are out-parameters by signature; meaning not visible here */
  1580. +
  1581. +/** All 32 SSSE3 instruction forms: 16 mnemonics, each as a *128 (XMM operands) and a *64 (MM operands) routine **/
  1582. +void pshufb128(XMM *src, XMM *dst, XMM *res);
  1583. +void pshufb64(MM *src, MM *dst, MM *res);
  1584. +void phaddw128(XMM *src, XMM *dst, XMM *res);
  1585. +void phaddw64(MM *src, MM *dst, MM *res);
  1586. +void phaddd128(XMM *src, XMM *dst, XMM *res);
  1587. +void phaddd64(MM *src, MM *dst, MM *res);
  1588. +void phaddsw128(XMM *src, XMM *dst, XMM *res);
  1589. +void phaddsw64(MM *src, MM *dst, MM *res);
  1590. +void pmaddubsw128(XMM *src, XMM *dst, XMM *res);
  1591. +void pmaddubsw64(MM *src, MM *dst, MM *res);
  1592. +void phsubw128(XMM *src, XMM *dst, XMM *res);
  1593. +void phsubw64(MM *src, MM *dst, MM *res);
  1594. +void phsubd128(XMM *src, XMM *dst, XMM *res);
  1595. +void phsubd64(MM *src, MM *dst, MM *res);
  1596. +void phsubsw128(XMM *src, XMM *dst, XMM *res);
  1597. +void phsubsw64(MM *src, MM *dst, MM *res);
  1598. +void psignb128(XMM *src, XMM *dst, XMM *res);
  1599. +void psignb64(MM *src, MM *dst, MM *res);
  1600. +void psignw128(XMM *src, XMM *dst, XMM *res);
  1601. +void psignw64(MM *src, MM *dst, MM *res);
  1602. +void psignd128(XMM *src, XMM *dst, XMM *res);
  1603. +void psignd64(MM *src, MM *dst, MM *res);
  1604. +void pmulhrsw128(XMM *src, XMM *dst, XMM *res);
  1605. +void pmulhrsw64(MM *src, MM *dst, MM *res);
  1606. +void pabsb128(XMM *src, XMM *res); /* the pabs* routines are unary: src in, res out, no dst operand */
  1607. +void pabsb64(MM *src, MM *res);
  1608. +void pabsw128(XMM *src, XMM *res);
  1609. +void pabsw64(MM *src, MM *res);
  1610. +void pabsd128(XMM *src, XMM *res);
  1611. +void pabsd64(MM *src, MM *res);
  1612. +void palignr128(XMM *src, XMM *dst, XMM *res, uint8_t IMM); /* IMM is a byte count: the definition shifts by IMM*8 bits */
  1613. +void palignr64(MM *src, MM *dst, MM *res, uint8_t IMM);
  1614. +
  1615. +#endif
  1616. +
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement