Advertisement
Guest User

Catcat

a guest
Sep 22nd, 2014
255
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.57 KB | None | 0 0
  1.  
  2. #define _GNU_SOURCE
  3.  
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include <ctype.h>
  8.  
  9. #define KSYM_NAME_LEN 127
  10.  
  11.  
  12. struct sym_entry {
  13. unsigned long long addr;
  14. unsigned int len;
  15. unsigned char *sym;
  16. };
  17.  
  18.  
  19. static struct sym_entry *table;
  20. static unsigned int table_size, table_cnt;
  21. static unsigned long long _text, _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
  22. static int all_symbols = 0;
  23. static char symbol_prefix_char = '\0';
  24.  
  25. int token_profit[0x10000];
  26.  
  27. /* the table that holds the result of the compression */
  28. unsigned char best_table[256][2];
  29. unsigned char best_table_len[256];
  30.  
  31.  
  32. static void usage(void)
  33. {
  34. fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
  35. exit(1);
  36. }
  37.  
  38. /*
  39. * This ignores the intensely annoying "mapping symbols" found
  40. * in ARM ELF files: $a, $t and $d.
  41. */
  42. static inline int is_arm_mapping_symbol(const char *str)
  43. {
  44. return str[0] == '$' && strchr("atd", str[1])
  45. && (str[2] == '\0' || str[2] == '.');
  46. }
  47.  
  48. static int read_symbol(FILE *in, struct sym_entry *s)
  49. {
  50. char str[500];
  51. char *sym, stype;
  52. int rc;
  53.  
  54. rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
  55. if (rc != 3) {
  56. if (rc != EOF) {
  57. /* skip line */
  58. fgets(str, 500, in);
  59. }
  60. return -1;
  61. }
  62.  
  63. sym = str;
  64. /* skip prefix char */
  65. if (symbol_prefix_char && str[0] == symbol_prefix_char)
  66. sym++;
  67.  
  68. /* Ignore most absolute/undefined (?) symbols. */
  69. if (strcmp(sym, "_text") == 0)
  70. _text = s->addr;
  71. else if (strcmp(sym, "_stext") == 0)
  72. _stext = s->addr;
  73. else if (strcmp(sym, "_etext") == 0)
  74. _etext = s->addr;
  75. else if (strcmp(sym, "_sinittext") == 0)
  76. _sinittext = s->addr;
  77. else if (strcmp(sym, "_einittext") == 0)
  78. _einittext = s->addr;
  79. else if (strcmp(sym, "_sextratext") == 0)
  80. _sextratext = s->addr;
  81. else if (strcmp(sym, "_eextratext") == 0)
  82. _eextratext = s->addr;
  83. else if (toupper(stype) == 'A')
  84. {
  85. /* Keep these useful absolute symbols */
  86. if (strcmp(sym, "__kernel_syscall_via_break") &&
  87. strcmp(sym, "__kernel_syscall_via_epc") &&
  88. strcmp(sym, "__kernel_sigtramp") &&
  89. strcmp(sym, "__gp"))
  90. return -1;
  91.  
  92. }
  93. else if (toupper(stype) == 'U' ||
  94. is_arm_mapping_symbol(sym))
  95. return -1;
  96. /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
  97. else if (str[0] == '$')
  98. return -1;
  99.  
  100. /* include the type field in the symbol name, so that it gets
  101. * compressed together */
  102. s->len = strlen(str) + 1;
  103. s->sym = malloc(s->len + 1);
  104. if (!s->sym) {
  105. fprintf(stderr, "kallsyms failure: "
  106. "unable to allocate required amount of memory\n");
  107. exit(EXIT_FAILURE);
  108. }
  109. strcpy((char *)s->sym + 1, str);
  110. s->sym[0] = stype;
  111.  
  112. return 0;
  113. }
  114.  
  115. static int symbol_valid(struct sym_entry *s)
  116. {
  117. /* Symbols which vary between passes. Passes 1 and 2 must have
  118. * identical symbol lists. The kallsyms_* symbols below are only added
  119. * after pass 1, they would be included in pass 2 when --all-symbols is
  120. * specified so exclude them to get a stable symbol list.
  121. */
  122. static char *special_symbols[] = {
  123. "kallsyms_addresses",
  124. "kallsyms_num_syms",
  125. "kallsyms_names",
  126. "kallsyms_markers",
  127. "kallsyms_token_table",
  128. "kallsyms_token_index",
  129.  
  130. /* Exclude linker generated symbols which vary between passes */
  131. "_SDA_BASE_", /* ppc */
  132. "_SDA2_BASE_", /* ppc */
  133. NULL };
  134. int i;
  135. int offset = 1;
  136.  
  137. /* skip prefix char */
  138. if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
  139. offset++;
  140.  
  141. /* if --all-symbols is not specified, then symbols outside the text
  142. * and inittext sections are discarded */
  143. if (!all_symbols) {
  144. if ((s->addr < _stext || s->addr > _etext)
  145. && (s->addr < _sinittext || s->addr > _einittext)
  146. && (s->addr < _sextratext || s->addr > _eextratext))
  147. return 0;
  148. /* Corner case. Discard any symbols with the same value as
  149. * _etext _einittext or _eextratext; they can move between pass
  150. * 1 and 2 when the kallsyms data are added. If these symbols
  151. * move then they may get dropped in pass 2, which breaks the
  152. * kallsyms rules.
  153. */
  154. if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) ||
  155. (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) ||
  156. (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext")))
  157. return 0;
  158. }
  159.  
  160. /* Exclude symbols which vary between passes. */
  161. if (strstr((char *)s->sym + offset, "_compiled."))
  162. return 0;
  163.  
  164. for (i = 0; special_symbols[i]; i++)
  165. if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
  166. return 0;
  167.  
  168. return 1;
  169. }
  170.  
  171. static void read_map(FILE *in)
  172. {
  173. while (!feof(in)) {
  174. if (table_cnt >= table_size) {
  175. table_size += 10000;
  176. table = realloc(table, sizeof(*table) * table_size);
  177. if (!table) {
  178. fprintf(stderr, "out of memory\n");
  179. exit (1);
  180. }
  181. }
  182. if (read_symbol(in, &table[table_cnt]) == 0)
  183. table_cnt++;
  184. }
  185. }
  186.  
  187. static void output_label(char *label)
  188. {
  189. if (symbol_prefix_char)
  190. printf(".globl %c%s\n", symbol_prefix_char, label);
  191. else
  192. printf(".globl %s\n", label);
  193. printf("\tALGN\n");
  194. if (symbol_prefix_char)
  195. printf("%c%s:\n", symbol_prefix_char, label);
  196. else
  197. printf("%s:\n", label);
  198. }
  199.  
  200. /* uncompress a compressed symbol. When this function is called, the best table
  201. * might still be compressed itself, so the function needs to be recursive */
  202. static int expand_symbol(unsigned char *data, int len, char *result)
  203. {
  204. int c, rlen, total=0;
  205.  
  206. while (len) {
  207. c = *data;
  208. /* if the table holds a single char that is the same as the one
  209. * we are looking for, then end the search */
  210. if (best_table[c][0]==c && best_table_len[c]==1) {
  211. *result++ = c;
  212. total++;
  213. } else {
  214. /* if not, recurse and expand */
  215. rlen = expand_symbol(best_table[c], best_table_len[c], result);
  216. total += rlen;
  217. result += rlen;
  218. }
  219. data++;
  220. len--;
  221. }
  222. *result=0;
  223.  
  224. return total;
  225. }
  226.  
  227. static void write_src(void)
  228. {
  229. unsigned int i, k, off;
  230. unsigned int best_idx[256];
  231. unsigned int *markers;
  232. char buf[KSYM_NAME_LEN+1];
  233.  
  234. printf("#include <asm/types.h>\n");
  235. printf("#if BITS_PER_LONG == 64\n");
  236. printf("#define PTR .quad\n");
  237. printf("#define ALGN .align 8\n");
  238. printf("#else\n");
  239. printf("#define PTR .long\n");
  240. printf("#define ALGN .align 4\n");
  241. printf("#endif\n");
  242.  
  243. printf(".data\n");
  244.  
  245. /* Provide proper symbols relocatability by their '_text'
  246. * relativeness. The symbol names cannot be used to construct
  247. * normal symbol references as the list of symbols contains
  248. * symbols that are declared static and are private to their
  249. * .o files. This prevents .tmp_kallsyms.o or any other
  250. * object from referencing them.
  251. */
  252. output_label("kallsyms_addresses");
  253. for (i = 0; i < table_cnt; i++) {
  254. if (toupper(table[i].sym[0]) != 'A') {
  255. printf("\tPTR\t_text + %#llx\n",
  256. table[i].addr - _text);
  257. } else {
  258. printf("\tPTR\t%#llx\n", table[i].addr);
  259. }
  260. }
  261. printf("\n");
  262.  
  263. output_label("kallsyms_num_syms");
  264. printf("\tPTR\t%d\n", table_cnt);
  265. printf("\n");
  266.  
  267. /* table of offset markers, that give the offset in the compressed stream
  268. * every 256 symbols */
  269. markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
  270. if (!markers) {
  271. fprintf(stderr, "kallsyms failure: "
  272. "unable to allocate required memory\n");
  273. exit(EXIT_FAILURE);
  274. }
  275.  
  276. output_label("kallsyms_names");
  277. off = 0;
  278. for (i = 0; i < table_cnt; i++) {
  279. if ((i & 0xFF) == 0)
  280. markers[i >> 8] = off;
  281.  
  282. printf("\t.byte 0x%02x", table[i].len);
  283. for (k = 0; k < table[i].len; k++)
  284. printf(", 0x%02x", table[i].sym[k]);
  285. printf("\n");
  286.  
  287. off += table[i].len + 1;
  288. }
  289. printf("\n");
  290.  
  291. output_label("kallsyms_markers");
  292. for (i = 0; i < ((table_cnt + 255) >> 8); i++)
  293. printf("\tPTR\t%d\n", markers[i]);
  294. printf("\n");
  295.  
  296. free(markers);
  297.  
  298. output_label("kallsyms_token_table");
  299. off = 0;
  300. for (i = 0; i < 256; i++) {
  301. best_idx[i] = off;
  302. expand_symbol(best_table[i], best_table_len[i], buf);
  303. printf("\t.asciz\t\"%s\"\n", buf);
  304. off += strlen(buf) + 1;
  305. }
  306. printf("\n");
  307.  
  308. output_label("kallsyms_token_index");
  309. for (i = 0; i < 256; i++)
  310. printf("\t.short\t%d\n", best_idx[i]);
  311. printf("\n");
  312. }
  313.  
  314.  
  315. /* table lookup compression functions */
  316.  
  317. /* count all the possible tokens in a symbol */
  318. static void learn_symbol(unsigned char *symbol, int len)
  319. {
  320. int i;
  321.  
  322. for (i = 0; i < len - 1; i++)
  323. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
  324. }
  325.  
  326. /* decrease the count for all the possible tokens in a symbol */
  327. static void forget_symbol(unsigned char *symbol, int len)
  328. {
  329. int i;
  330.  
  331. for (i = 0; i < len - 1; i++)
  332. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
  333. }
  334.  
  335. /* remove all the invalid symbols from the table and do the initial token count */
  336. static void build_initial_tok_table(void)
  337. {
  338. unsigned int i, pos;
  339.  
  340. pos = 0;
  341. for (i = 0; i < table_cnt; i++) {
  342. if ( symbol_valid(&table[i]) ) {
  343. if (pos != i)
  344. table[pos] = table[i];
  345. learn_symbol(table[pos].sym, table[pos].len);
  346. pos++;
  347. }
  348. }
  349. table_cnt = pos;
  350. }
  351.  
  352. /* replace a given token in all the valid symbols. Use the sampled symbols
  353. * to update the counts */
  354. static void compress_symbols(unsigned char *str, int idx)
  355. {
  356. unsigned int i, len, size;
  357. unsigned char *p1, *p2;
  358.  
  359. for (i = 0; i < table_cnt; i++) {
  360.  
  361. len = table[i].len;
  362. p1 = table[i].sym;
  363.  
  364. /* find the token on the symbol */
  365. p2 = memmem(p1, len, str, 2);
  366. if (!p2) continue;
  367.  
  368. /* decrease the counts for this symbol's tokens */
  369. forget_symbol(table[i].sym, len);
  370.  
  371. size = len;
  372.  
  373. do {
  374. *p2 = idx;
  375. p2++;
  376. size -= (p2 - p1);
  377. memmove(p2, p2 + 1, size);
  378. p1 = p2;
  379. len--;
  380.  
  381. if (size < 2) break;
  382.  
  383. /* find the token on the symbol */
  384. p2 = memmem(p1, size, str, 2);
  385.  
  386. } while (p2);
  387.  
  388. table[i].len = len;
  389.  
  390. /* increase the counts for this symbol's new tokens */
  391. learn_symbol(table[i].sym, len);
  392. }
  393. }
  394.  
  395. /* search the token with the maximum profit */
  396. static int find_best_token(void)
  397. {
  398. int i, best, bestprofit;
  399.  
  400. bestprofit=-10000;
  401. best = 0;
  402.  
  403. for (i = 0; i < 0x10000; i++) {
  404. if (token_profit[i] > bestprofit) {
  405. best = i;
  406. bestprofit = token_profit[i];
  407. }
  408. }
  409. return best;
  410. }
  411.  
  412. /* this is the core of the algorithm: calculate the "best" table */
  413. static void optimize_result(void)
  414. {
  415. int i, best;
  416.  
  417. /* using the '\0' symbol last allows compress_symbols to use standard
  418. * fast string functions */
  419. for (i = 255; i >= 0; i--) {
  420.  
  421. /* if this table slot is empty (it is not used by an actual
  422. * original char code */
  423. if (!best_table_len[i]) {
  424.  
  425. /* find the token with the breates profit value */
  426. best = find_best_token();
  427.  
  428. /* place it in the "best" table */
  429. best_table_len[i] = 2;
  430. best_table[i][0] = best & 0xFF;
  431. best_table[i][1] = (best >> 8) & 0xFF;
  432.  
  433. /* replace this token in all the valid symbols */
  434. compress_symbols(best_table[i], i);
  435. }
  436. }
  437. }
  438.  
  439. /* start by placing the symbols that are actually used on the table */
  440. static void insert_real_symbols_in_table(void)
  441. {
  442. unsigned int i, j, c;
  443.  
  444. memset(best_table, 0, sizeof(best_table));
  445. memset(best_table_len, 0, sizeof(best_table_len));
  446.  
  447. for (i = 0; i < table_cnt; i++) {
  448. for (j = 0; j < table[i].len; j++) {
  449. c = table[i].sym[j];
  450. best_table[c][0]=c;
  451. best_table_len[c]=1;
  452. }
  453. }
  454. }
  455.  
  456. static void optimize_token_table(void)
  457. {
  458. build_initial_tok_table();
  459.  
  460. insert_real_symbols_in_table();
  461.  
  462. /* When valid symbol is not registered, exit to error */
  463. if (!table_cnt) {
  464. fprintf(stderr, "No valid symbol.\n");
  465. exit(1);
  466. }
  467.  
  468. optimize_result();
  469. }
  470.  
  471.  
  472. int main(int argc, char **argv)
  473. {
  474. if (argc >= 2) {
  475. int i;
  476. for (i = 1; i < argc; i++) {
  477. if(strcmp(argv[i], "--all-symbols") == 0)
  478. all_symbols = 1;
  479. else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
  480. char *p = &argv[i][16];
  481. /* skip quote */
  482. if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
  483. p++;
  484. symbol_prefix_char = *p;
  485. } else
  486. usage();
  487. }
  488. } else if (argc != 1)
  489. usage();
  490.  
  491. read_map(stdin);
  492. optimize_token_table();
  493. write_src();
  494.  
  495. return 0;
  496. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement