s4kur4

lc 26.32

Feb 15th, 2017
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.70 KB | None | 0 0
  1. #define _GNU_SOURCE
  2.  
  3.  
  4.  
  5. #include <stdio.h>
  6.  
  7. #include <stdlib.h>
  8.  
  9. #include <string.h>
  10.  
  11. #include <ctype.h>
  12.  
  13.  
  14.  
  15. #define KSYM_NAME_LEN 127
  16.  
  17.  
  18.  
  19.  
  20.  
  /*
   * One symbol taken from the input map.
   *
   * addr - address parsed from the map line
   * len  - number of bytes held in sym, not counting the terminating NUL;
   *        starts as "type character + name" and shrinks whenever
   *        compress_symbols() replaces a byte pair with a token code
   * sym  - heap buffer: the type character first, then the name
   */
  struct sym_entry {
      unsigned long long addr;
      unsigned int len;
      unsigned char *sym;
  };
  30.  
  31.  
  32.  
  33.  
  34.  
  /* Symbol table grown by read_map(): the array, its capacity and its fill count. */
  static struct sym_entry *table;
  static unsigned int table_size;
  static unsigned int table_cnt;

  /* Addresses of the section marker symbols, recorded by read_symbol(). */
  static unsigned long long _text;
  static unsigned long long _stext;
  static unsigned long long _etext;
  static unsigned long long _sinittext;
  static unsigned long long _einittext;
  static unsigned long long _sextratext;
  static unsigned long long _eextratext;

  /* Set to 1 by main() when --all-symbols is given. */
  static int all_symbols;

  /* Character supplied through --symbol-prefix=, or '\0' when there is none. */
  static char symbol_prefix_char;
  44.  
  45.  
  46.  
  /*
   * Occurrence count of every byte pair in the symbol table, indexed by
   * first byte + (second byte << 8).
   */
  int token_profit[0x10000];

  /*
   * Result of the compression: for each of the 256 codes, the one or two
   * bytes it stands for (best_table) and how many of them are in use
   * (best_table_len, 0 meaning the code is still free).
   */
  unsigned char best_table[256][2];
  unsigned char best_table_len[256];
  56.  
  57.  
  58.  
  59.  
  60.  
  /* Print the command-line synopsis on stderr and exit with status 1. */
  static void usage(void)
  {
      fputs("Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n",
            stderr);
      exit(1);
  }
  70.  
  71.  
  72.  
  /*
   * Recognise the "mapping symbols" found in ARM ELF files: $a, $t and $d,
   * either on their own or followed by ".<suffix>".
   *
   * str: NUL-terminated symbol name.
   * Returns 1 for a mapping symbol, 0 otherwise.
   *
   * str[1] is compared against '\0' explicitly because strchr() also
   * matches the terminator of the string it searches; without the check the
   * lone name "$" would pass the letter test and str[2] would be read past
   * the end of the string.
   */
  static inline int is_arm_mapping_symbol(const char *str)
  {
      if (str[0] != '$')
          return 0;
      if (str[1] == '\0' || strchr("atd", str[1]) == NULL)
          return 0;
      return str[2] == '\0' || str[2] == '.';
  }
  90.  
  91.  
  92.  
  93. static int read_symbol(FILE *in, struct sym_entry *s)
  94.  
  95. {
  96.  
  97. char str[500];
  98.  
  99. char *sym, stype;
  100.  
  101. int rc;
  102.  
  103.  
  104.  
  105. rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
  106.  
  107. if (rc != 3) {
  108.  
  109. if (rc != EOF) {
  110.  
  111. /* skip line */
  112.  
  113. fgets(str, 500, in);
  114.  
  115. }
  116.  
  117. return -1;
  118.  
  119. }
  120.  
  121.  
  122.  
  123. sym = str;
  124.  
  125. /* skip prefix char */
  126.  
  127. if (symbol_prefix_char && str[0] == symbol_prefix_char)
  128.  
  129. sym++;
  130.  
  131.  
  132.  
  133. /* Ignore most absolute/undefined (?) symbols. */
  134.  
  135. if (strcmp(sym, "_text") == 0)
  136.  
  137. _text = s->addr;
  138.  
  139. else if (strcmp(sym, "_stext") == 0)
  140.  
  141. _stext = s->addr;
  142.  
  143. else if (strcmp(sym, "_etext") == 0)
  144.  
  145. _etext = s->addr;
  146.  
  147. else if (strcmp(sym, "_sinittext") == 0)
  148.  
  149. _sinittext = s->addr;
  150.  
  151. else if (strcmp(sym, "_einittext") == 0)
  152.  
  153. _einittext = s->addr;
  154.  
  155. else if (strcmp(sym, "_sextratext") == 0)
  156.  
  157. _sextratext = s->addr;
  158.  
  159. else if (strcmp(sym, "_eextratext") == 0)
  160.  
  161. _eextratext = s->addr;
  162.  
  163. else if (toupper(stype) == 'A')
  164.  
  165. {
  166.  
  167. /* Keep these useful absolute symbols */
  168.  
  169. if (strcmp(sym, "__kernel_syscall_via_break") &&
  170.  
  171. strcmp(sym, "__kernel_syscall_via_epc") &&
  172.  
  173. strcmp(sym, "__kernel_sigtramp") &&
  174.  
  175. strcmp(sym, "__gp"))
  176.  
  177. return -1;
  178.  
  179.  
  180.  
  181. }
  182.  
  183. else if (toupper(stype) == 'U' ||
  184.  
  185. is_arm_mapping_symbol(sym))
  186.  
  187. return -1;
  188.  
  189. /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
  190.  
  191. else if (str[0] == '$')
  192.  
  193. return -1;
  194.  
  195.  
  196.  
  197. /* include the type field in the symbol name, so that it gets
  198.  
  199. * compressed together */
  200.  
  201. s->len = strlen(str) + 1;
  202.  
  203. s->sym = malloc(s->len + 1);
  204.  
  205. if (!s->sym) {
  206.  
  207. fprintf(stderr, "kallsyms failure: "
  208.  
  209. "unable to allocate required amount of memory\n");
  210.  
  211. exit(EXIT_FAILURE);
  212.  
  213. }
  214.  
  215. strcpy((char *)s->sym + 1, str);
  216.  
  217. s->sym[0] = stype;
  218.  
  219.  
  220.  
  221. return 0;
  222.  
  223. }
  224.  
  225.  
  226.  
  227. static int symbol_valid(struct sym_entry *s)
  228.  
  229. {
  230.  
  231. /* Symbols which vary between passes. Passes 1 and 2 must have
  232.  
  233. * identical symbol lists. The kallsyms_* symbols below are only added
  234.  
  235. * after pass 1, they would be included in pass 2 when --all-symbols is
  236.  
  237. * specified so exclude them to get a stable symbol list.
  238.  
  239. */
  240.  
  241. static char *special_symbols[] = {
  242.  
  243. "kallsyms_addresses",
  244.  
  245. "kallsyms_num_syms",
  246.  
  247. "kallsyms_names",
  248.  
  249. "kallsyms_markers",
  250.  
  251. "kallsyms_token_table",
  252.  
  253. "kallsyms_token_index",
  254.  
  255.  
  256.  
  257. /* Exclude linker generated symbols which vary between passes */
  258.  
  259. "_SDA_BASE_", /* ppc */
  260.  
  261. "_SDA2_BASE_", /* ppc */
  262.  
  263. NULL };
  264.  
  265. int i;
  266.  
  267. int offset = 1;
  268.  
  269.  
  270.  
  271. /* skip prefix char */
  272.  
  273. if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
  274.  
  275. offset++;
  276.  
  277.  
  278.  
  279. /* if --all-symbols is not specified, then symbols outside the text
  280.  
  281. * and inittext sections are discarded */
  282.  
  283. if (!all_symbols) {
  284.  
  285. if ((s->addr < _stext || s->addr > _etext)
  286.  
  287. && (s->addr < _sinittext || s->addr > _einittext)
  288.  
  289. && (s->addr < _sextratext || s->addr > _eextratext))
  290.  
  291. return 0;
  292.  
  293. /* Corner case. Discard any symbols with the same value as
  294.  
  295. * _etext _einittext or _eextratext; they can move between pass
  296.  
  297. * 1 and 2 when the kallsyms data are added. If these symbols
  298.  
  299. * move then they may get dropped in pass 2, which breaks the
  300.  
  301. * kallsyms rules.
  302.  
  303. */
  304.  
  305. if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) ||
  306.  
  307. (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) ||
  308.  
  309. (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext")))
  310.  
  311. return 0;
  312.  
  313. }
  314.  
  315.  
  316.  
  317. /* Exclude symbols which vary between passes. */
  318.  
  319. if (strstr((char *)s->sym + offset, "_compiled."))
  320.  
  321. return 0;
  322.  
  323.  
  324.  
  325. for (i = 0; special_symbols[i]; i++)
  326.  
  327. if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
  328.  
  329. return 0;
  330.  
  331.  
  332.  
  333. return 1;
  334.  
  335. }
  336.  
  337.  
  338.  
  339. static void read_map(FILE *in)
  340.  
  341. {
  342.  
  343. while (!feof(in)) {
  344.  
  345. if (table_cnt >= table_size) {
  346.  
  347. table_size += 10000;
  348.  
  349. table = realloc(table, sizeof(*table) * table_size);
  350.  
  351. if (!table) {
  352.  
  353. fprintf(stderr, "out of memory\n");
  354.  
  355. exit (1);
  356.  
  357. }
  358.  
  359. }
  360.  
  361. if (read_symbol(in, &table[table_cnt]) == 0)
  362.  
  363. table_cnt++;
  364.  
  365. }
  366.  
  367. }
  368.  
  369.  
  370.  
  371. static void output_label(char *label)
  372.  
  373. {
  374.  
  375. if (symbol_prefix_char)
  376.  
  377. printf(".globl %c%s\n", symbol_prefix_char, label);
  378.  
  379. else
  380.  
  381. printf(".globl %s\n", label);
  382.  
  383. printf("\tALGN\n");
  384.  
  385. if (symbol_prefix_char)
  386.  
  387. printf("%c%s:\n", symbol_prefix_char, label);
  388.  
  389. else
  390.  
  391. printf("%s:\n", label);
  392.  
  393. }
  394.  
  395.  
  396.  
  397. /* uncompress a compressed symbol. When this function is called, the best table
  398.  
  399. * might still be compressed itself, so the function needs to be recursive */
  400.  
  401. static int expand_symbol(unsigned char *data, int len, char *result)
  402.  
  403. {
  404.  
  405. int c, rlen, total=0;
  406.  
  407.  
  408.  
  409. while (len) {
  410.  
  411. c = *data;
  412.  
  413. /* if the table holds a single char that is the same as the one
  414.  
  415. * we are looking for, then end the search */
  416.  
  417. if (best_table[c][0]==c && best_table_len[c]==1) {
  418.  
  419. *result++ = c;
  420.  
  421. total++;
  422.  
  423. } else {
  424.  
  425. /* if not, recurse and expand */
  426.  
  427. rlen = expand_symbol(best_table[c], best_table_len[c], result);
  428.  
  429. total += rlen;
  430.  
  431. result += rlen;
  432.  
  433. }
  434.  
  435. data++;
  436.  
  437. len--;
  438.  
  439. }
  440.  
  441. *result=0;
  442.  
  443.  
  444.  
  445. return total;
  446.  
  447. }
  448.  
  449.  
  450.  
  451. static void write_src(void)
  452.  
  453. {
  454.  
  455. unsigned int i, k, off;
  456.  
  457. unsigned int best_idx[256];
  458.  
  459. unsigned int *markers;
  460.  
  461. char buf[KSYM_NAME_LEN+1];
  462.  
  463.  
  464.  
  465. printf("#include <asm/types.h>\n");
  466.  
  467. printf("#if BITS_PER_LONG == 64\n");
  468.  
  469. printf("#define PTR .quad\n");
  470.  
  471. printf("#define ALGN .align 8\n");
  472.  
  473. printf("#else\n");
  474.  
  475. printf("#define PTR .long\n");
  476.  
  477. printf("#define ALGN .align 4\n");
  478.  
  479. printf("#endif\n");
  480.  
  481.  
  482.  
  483. printf(".data\n");
  484.  
  485.  
  486.  
  487. /* Provide proper symbols relocatability by their '_text'
  488.  
  489. * relativeness. The symbol names cannot be used to construct
  490.  
  491. * normal symbol references as the list of symbols contains
  492.  
  493. * symbols that are declared static and are private to their
  494.  
  495. * .o files. This prevents .tmp_kallsyms.o or any other
  496.  
  497. * object from referencing them.
  498.  
  499. */
  500.  
  501. output_label("kallsyms_addresses");
  502.  
  503. for (i = 0; i < table_cnt; i++) {
  504.  
  505. if (toupper(table[i].sym[0]) != 'A') {
  506.  
  507. printf("\tPTR\t_text + %#llx\n",
  508.  
  509. table[i].addr - _text);
  510.  
  511. } else {
  512.  
  513. printf("\tPTR\t%#llx\n", table[i].addr);
  514.  
  515. }
  516.  
  517. }
  518.  
  519. printf("\n");
  520.  
  521.  
  522.  
  523. output_label("kallsyms_num_syms");
  524.  
  525. printf("\tPTR\t%d\n", table_cnt);
  526.  
  527. printf("\n");
  528.  
  529.  
  530.  
  531. /* table of offset markers, that give the offset in the compressed stream
  532.  
  533. * every 256 symbols */
  534.  
  535. markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
  536.  
  537. if (!markers) {
  538.  
  539. fprintf(stderr, "kallsyms failure: "
  540.  
  541. "unable to allocate required memory\n");
  542.  
  543. exit(EXIT_FAILURE);
  544.  
  545. }
  546.  
  547.  
  548.  
  549. output_label("kallsyms_names");
  550.  
  551. off = 0;
  552.  
  553. for (i = 0; i < table_cnt; i++) {
  554.  
  555. if ((i & 0xFF) == 0)
  556.  
  557. markers[i >> 8] = off;
  558.  
  559.  
  560.  
  561. printf("\t.byte 0x%02x", table[i].len);
  562.  
  563. for (k = 0; k < table[i].len; k++)
  564.  
  565. printf(", 0x%02x", table[i].sym[k]);
  566.  
  567. printf("\n");
  568.  
  569.  
  570.  
  571. off += table[i].len + 1;
  572.  
  573. }
  574.  
  575. printf("\n");
  576.  
  577.  
  578.  
  579. output_label("kallsyms_markers");
  580.  
  581. for (i = 0; i < ((table_cnt + 255) >> 8); i++)
  582.  
  583. printf("\tPTR\t%d\n", markers[i]);
  584.  
  585. printf("\n");
  586.  
  587.  
  588.  
  589. free(markers);
  590.  
  591.  
  592.  
  593. output_label("kallsyms_token_table");
  594.  
  595. off = 0;
  596.  
  597. for (i = 0; i < 256; i++) {
  598.  
  599. best_idx[i] = off;
  600.  
  601. expand_symbol(best_table[i], best_table_len[i], buf);
  602.  
  603. printf("\t.asciz\t\"%s\"\n", buf);
  604.  
  605. off += strlen(buf) + 1;
  606.  
  607. }
  608.  
  609. printf("\n");
  610.  
  611.  
  612.  
  613. output_label("kallsyms_token_index");
  614.  
  615. for (i = 0; i < 256; i++)
  616.  
  617. printf("\t.short\t%d\n", best_idx[i]);
  618.  
  619. printf("\n");
  620.  
  621. }
  622.  
  623.  
  624.  
  625.  
  626.  
  627. /* table lookup compression functions */
  628.  
  629.  
  630.  
  631. /* count all the possible tokens in a symbol */
  632.  
  633. static void learn_symbol(unsigned char *symbol, int len)
  634.  
  635. {
  636.  
  637. int i;
  638.  
  639.  
  640.  
  641. for (i = 0; i < len - 1; i++)
  642.  
  643. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
  644.  
  645. }
  646.  
  647.  
  648.  
  649. /* decrease the count for all the possible tokens in a symbol */
  650.  
  651. static void forget_symbol(unsigned char *symbol, int len)
  652.  
  653. {
  654.  
  655. int i;
  656.  
  657.  
  658.  
  659. for (i = 0; i < len - 1; i++)
  660.  
  661. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
  662.  
  663. }
  664.  
  665.  
  666.  
  667. /* remove all the invalid symbols from the table and do the initial token count */
  668.  
  669. static void build_initial_tok_table(void)
  670.  
  671. {
  672.  
  673. unsigned int i, pos;
  674.  
  675.  
  676.  
  677. pos = 0;
  678.  
  679. for (i = 0; i < table_cnt; i++) {
  680.  
  681. if ( symbol_valid(&table[i]) ) {
  682.  
  683. if (pos != i)
  684.  
  685. table[pos] = table[i];
  686.  
  687. learn_symbol(table[pos].sym, table[pos].len);
  688.  
  689. pos++;
  690.  
  691. }
  692.  
  693. }
  694.  
  695. table_cnt = pos;
  696.  
  697. }
  698.  
  699.  
  700.  
  701. /* replace a given token in all the valid symbols. Use the sampled symbols
  702.  
  703. * to update the counts */
  704.  
  705. static void compress_symbols(unsigned char *str, int idx)
  706.  
  707. {
  708.  
  709. unsigned int i, len, size;
  710.  
  711. unsigned char *p1, *p2;
  712.  
  713.  
  714.  
  715. for (i = 0; i < table_cnt; i++) {
  716.  
  717.  
  718.  
  719. len = table[i].len;
  720.  
  721. p1 = table[i].sym;
  722.  
  723.  
  724.  
  725. /* find the token on the symbol */
  726.  
  727. p2 = memmem(p1, len, str, 2);
  728.  
  729. if (!p2) continue;
  730.  
  731.  
  732.  
  733. /* decrease the counts for this symbol's tokens */
  734.  
  735. forget_symbol(table[i].sym, len);
  736.  
  737.  
  738.  
  739. size = len;
  740.  
  741.  
  742.  
  743. do {
  744.  
  745. *p2 = idx;
  746.  
  747. p2++;
  748.  
  749. size -= (p2 - p1);
  750.  
  751. memmove(p2, p2 + 1, size);
  752.  
  753. p1 = p2;
  754.  
  755. len--;
  756.  
  757.  
  758.  
  759. if (size < 2) break;
  760.  
  761.  
  762.  
  763. /* find the token on the symbol */
  764.  
  765. p2 = memmem(p1, size, str, 2);
  766.  
  767.  
  768.  
  769. } while (p2);
  770.  
  771.  
  772.  
  773. table[i].len = len;
  774.  
  775.  
  776.  
  777. /* increase the counts for this symbol's new tokens */
  778.  
  779. learn_symbol(table[i].sym, len);
  780.  
  781. }
  782.  
  783. }
  784.  
  785.  
  786.  
  787. /* search the token with the maximum profit */
  788.  
  789. static int find_best_token(void)
  790.  
  791. {
  792.  
  793. int i, best, bestprofit;
  794.  
  795.  
  796.  
  797. bestprofit=-10000;
  798.  
  799. best = 0;
  800.  
  801.  
  802.  
  803. for (i = 0; i < 0x10000; i++) {
  804.  
  805. if (token_profit[i] > bestprofit) {
  806.  
  807. best = i;
  808.  
  809. bestprofit = token_profit[i];
  810.  
  811. }
  812.  
  813. }
  814.  
  815. return best;
  816.  
  817. }
  818.  
  819.  
  820.  
  821. /* this is the core of the algorithm: calculate the "best" table */
  822.  
  823. static void optimize_result(void)
  824.  
  825. {
  826.  
  827. int i, best;
  828.  
  829.  
  830.  
  831. /* using the '\0' symbol last allows compress_symbols to use standard
  832.  
  833. * fast string functions */
  834.  
  835. for (i = 255; i >= 0; i--) {
  836.  
  837.  
  838.  
  839. /* if this table slot is empty (it is not used by an actual
  840.  
  841. * original char code */
  842.  
  843. if (!best_table_len[i]) {
  844.  
  845.  
  846.  
  847. /* find the token with the breates profit value */
  848.  
  849. best = find_best_token();
  850.  
  851.  
  852.  
  853. /* place it in the "best" table */
  854.  
  855. best_table_len[i] = 2;
  856.  
  857. best_table[i][0] = best & 0xFF;
  858.  
  859. best_table[i][1] = (best >> 8) & 0xFF;
  860.  
  861.  
  862.  
  863. /* replace this token in all the valid symbols */
  864.  
  865. compress_symbols(best_table[i], i);
  866.  
  867. }
  868.  
  869. }
  870.  
  871. }
  872.  
  873.  
  874.  
  875. /* start by placing the symbols that are actually used on the table */
  876.  
  877. static void insert_real_symbols_in_table(void)
  878.  
  879. {
  880.  
  881. unsigned int i, j, c;
  882.  
  883.  
  884.  
  885. memset(best_table, 0, sizeof(best_table));
  886.  
  887. memset(best_table_len, 0, sizeof(best_table_len));
  888.  
  889.  
  890.  
  891. for (i = 0; i < table_cnt; i++) {
  892.  
  893. for (j = 0; j < table[i].len; j++) {
  894.  
  895. c = table[i].sym[j];
  896.  
  897. best_table[c][0]=c;
  898.  
  899. best_table_len[c]=1;
  900.  
  901. }
  902.  
  903. }
  904.  
  905. }
  906.  
  907.  
  908.  
  909. static void optimize_token_table(void)
  910.  
  911. {
  912.  
  913. build_initial_tok_table();
  914.  
  915.  
  916.  
  917. insert_real_symbols_in_table();
  918.  
  919.  
  920.  
  921. /* When valid symbol is not registered, exit to error */
  922.  
  923. if (!table_cnt) {
  924.  
  925. fprintf(stderr, "No valid symbol.\n");
  926.  
  927. exit(1);
  928.  
  929. }
  930.  
  931.  
  932.  
  933. optimize_result();
  934.  
  935. }
  936.  
  937.  
  938.  
  939.  
  940.  
  941. int main(int argc, char **argv)
  942.  
  943. {
  944.  
  945. if (argc >= 2) {
  946.  
  947. int i;
  948.  
  949. for (i = 1; i < argc; i++) {
  950.  
  951. if(strcmp(argv[i], "--all-symbols") == 0)
  952.  
  953. all_symbols = 1;
  954.  
  955. else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
  956.  
  957. char *p = &argv[i][16];
  958.  
  959. /* skip quote */
  960.  
  961. if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
  962.  
  963. p++;
  964.  
  965. symbol_prefix_char = *p;
  966.  
  967. } else
  968.  
  969. usage();
  970.  
  971. }
  972.  
  973. } else if (argc != 1)
  974.  
  975. usage();
  976.  
  977.  
  978.  
  979. read_map(stdin);
  980.  
  981. optimize_token_table();
  982.  
  983. write_src();
  984.  
  985.  
  986.  
  987. return 0;
  988.  
  989. }
Add Comment
Please, Sign In to add comment