Advertisement
Guest User

AndroidXMLDecompress

a guest
Jan 5th, 2012
3,042
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 7.97 KB | None | 0 0
  1. import java.io.FileInputStream;
  2. import java.io.FileNotFoundException;
  3. import java.io.IOException;
  4. import java.io.InputStream;
  5. import java.util.zip.ZipEntry;
  6. import java.util.zip.ZipFile;
  7.  
  8. class AndroidXMLDecompress {
  9.     // decompressXML -- Parse the 'compressed' binary form of Android XML docs
  10.     // such as for AndroidManifest.xml in .apk files
  11.     public static int endDocTag = 0x00100101;
  12.     public static int startTag = 0x00100102;
  13.     public static int endTag = 0x00100103;
  14.  
  15.     static void prt(String str) {
  16.         //System.err.print(str);
  17.     }
  18.    
  19.     public static String decompressXML(byte[] xml) {
  20.        
  21.         StringBuilder finalXML = new StringBuilder();
  22.        
  23.         // Compressed XML file/bytes starts with 24x bytes of data,
  24.         // 9 32 bit words in little endian order (LSB first):
  25.         // 0th word is 03 00 08 00
  26.         // 3rd word SEEMS TO BE: Offset at then of StringTable
  27.         // 4th word is: Number of strings in string table
  28.         // WARNING: Sometime I indiscriminently display or refer to word in
  29.         // little endian storage format, or in integer format (ie MSB first).
  30.         int numbStrings = LEW(xml, 4 * 4);
  31.  
  32.         // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets
  33.         // of the length/string data in the StringTable.
  34.         int sitOff = 0x24; // Offset of start of StringIndexTable
  35.  
  36.         // StringTable, each string is represented with a 16 bit little endian
  37.         // character count, followed by that number of 16 bit (LE) (Unicode)
  38.         // chars.
  39.         int stOff = sitOff + numbStrings * 4; // StringTable follows
  40.                                                 // StrIndexTable
  41.  
  42.         // XMLTags, The XML tag tree starts after some unknown content after the
  43.         // StringTable. There is some unknown data after the StringTable, scan
  44.         // forward from this point to the flag for the start of an XML start
  45.         // tag.
  46.         int xmlTagOff = LEW(xml, 3 * 4); // Start from the offset in the 3rd
  47.                                             // word.
  48.         // Scan forward until we find the bytes: 0x02011000(x00100102 in normal
  49.         // int)
  50.         for (int ii = xmlTagOff; ii < xml.length - 4; ii += 4) {
  51.             if (LEW(xml, ii) == startTag) {
  52.                 xmlTagOff = ii;
  53.                 break;
  54.             }
  55.         } // end of hack, scanning for start of first start tag
  56.  
  57.         // XML tags and attributes:
  58.         // Every XML start and end tag consists of 6 32 bit words:
  59.         // 0th word: 02011000 for startTag and 03011000 for endTag
  60.         // 1st word: a flag?, like 38000000
  61.         // 2nd word: Line of where this tag appeared in the original source file
  62.         // 3rd word: FFFFFFFF ??
  63.         // 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS
  64.         // 5th word: StringIndex of Element Name
  65.         // (Note: 01011000 in 0th word means end of XML document, endDocTag)
  66.  
  67.         // Start tags (not end tags) contain 3 more words:
  68.         // 6th word: 14001400 meaning??
  69.         // 7th word: Number of Attributes that follow this tag(follow word 8th)
  70.         // 8th word: 00000000 meaning??
  71.  
  72.         // Attributes consist of 5 words:
  73.         // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF
  74.         // 1st word: StringIndex of Attribute Name
  75.         // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId
  76.         // used
  77.         // 3rd word: Flags?
  78.         // 4th word: str ind of attr value again, or ResourceId of value
  79.  
  80.         // TMP, dump string table to tr for debugging
  81.         // tr.addSelect("strings", null);
  82.         // for (int ii=0; ii<numbStrings; ii++) {
  83.         // // Length of string starts at StringTable plus offset in StrIndTable
  84.         // String str = compXmlString(xml, sitOff, stOff, ii);
  85.         // tr.add(String.valueOf(ii), str);
  86.         // }
  87.         // tr.parent();
  88.  
  89.         // Step through the XML tree element tags and attributes
  90.         int off = xmlTagOff;
  91.         int indent = 0;
  92.         int startTagLineNo = -2;
  93.         while (off < xml.length) {
  94.             int tag0 = LEW(xml, off);
  95.             // int tag1 = LEW(xml, off+1*4);
  96.             int lineNo = LEW(xml, off + 2 * 4);
  97.             // int tag3 = LEW(xml, off+3*4);
  98.             int nameNsSi = LEW(xml, off + 4 * 4);
  99.             int nameSi = LEW(xml, off + 5 * 4);
  100.  
  101.             if (tag0 == startTag) { // XML START TAG
  102.                 int tag6 = LEW(xml, off + 6 * 4); // Expected to be 14001400
  103.                 int numbAttrs = LEW(xml, off + 7 * 4); // Number of Attributes
  104.                                                         // to follow
  105.                 // int tag8 = LEW(xml, off+8*4); // Expected to be 00000000
  106.                 off += 9 * 4; // Skip over 6+3 words of startTag data
  107.                 String name = compXmlString(xml, sitOff, stOff, nameSi);
  108.                 // tr.addSelect(name, null);
  109.                 startTagLineNo = lineNo;
  110.  
  111.                 // Look for the Attributes
  112.                 StringBuffer sb = new StringBuffer();
  113.                 for (int ii = 0; ii < numbAttrs; ii++) {
  114.                     int attrNameNsSi = LEW(xml, off); // AttrName Namespace Str
  115.                                                         // Ind, or FFFFFFFF
  116.                     int attrNameSi = LEW(xml, off + 1 * 4); // AttrName String
  117.                                                             // Index
  118.                     int attrValueSi = LEW(xml, off + 2 * 4); // AttrValue Str
  119.                                                                 // Ind, or
  120.                                                                 // FFFFFFFF
  121.                     int attrFlags = LEW(xml, off + 3 * 4);
  122.                     int attrResId = LEW(xml, off + 4 * 4); // AttrValue
  123.                                                             // ResourceId or dup
  124.                                                             // AttrValue StrInd
  125.                     off += 5 * 4; // Skip over the 5 words of an attribute
  126.  
  127.                     String attrName = compXmlString(xml, sitOff, stOff,
  128.                             attrNameSi);
  129.                     String attrValue = attrValueSi != -1 ? compXmlString(xml,
  130.                             sitOff, stOff, attrValueSi) : "resourceID 0x"
  131.                             + Integer.toHexString(attrResId);
  132.                     sb.append(" " + attrName + "=\"" + attrValue + "\"");
  133.                     // tr.add(attrName, attrValue);
  134.                 }
  135.                 finalXML.append("<" + name + sb + ">");
  136.                 prtIndent(indent, "<" + name + sb + ">");
  137.                 indent++;
  138.  
  139.             } else if (tag0 == endTag) { // XML END TAG
  140.                 indent--;
  141.                 off += 6 * 4; // Skip over 6 words of endTag data
  142.                 String name = compXmlString(xml, sitOff, stOff, nameSi);
  143.                 finalXML.append("</" + name + ">");
  144.                 prtIndent(indent, "</" + name + "> (line " + startTagLineNo
  145.                          + "-" + lineNo + ")");
  146.                 // tr.parent(); // Step back up the NobTree
  147.  
  148.             } else if (tag0 == endDocTag) { // END OF XML DOC TAG
  149.                 break;
  150.  
  151.             } else {
  152.                 prt("  Unrecognized tag code '" + Integer.toHexString(tag0)
  153.                         + "' at offset " + off);
  154.                 break;
  155.             }
  156.         } // end of while loop scanning tags and attributes of XML tree
  157.         //prt("    end at offset " + off);
  158.         return finalXML.toString();
  159.     } // end of decompressXML
  160.  
  161.     public static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) {
  162.         if (strInd < 0)
  163.             return null;
  164.         int strOff = stOff + LEW(xml, sitOff + strInd * 4);
  165.         return compXmlStringAt(xml, strOff);
  166.     }
  167.  
  168.     public static String spaces = "                                             ";
  169.  
  170.     public static void prtIndent(int indent, String str) {
  171.         prt(spaces.substring(0, Math.min(indent * 2, spaces.length())) + str);
  172.     }
  173.  
  174.     // compXmlStringAt -- Return the string stored in StringTable format at
  175.     // offset strOff. This offset points to the 16 bit string length, which
  176.     // is followed by that number of 16 bit (Unicode) chars.
  177.     public static String compXmlStringAt(byte[] arr, int strOff) {
  178.         int strLen = arr[strOff + 1] << 8 & 0xff00 | arr[strOff] & 0xff;
  179.         byte[] chars = new byte[strLen];
  180.         for (int ii = 0; ii < strLen; ii++) {
  181.             chars[ii] = arr[strOff + 2 + ii * 2];
  182.         }
  183.         return new String(chars); // Hack, just use 8 byte chars
  184.     } // end of compXmlStringAt
  185.  
  186.     // LEW -- Return value of a Little Endian 32 bit word from the byte array
  187.     // at offset off.
  188.     public static int LEW(byte[] arr, int off) {
  189.         return arr[off + 3] << 24 & 0xff000000 | arr[off + 2] << 16 & 0xff0000
  190.                 | arr[off + 1] << 8 & 0xff00 | arr[off] & 0xFF;
  191.     } // end of LEW
  192.    
  193.    
  194.    
  195.    
  196.     public static void main(String[] args) throws IOException {
  197.        
  198.         String fileName = args[0];
  199.         InputStream is = null;
  200.         ZipFile zip = null;
  201.        
  202.         if (fileName.endsWith(".apk") || fileName.endsWith(".zip")) {
  203.            
  204.             zip = new ZipFile(fileName);
  205.             ZipEntry mft = zip.getEntry("AndroidManifest.xml");
  206.             is = zip.getInputStream(mft);
  207.            
  208.         } else {
  209.             is = new FileInputStream(fileName);
  210.         }
  211.        
  212.         byte[] buf = new byte[10240];
  213.         int bytesRead = is.read(buf);
  214.        
  215.         is.close();
  216.         if (zip != null) {
  217.             zip.close();
  218.         }
  219.        
  220.         String xml = AndroidXMLDecompress.decompressXML(buf);
  221.        
  222.         System.out.println(xml);
  223.     }
  224. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement