Guest User

AndroidXMLDecompress

a guest
Jan 5th, 2012
2,178
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import java.io.FileInputStream;
  2. import java.io.FileNotFoundException;
  3. import java.io.IOException;
  4. import java.io.InputStream;
  5. import java.util.zip.ZipEntry;
  6. import java.util.zip.ZipFile;
  7.  
  /**
   * Best-effort decoder for the 'compressed' binary form of Android XML
   * documents, such as the AndroidManifest.xml stored inside .apk files.
   *
   * <p>Only element start tags, their attributes and element end tags are
   * rendered. Any other chunk type stops the decoding and whatever was decoded
   * up to that point is returned.
   */
  class AndroidXMLDecompress {
      /** Tag code that terminates decoding (stored on disk as 01 01 10 00). */
      public static final int endDocTag = 0x00100101;
      /** Tag code of an element start tag (stored on disk as 02 01 10 00). */
      public static final int startTag = 0x00100102;
      /** Tag code of an element end tag (stored on disk as 03 01 10 00). */
      public static final int endTag = 0x00100103;

      /** Size in bytes of one 32 bit word. */
      private static final int WORD = 4;
      /** Offset of the StringIndexTable from the start of the document. */
      private static final int STRING_INDEX_TABLE_OFFSET = 0x24;
      /** Number of words shared by every start and end tag. */
      private static final int TAG_WORDS = 6;
      /** Number of words in a start tag, before its attributes. */
      private static final int START_TAG_WORDS = 9;
      /** Number of words in one attribute record. */
      private static final int ATTRIBUTE_WORDS = 5;

      /**
       * Debug trace hook. Deliberately a no-op; re-enable the print below to
       * trace the decoder on stderr.
       */
      static void prt(String str) {
          //System.err.print(str);
      }

      /**
       * Decodes a binary Android XML document into (unindented) XML text.
       *
       * <p>Decoding is best effort: input that is too short to hold the header,
       * or that is truncated in the middle of a tag, does not throw; the text
       * decoded from the complete tags seen so far is returned instead. A tag
       * that is cut off (including its attributes) is not emitted at all.
       *
       * @param xml the complete binary XML document; must not be null
       * @return the decoded XML text, possibly empty
       * @throws IndexOutOfBoundsException if a tag or attribute refers to a
       *         string that lies outside {@code xml}
       */
      public static String decompressXML(byte[] xml) {

          StringBuilder finalXML = new StringBuilder();

          // Compressed XML file/bytes starts with 24x bytes of data,
          // 9 32 bit words in little endian order (LSB first):
          // 0th word is 03 00 08 00
          // 3rd word SEEMS TO BE: Offset at the end of StringTable
          // 4th word is: Number of strings in string table
          // WARNING: Sometimes I indiscriminately display or refer to a word in
          // little endian storage format, or in integer format (ie MSB first).
          if (xml.length < 5 * WORD) {
              // Not even the two header words we need are present.
              return finalXML.toString();
          }
          int numbStrings = LEW(xml, 4 * WORD);

          // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets
          // of the length/string data in the StringTable.
          int sitOff = STRING_INDEX_TABLE_OFFSET;

          // StringTable, each string is represented with a 16 bit little endian
          // character count, followed by that number of 16 bit (LE) (Unicode)
          // chars. The StringTable follows the StringIndexTable.
          int stOff = sitOff + numbStrings * WORD;

          // XMLTags, The XML tag tree starts after some unknown content after the
          // StringTable. Start from the offset in the 3rd word and scan forward,
          // one word at a time, until we find the bytes 0x02011000 (x00100102 in
          // normal int) that flag the first start tag. If none is found the
          // offset is left unchanged and the main loop below deals with it.
          int xmlTagOff = LEW(xml, 3 * WORD);
          for (int ii = xmlTagOff; hasBytes(xml, ii, WORD); ii += WORD) {
              if (LEW(xml, ii) == startTag) {
                  xmlTagOff = ii;
                  break;
              }
          } // end of hack, scanning for start of first start tag

          // XML tags and attributes:
          // Every XML start and end tag consists of 6 32 bit words:
          // 0th word: 02011000 for startTag and 03011000 for endTag
          // 1st word: a flag?, like 38000000 (per AOSP ResourceTypes.h this
          //           appears to be the chunk's total size in bytes)
          // 2nd word: Line of where this tag appeared in the original source file
          // 3rd word: FFFFFFFF ??
          // 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS
          // 5th word: StringIndex of Element Name
          // (Note: 01011000 in 0th word means end of XML document, endDocTag)

          // Start tags (not end tags) contain 3 more words:
          // 6th word: 14001400 meaning??
          // 7th word: Number of Attributes that follow this tag (after the 8th
          //           word). Per AOSP ResourceTypes.h (ResXMLTree_attrExt) only
          //           the low 16 bits are the count; the high 16 bits are the
          //           1-based index of the "id" attribute.
          // 8th word: 00000000 meaning??

          // Attributes consist of 5 words:
          // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF
          // 1st word: StringIndex of Attribute Name
          // 2nd word: StringIndex of Attribute Value, or FFFFFFFF if ResourceId
          // used
          // 3rd word: Flags?
          // 4th word: str ind of attr value again, or ResourceId of value

          // Step through the XML tree element tags and attributes
          int off = xmlTagOff;
          int indent = 0;
          int startTagLineNo = -2;
          while (hasBytes(xml, off, TAG_WORDS * WORD)) {
              int tag0 = LEW(xml, off);
              int lineNo = LEW(xml, off + 2 * WORD);
              int nameSi = LEW(xml, off + 5 * WORD);

              if (tag0 == startTag) { // XML START TAG
                  if (!hasBytes(xml, off, START_TAG_WORDS * WORD)) {
                      break; // start tag header is truncated
                  }
                  // Mask off the high half-word: it is not part of the count.
                  int numbAttrs = LEW(xml, off + 7 * WORD) & 0xFFFF;
                  off += START_TAG_WORDS * WORD; // Skip 6+3 words of startTag data
                  if (numbAttrs > (xml.length - off) / (ATTRIBUTE_WORDS * WORD)) {
                      break; // attribute records are truncated
                  }
                  String name = compXmlString(xml, sitOff, stOff, nameSi);
                  startTagLineNo = lineNo;

                  // Look for the Attributes
                  StringBuilder attrs = new StringBuilder();
                  for (int ii = 0; ii < numbAttrs; ii++) {
                      int attrNameSi = LEW(xml, off + 1 * WORD);
                      int attrValueSi = LEW(xml, off + 2 * WORD);
                      // ResourceId of the value, or a duplicate of attrValueSi
                      int attrResId = LEW(xml, off + 4 * WORD);
                      off += ATTRIBUTE_WORDS * WORD; // Skip the 5 attribute words

                      String attrName = compXmlString(xml, sitOff, stOff,
                              attrNameSi);
                      String attrValue = attrValueSi != -1
                              ? compXmlString(xml, sitOff, stOff, attrValueSi)
                              : "resourceID 0x" + Integer.toHexString(attrResId);
                      attrs.append(' ').append(attrName).append("=\"")
                              .append(attrValue).append('"');
                  }
                  String startText = "<" + name + attrs + ">";
                  finalXML.append(startText);
                  prtIndent(indent, startText);
                  indent++;

              } else if (tag0 == endTag) { // XML END TAG
                  indent--;
                  off += TAG_WORDS * WORD; // Skip over 6 words of endTag data
                  String name = compXmlString(xml, sitOff, stOff, nameSi);
                  finalXML.append("</").append(name).append('>');
                  prtIndent(indent, "</" + name + "> (line " + startTagLineNo
                          + "-" + lineNo + ")");

              } else if (tag0 == endDocTag) { // END OF XML DOC TAG
                  break;

              } else {
                  prt("  Unrecognized tag code '" + Integer.toHexString(tag0)
                          + "' at offset " + off);
                  break;
              }
          } // end of while loop scanning tags and attributes of XML tree
          return finalXML.toString();
      } // end of decompressXML

      /**
       * Looks up a string by its index in the StringIndexTable.
       *
       * @param xml    the binary XML document
       * @param sitOff offset of the StringIndexTable
       * @param stOff  offset of the StringTable
       * @param strInd index of the wanted string
       * @return the string, or {@code null} when {@code strInd} is negative
       *         (FFFFFFFF marks "no string")
       */
      public static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) {
          if (strInd < 0) {
              return null;
          }
          int strOff = stOff + LEW(xml, sitOff + strInd * WORD);
          return compXmlStringAt(xml, strOff);
      }

      /** Padding source for {@link #prtIndent}; caps the indentation width. */
      public static final String spaces = "                                             ";

      /**
       * Sends {@code str} to the debug trace, indented two spaces per level.
       * The indentation is clamped to the range 0..{@code spaces.length()}, so a
       * negative level (more end tags than start tags) is printed unindented.
       */
      public static void prtIndent(int indent, String str) {
          int width = Math.max(0, Math.min(indent * 2, spaces.length()));
          prt(spaces.substring(0, width) + str);
      }

      /**
       * Returns the string stored in StringTable format at offset
       * {@code strOff}. This offset points to the 16 bit (LE) string length,
       * which is followed by that number of 16 bit (LE) UTF-16 code units.
       *
       * @throws IndexOutOfBoundsException if the string does not fit in
       *         {@code arr}
       */
      public static String compXmlStringAt(byte[] arr, int strOff) {
          int strLen = (arr[strOff + 1] & 0xff) << 8 | (arr[strOff] & 0xff);
          char[] chars = new char[strLen];
          for (int ii = 0; ii < strLen; ii++) {
              int at = strOff + 2 + ii * 2;
              // Combine both bytes so that non-ASCII characters survive.
              chars[ii] = (char) ((arr[at + 1] & 0xff) << 8 | (arr[at] & 0xff));
          }
          return new String(chars);
      } // end of compXmlStringAt

      /**
       * Returns the value of the Little Endian 32 bit word stored in
       * {@code arr} at offset {@code off}.
       */
      public static int LEW(byte[] arr, int off) {
          return (arr[off + 3] & 0xff) << 24
                  | (arr[off + 2] & 0xff) << 16
                  | (arr[off + 1] & 0xff) << 8
                  | (arr[off] & 0xff);
      } // end of LEW

      /** Tells whether {@code count} bytes starting at {@code off} lie inside {@code arr}. */
      private static boolean hasBytes(byte[] arr, int off, int count) {
          return off >= 0 && count <= arr.length - off;
      }

      /** Reads {@code in} until end of stream; a single read() may return only part of the data. */
      private static byte[] readFully(InputStream in) throws IOException {
          java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
          byte[] chunk = new byte[8192];
          int count;
          while ((count = in.read(chunk)) != -1) {
              out.write(chunk, 0, count);
          }
          return out.toByteArray();
      }

      /**
       * Prints the decoded XML of the file named by {@code args[0]} on stdout.
       * For a name ending in ".apk" or ".zip" the AndroidManifest.xml entry of
       * that archive is decoded; any other file is decoded directly.
       *
       * @throws IOException if the file cannot be read, or the archive holds no
       *         AndroidManifest.xml entry
       */
      public static void main(String[] args) throws IOException {
          if (args.length < 1) {
              System.err.println(
                      "Usage: java AndroidXMLDecompress <file.apk | file.zip | binary.xml>");
              return;
          }

          String fileName = args[0];
          byte[] buf;

          if (fileName.endsWith(".apk") || fileName.endsWith(".zip")) {
              ZipFile zip = new ZipFile(fileName);
              try {
                  ZipEntry mft = zip.getEntry("AndroidManifest.xml");
                  if (mft == null) {
                      throw new FileNotFoundException(
                              "AndroidManifest.xml not found in " + fileName);
                  }
                  InputStream is = zip.getInputStream(mft);
                  try {
                      buf = readFully(is);
                  } finally {
                      is.close();
                  }
              } finally {
                  zip.close();
              }
          } else {
              InputStream is = new FileInputStream(fileName);
              try {
                  buf = readFully(is);
              } finally {
                  is.close();
              }
          }

          System.out.println(decompressXML(buf));
      }
  }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×