import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * Parses the 'compressed' binary form of Android XML documents, such as the
 * AndroidManifest.xml stored inside .apk files, and renders it as text.
 *
 * <p>The layout knowledge encoded here is reverse-engineered; the comments
 * below record what each word was observed to contain.
 */
class AndroidXMLDecompress {

    // Chunk codes found in the 0th word of each XML tree node. They are kept
    // non-final only so that existing code touching these public fields keeps
    // compiling; treat them as constants.
    public static int endDocTag = 0x00100101;
    public static int startTag = 0x00100102;
    public static int endTag = 0x00100103;

    /** Padding source for {@link #prtIndent(int, String)}. */
    public static String spaces = "                                                            ";

    /**
     * Debug output hook. Intentionally a no-op; enable the print statement to
     * trace the tags as they are decoded.
     *
     * @param str the text that would be printed
     */
    static void prt(String str) {
        // System.err.print(str);
    }

    /**
     * Decodes a binary Android XML document into a flat textual form.
     *
     * <p>Start tags are emitted as {@code <name attr="value" ...>} and end
     * tags as {@code </name>}, concatenated without whitespace. Attribute
     * values that are not string references are rendered as
     * {@code resourceID 0x<hex>}.
     *
     * @param xml the raw bytes of the binary XML document
     * @return the decoded tags; decoding stops at the end-of-document tag or
     *         at the first unrecognized tag code
     * @throws IndexOutOfBoundsException if the data is truncated or malformed
     */
    public static String decompressXML(byte[] xml) {
        StringBuilder finalXML = new StringBuilder();

        // The document starts with 0x24 bytes of header, i.e. nine 32-bit
        // little-endian words:
        //   0th word is 03 00 08 00
        //   3rd word SEEMS TO BE the offset of the end of the StringTable
        //   4th word is the number of strings in the StringTable
        int numbStrings = LEW(xml, 4 * 4);

        // The StringIndexTable starts at offset 0x24: an array of 32-bit LE
        // offsets of the length/string data inside the StringTable.
        int sitOff = 0x24;

        // The StringTable follows the StringIndexTable. Each string is a
        // 16-bit LE character count followed by that many 16-bit LE chars.
        int stOff = sitOff + numbStrings * 4;

        // The XML tag tree starts after some unknown content that follows the
        // StringTable. Start from the offset in the 3rd word and scan forward,
        // one word at a time, until the first start-tag code is found.
        int xmlTagOff = LEW(xml, 3 * 4);
        for (int ii = xmlTagOff; ii < xml.length - 4; ii += 4) {
            if (LEW(xml, ii) == startTag) {
                xmlTagOff = ii;
                break;
            }
        }

        // Every start and end tag consists of six 32-bit words:
        //   0th word: 02011000 for startTag and 03011000 for endTag
        //             (01011000 means end of XML document, endDocTag)
        //   1st word: a flag?, like 38000000
        //   2nd word: line where this tag appeared in the original source
        //   3rd word: FFFFFFFF ??
        //   4th word: StringIndex of the namespace name, or FFFFFFFF
        //   5th word: StringIndex of the element name
        // Start tags (not end tags) contain three more words:
        //   6th word: 14001400 meaning??
        //   7th word: number of attributes that follow this tag
        //   8th word: 00000000 meaning??
        // Each attribute consists of five words:
        //   0th word: StringIndex of the attribute name's namespace, or FFFFFFFF
        //   1st word: StringIndex of the attribute name
        //   2nd word: StringIndex of the attribute value, or FFFFFFFF if a
        //             ResourceId is used
        //   3rd word: flags?
        //   4th word: StringIndex of the value again, or the ResourceId
        //
        // NOTE(review): the body of this loop up to the start-tag append was
        // lost from the file (text between '<' and '>' had been stripped) and
        // has been rebuilt from the surviving fragments and the layout notes
        // above -- confirm against the upstream original.
        int off = xmlTagOff;
        int indent = 0;
        int startTagLineNo = -2;
        while (off < xml.length) {
            int tag0 = LEW(xml, off);
            int lineNo = LEW(xml, off + 2 * 4);
            int nameSi = LEW(xml, off + 5 * 4);

            if (tag0 == startTag) { // XML START TAG
                int numbAttrs = LEW(xml, off + 7 * 4);
                off += 9 * 4; // Skip over the 6+3 words of start-tag data
                String name = compXmlString(xml, sitOff, stOff, nameSi);
                startTagLineNo = lineNo;

                StringBuilder sb = new StringBuilder();
                for (int ii = 0; ii < numbAttrs; ii++) {
                    int attrNameSi = LEW(xml, off + 1 * 4);
                    int attrValueSi = LEW(xml, off + 2 * 4);
                    int attrResId = LEW(xml, off + 4 * 4);
                    off += 5 * 4; // Skip over the 5 words of an attribute

                    String attrName = compXmlString(xml, sitOff, stOff, attrNameSi);
                    String attrValue = attrValueSi != -1
                            ? compXmlString(xml, sitOff, stOff, attrValueSi)
                            : "resourceID 0x" + Integer.toHexString(attrResId);
                    sb.append(" ").append(attrName).append("=\"").append(attrValue).append("\"");
                }
                String openTag = "<" + name + sb + ">";
                finalXML.append(openTag);
                prtIndent(indent, openTag);
                indent++;
            } else if (tag0 == endTag) { // XML END TAG
                indent--;
                off += 6 * 4; // Skip over the 6 words of end-tag data
                String name = compXmlString(xml, sitOff, stOff, nameSi);
                finalXML.append("</").append(name).append(">");
                prtIndent(indent, "</" + name + "> (line " + startTagLineNo + "-" + lineNo + ")");
            } else if (tag0 == endDocTag) { // END OF XML DOC TAG
                break;
            } else {
                prt(" Unrecognized tag code '" + Integer.toHexString(tag0) + "' at offset " + off);
                break;
            }
        }
        return finalXML.toString();
    }

    /**
     * Looks up a string by its index in the StringIndexTable.
     *
     * @param xml    the raw document bytes
     * @param sitOff offset of the StringIndexTable
     * @param stOff  offset of the StringTable
     * @param strInd index of the wanted string
     * @return the string, or {@code null} when {@code strInd} is negative
     */
    public static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) {
        if (strInd < 0) {
            return null;
        }
        int strOff = stOff + LEW(xml, sitOff + strInd * 4);
        return compXmlStringAt(xml, strOff);
    }

    /**
     * Passes {@code str} to {@link #prt(String)} prefixed with two blanks per
     * indent level, capped at the length of {@link #spaces}.
     *
     * @param indent nesting depth; negative values are treated as zero
     * @param str    the text to print
     */
    public static void prtIndent(int indent, String str) {
        // Unbalanced end tags can drive indent below zero; clamp so that
        // substring() does not throw.
        int width = Math.max(0, Math.min(indent * 2, spaces.length()));
        prt(spaces.substring(0, width) + str);
    }

    /**
     * Returns the string stored in StringTable format at {@code strOff}: a
     * 16-bit little-endian character count followed by that many 16-bit
     * little-endian characters.
     *
     * @param arr    the raw document bytes
     * @param strOff offset of the 16-bit length that precedes the characters
     * @return the decoded string
     * @throws IndexOutOfBoundsException if the string runs past the array end
     */
    public static String compXmlStringAt(byte[] arr, int strOff) {
        int strLen = (arr[strOff + 1] & 0xff) << 8 | (arr[strOff] & 0xff);
        // Decode both bytes of every character; keeping only the low byte
        // would garble anything outside ASCII.
        // NOTE(review): string tables stored as UTF-8 are not handled here.
        return new String(arr, strOff + 2, strLen * 2, StandardCharsets.UTF_16LE);
    }

    /**
     * Returns the little-endian 32-bit word stored at {@code off}.
     *
     * @param arr the byte array to read from
     * @param off offset of the least significant byte
     * @return the word as a signed int
     */
    public static int LEW(byte[] arr, int off) {
        return (arr[off + 3] & 0xff) << 24
                | (arr[off + 2] & 0xff) << 16
                | (arr[off + 1] & 0xff) << 8
                | (arr[off] & 0xff);
    }

    /**
     * Decodes the binary XML named on the command line and prints it to
     * standard output. For a {@code .apk} or {@code .zip} argument the
     * {@code AndroidManifest.xml} entry is decoded; any other file is treated
     * as a binary XML document itself.
     *
     * @param args a single element: the path of the file to decode
     * @throws IOException if the file or archive entry cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("usage: AndroidXMLDecompress <file.apk | file.zip | binary.xml>");
            return;
        }
        String fileName = args[0];
        byte[] buf;
        if (fileName.endsWith(".apk") || fileName.endsWith(".zip")) {
            try (ZipFile zip = new ZipFile(fileName)) {
                ZipEntry mft = zip.getEntry("AndroidManifest.xml");
                if (mft == null) {
                    throw new FileNotFoundException("AndroidManifest.xml not found in " + fileName);
                }
                try (InputStream is = zip.getInputStream(mft)) {
                    buf = readFully(is);
                }
            }
        } else {
            try (InputStream is = new FileInputStream(fileName)) {
                buf = readFully(is);
            }
        }
        String xml = AndroidXMLDecompress.decompressXML(buf);
        System.out.println(xml);
    }

    /** Reads the stream to its end; a single read() may return fewer bytes. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int n;
        while ((n = in.read(chunk)) != -1) {
            out.write(chunk, 0, n);
        }
        return out.toByteArray();
    }
}