Guest User

Untitled

a guest
Mar 3rd, 2015
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.61 KB | None | 0 0
  1. java.io.IOException: Not in GZIP format
  2. at java.util.zip.GZIPInputStream.readHeader(Unknown Source)
  3. at java.util.zip.GZIPInputStream.<init>(Unknown Source)
  4. at java.util.zip.GZIPInputStream.<init>(Unknown Source)
  5. at GZipTest.main(GZipTest.java:16)
  6.  
  7. java.util.zip.ZipException: error in opening zip file
  8. at java.util.zip.ZipFile.open(Native Method)
  9. at java.util.zip.ZipFile.<init>(Unknown Source)
  10. at java.util.zip.ZipFile.<init>(Unknown Source)
  11. at GZipTest.main(GZipTest.java:21)
  12.  
  13. From: "Saved by Windows Internet Explorer 7"
  14.  
  15. Subject: Google
  16. Date: Tue, 13 Jul 2010 21:23:03 +0530
  17. MIME-Version: 1.0
  18.  
  19. Content-Type: multipart/related;
  20. type="text/html";
  21.  
  22. boundary="----=_NextPart_000_0007_01CB22D1.93BBD1A0"
  23.  
  24. ------=_NextPart_000_0007_01CB22D1.93BBD1A0
  25. Content-Type: text/html;
  26. charset="utf-8"
  27. Content-Transfer-Encoding: quoted-printable
  28. Content-Location: http://www.google.com/webhp?sourceid=navclient&ie=UTF-8
  29.  
  30. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" =
  31. .
  32. .
  33. .
  34.  
  /**
   * String constants used while parsing MHT archives: the names of the MIME
   * headers and parameters the parser looks for, and byte-order marks spelled
   * in quoted-printable notation ("=XX" per byte).
   */
  public interface IConstants
  {
      /* ---- MIME parameter names ---- */

      /** Parameter that carries the multipart delimiter string. */
      String BOUNDARY = "boundary";

      /** Parameter that names the character set of a part. */
      String CHAR_SET = "charset";

      /* ---- MIME header names ---- */

      /** Header giving the media type of a part. */
      String CONTENT_TYPE = "Content-Type";

      /** Header giving the transfer encoding of a part. */
      String CONTENT_TRANSFER_ENCODING = "Content-Transfer-Encoding";

      /** Header giving the original URL of a part. */
      String CONTENT_LOCATION = "Content-Location";

      /* ---- Byte-order marks in quoted-printable form ---- */

      /** UTF-8 byte-order mark (bytes EF BB BF). */
      String UTF8_BOM = "=EF=BB=BF";

      /** UTF-16 byte-order mark, bytes FF FE (little-endian order). */
      String UTF16_BOM1 = "=FF=FE";

      /** UTF-16 byte-order mark, bytes FE FF (big-endian order). */
      String UTF16_BOM2 = "=FE=FF";
  }
  48.  
  49. /**
  50. * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
  51. * which accompanies this distribution, and is available at
  52. * http://www.eclipse.org/legal/epl-v10.html
  53. */
  54. package com.test.mht.core;
  55.  
  import java.io.BufferedOutputStream;
  import java.io.BufferedReader;
  import java.io.BufferedWriter;
  import java.io.ByteArrayOutputStream;
  import java.io.File;
  import java.io.FileInputStream;
  import java.io.FileOutputStream;
  import java.io.FileReader;
  import java.io.IOException;
  import java.io.InputStreamReader;
  import java.io.OutputStreamWriter;
  import java.nio.charset.StandardCharsets;
  import java.util.Base64;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;

  // NOTE(review): JDK-internal class, not available on JDK 9 and later; drop this
  // import once nothing in the file references it (java.util.Base64 replaces it).
  import sun.misc.BASE64Decoder;
  67.  
  68. /**
  69. * File to parse and decompose *.mts file in its constituting parts.
  70. * @author Manish Shukla
  71. */
  72.  
  73. public class MHTParser implements IConstants
  74. {
  75. private File mhtFile;
  76. private File outputFolder;
  77.  
  78. public MHTParser(File mhtFile, File outputFolder) {
  79. this.mhtFile = mhtFile;
  80. this.outputFolder = outputFolder;
  81. }
  82.  
  83. /**
  84. * @throws Exception
  85. */
  86. public void decompress() throws Exception
  87. {
  88. BufferedReader reader = null;
  89.  
  90. String type = "";
  91. String encoding = "";
  92. String location = "";
  93. String filename = "";
  94. String charset = "utf-8";
  95. StringBuilder buffer = null;
  96.  
  97. try
  98. {
  99. reader = new BufferedReader(new FileReader(mhtFile));
  100.  
  101. final String boundary = getBoundary(reader);
  102. if(boundary == null)
  103. throw new Exception("Failed to find document 'boundary'... Aborting");
  104.  
  105. String line = null;
  106. int i = 1;
  107. while((line = reader.readLine()) != null)
  108. {
  109. String temp = line.trim();
  110. if(temp.contains(boundary))
  111. {
  112. if(buffer != null) {
  113. writeBufferContentToFile(buffer,encoding,filename,charset);
  114. buffer = null;
  115. }
  116.  
  117. buffer = new StringBuilder();
  118. }else if(temp.startsWith(CONTENT_TYPE)) {
  119. type = getType(temp);
  120. }else if(temp.startsWith(CHAR_SET)) {
  121. charset = getCharSet(temp);
  122. }else if(temp.startsWith(CONTENT_TRANSFER_ENCODING)) {
  123. encoding = getEncoding(temp);
  124. }else if(temp.startsWith(CONTENT_LOCATION)) {
  125. location = temp.substring(temp.indexOf(":")+1).trim();
  126. i++;
  127. filename = getFileName(location,type);
  128. }else {
  129. if(buffer != null) {
  130. buffer.append(line + "n");
  131. }
  132. }
  133. }
  134.  
  135. }finally
  136. {
  137. if(null != reader)
  138. reader.close();
  139. }
  140.  
  141. }
  142.  
  143. private String getCharSet(String temp)
  144. {
  145. String t = temp.split("=")[1].trim();
  146. return t.substring(1, t.length()-1);
  147. }
  148.  
  149. /**
  150. * Save the file as per character set and encoding
  151. */
  152. private void writeBufferContentToFile(StringBuilder buffer,String encoding, String filename, String charset)
  153. throws Exception
  154. {
  155.  
  156. if(!outputFolder.exists())
  157. outputFolder.mkdirs();
  158.  
  159. byte[] content = null;
  160.  
  161. boolean text = true;
  162.  
  163. if(encoding.equalsIgnoreCase("base64")){
  164. content = getBase64EncodedString(buffer);
  165. text = false;
  166. }else if(encoding.equalsIgnoreCase("quoted-printable")) {
  167. content = getQuotedPrintableString(buffer);
  168. }
  169. else
  170. content = buffer.toString().getBytes();
  171.  
  172. if(!text)
  173. {
  174. BufferedOutputStream bos = null;
  175. try
  176. {
  177. bos = new BufferedOutputStream(new FileOutputStream(filename));
  178. bos.write(content);
  179. bos.flush();
  180. }finally {
  181. bos.close();
  182. }
  183. }else
  184. {
  185. BufferedWriter bw = null;
  186. try
  187. {
  188. bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), charset));
  189. bw.write(new String(content));
  190. bw.flush();
  191. }finally {
  192. bw.close();
  193. }
  194. }
  195. }
  196.  
  197. /**
  198. * When the save the *.mts file with 'utf-8' encoding then it appends '=EF=BB=BF'</br>
  199. * @see http://en.wikipedia.org/wiki/Byte_order_mark
  200. */
  201. private byte[] getQuotedPrintableString(StringBuilder buffer)
  202. {
  203. //Set<String> uniqueHex = new HashSet<String>();
  204. //final Pattern p = Pattern.compile("(=\p{XDigit}{2})*");
  205.  
  206. String temp = buffer.toString().replaceAll(UTF8_BOM, "").replaceAll("=n", "");
  207.  
  208. //Matcher m = p.matcher(temp);
  209. //while(m.find()) {
  210. // uniqueHex.add(m.group());
  211. //}
  212.  
  213. //System.out.println(uniqueHex);
  214.  
  215. //for (String hex : uniqueHex) {
  216. //temp = temp.replaceAll(hex, getASCIIValue(hex.substring(1)));
  217. //}
  218.  
  219. return temp.getBytes();
  220. }
  221.  
  222. /*private String getASCIIValue(String hex) {
  223. return ""+(char)Integer.parseInt(hex, 16);
  224. }*/
  225. /**
  226. * Although system dependent..it works well
  227. */
  228. private byte[] getBase64EncodedString(StringBuilder buffer) throws Exception {
  229. return new BASE64Decoder().decodeBuffer(buffer.toString());
  230. }
  231.  
  232. /**
  233. * Tries to get a qualified file name. If the name is not apparent it tries to guess it from the URL.
  234. * Otherwise it returns 'unknown.<type>'
  235. */
  236. private String getFileName(String location, String type)
  237. {
  238. final Pattern p = Pattern.compile("(\w|_|-)+\.\w+");
  239. String ext = "";
  240. String name = "";
  241. if(type.toLowerCase().endsWith("jpeg"))
  242. ext = "jpg";
  243. else
  244. ext = type.split("/")[1];
  245.  
  246. if(location.endsWith("/")) {
  247. name = "main";
  248. }else
  249. {
  250. name = location.substring(location.lastIndexOf("/") + 1);
  251.  
  252. Matcher m = p.matcher(name);
  253. String fname = "";
  254. while(m.find()) {
  255. fname = m.group();
  256. }
  257.  
  258. if(fname.trim().length() == 0)
  259. name = "unknown";
  260. else
  261. return getUniqueName(fname.substring(0,fname.indexOf(".")), fname.substring(fname.indexOf(".") + 1, fname.length()));
  262. }
  263. return getUniqueName(name,ext);
  264. }
  265.  
  266. /**
  267. * Returns a qualified unique output file path for the parsed path.</br>
  268. * In case the file already exist it appends a numarical value a continues
  269. */
  270. private String getUniqueName(String name,String ext)
  271. {
  272. int i = 1;
  273. File file = new File(outputFolder,name + "." + ext);
  274. if(file.exists())
  275. {
  276. while(true)
  277. {
  278. file = new File(outputFolder, name + i + "." + ext);
  279. if(!file.exists())
  280. return file.getAbsolutePath();
  281. i++;
  282. }
  283. }
  284.  
  285. return file.getAbsolutePath();
  286. }
  287.  
  288. private String getType(String line) {
  289. return splitUsingColonSpace(line);
  290. }
  291.  
  292. private String getEncoding(String line){
  293. return splitUsingColonSpace(line);
  294. }
  295.  
  296. private String splitUsingColonSpace(String line) {
  297. return line.split(":\s*")[1].replaceAll(";", "");
  298. }
  299.  
  300. /**
  301. * Gives you the boundary string
  302. */
  303. private String getBoundary(BufferedReader reader) throws Exception
  304. {
  305. String line = null;
  306.  
  307. while((line = reader.readLine()) != null)
  308. {
  309. line = line.trim();
  310. if(line.startsWith(BOUNDARY)) {
  311. return line.substring(line.indexOf(""") + 1, line.lastIndexOf("""));
  312. }
  313. }
  314.  
  315. return null;
  316. }
  317. }
  318.  
  319. <dependency>
  320. <groupId>org.apache.james</groupId>
  321. <artifactId>apache-mime4j</artifactId>
  322. <version>0.7.2</version>
  323. </dependency>
  324.  
  325. public static void main(String[] args)
  326. {
  327. MessageTree.main(new String[]{"YOU MHT FILE PATH"});
  328. }
  329.  
  330. /**
  331. * Displays a parsed Message in a window. The window will be divided into
  332. * two panels. The left panel displays the Message tree. Clicking on a
  333. * node in the tree shows information on that node in the right panel.
  334. *
  335. * Some of this code has been copied from the Java tutorial's JTree section.
  336. */
Add Comment
Please, Sign In to add comment