Advertisement
nazar_art

FileScan - ProbeContentFile => exception

Mar 12th, 2013
205
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 9.17 KB | None | 0 0
  1. package task;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.File;
  5. import java.io.FileInputStream;
  6. import java.io.FileNotFoundException;
  7. import java.io.IOException;
  8. import java.io.InputStreamReader;
  9. import java.nio.file.Files;
  10. import java.nio.file.Path;
  11. import java.nio.file.Paths;
  12. import java.util.ArrayList;
  13. import java.util.Arrays;
  14. import java.util.HashSet;
  15. import java.util.List;
  16. import java.util.Scanner;
  17. import java.util.Set;
  18. import java.util.concurrent.BlockingQueue;
  19. import java.util.concurrent.CountDownLatch;
  20. import java.util.concurrent.Executor;
  21. import java.util.concurrent.Executors;
  22. import java.util.concurrent.LinkedBlockingQueue;
  23. import net.sf.jmimemagic.Magic;
  24. import net.sf.jmimemagic.MagicException;
  25. import net.sf.jmimemagic.MagicMatch;
  26. import net.sf.jmimemagic.MagicMatchNotFoundException;
  27. import net.sf.jmimemagic.MagicParseException;
  28.  
  29. public class FileScan implements Runnable {
  30.     private String whatFind;
  31.     private BlockingQueue<File> queue;
  32.     private CountDownLatch latch;
  33.     private File endOfWorkFile;
  34.  
  35.     public FileScan(String whatFind, BlockingQueue<File> queue,
  36.             CountDownLatch latch, File endOfWorkFile) {
  37.         this.whatFind = whatFind;
  38.         this.queue = queue;
  39.         this.latch = latch;
  40.         this.endOfWorkFile = endOfWorkFile;
  41.     }
  42.  
  43.     public FileScan() {
  44.     }
  45.  
  46.     @Override
  47.     public void run() {
  48.  
  49.         while (true) {
  50.             try {
  51.                 File file;
  52.                 file = queue.take();
  53.  
  54.                 if (file == endOfWorkFile) {
  55.                     break;
  56.                 }
  57.  
  58.                 scan(file);
  59.             } catch (InterruptedException e) {
  60.                 e.printStackTrace();
  61.             }
  62.         }
  63.  
  64.         latch.countDown();
  65.     }
  66.  
  67.     private void scan(File file) {
  68.         Scanner scanner = null;
  69.         int matches = 0;
  70.  
  71.         try {
  72.             scanner = new Scanner(file);
  73.         } catch (FileNotFoundException e) {
  74.             System.out.println("File Not Found.");
  75.             e.printStackTrace();
  76.         }
  77.  
  78.         while (scanner.hasNext())
  79.             if (scanner.next().equals(whatFind)) {
  80.                 matches++;
  81.             }
  82.  
  83.         if (matches > 0) {
  84.             String myStr = String.format(
  85.                     "File: %s - and the number of matches " + "is: %d",
  86.                     file.getAbsolutePath(), matches);
  87.             System.out.println(myStr);
  88.         }
  89.     }
  90.  
  91.     public void askUserPathAndWord() {
  92.         try {
  93.             tryToAskUserPathAndWord();
  94.         } catch (IOException | RuntimeException e) {
  95.             System.out.println("Wrong input!");
  96.             e.printStackTrace();
  97.         } catch (InterruptedException e) {
  98.             System.out.println("Interrupted.");
  99.             e.printStackTrace();
  100.         }
  101.     }
  102.  
  103.     private void tryToAskUserPathAndWord() throws IOException,
  104.             InterruptedException {
  105.         PathAndWord pathAndWord = readPathAndWord();
  106.  
  107.         if (pathAndWord.isProperlyInitialized()) {
  108.             performScan(pathAndWord, "GameOver.tmp");
  109.             System.out.println("Thank you!");
  110.         } else {
  111.             System.out.println("You did not enter anything");
  112.         }
  113.     }
  114.  
  115.     private PathAndWord readPathAndWord() throws IOException {
  116.         System.out
  117.                 .println("Please, enter a Path and Word (which you want to find):");
  118.  
  119.         BufferedReader bufferedReader = new BufferedReader(
  120.                 new InputStreamReader(System.in));
  121.  
  122.         String path = readPath(bufferedReader);
  123.         String whatFind = readWord(bufferedReader);
  124.         return new PathAndWord(path, whatFind);
  125.     }
  126.  
  127.     private String readPath(BufferedReader bufferedReader) throws IOException {
  128.         boolean ok = false;
  129.         File pathInput;
  130.  
  131.         do {
  132.             System.out.println("Please enter a Path:");
  133.             pathInput = new File(bufferedReader.readLine());
  134.             if (pathInput.exists() && pathInput.isDirectory()) {
  135.                 ok = true;
  136.             } else {
  137.                 System.out.println("Doesn't exist or isn't folder!");
  138.             }
  139.         } while (!ok);
  140.  
  141.         return pathInput.getAbsolutePath();
  142.     }
  143.  
  144.     private String readWord(BufferedReader bufferedReader) throws IOException {
  145.         System.out.println("Please enter a Word:");
  146.         return bufferedReader.readLine();
  147.     }
  148.  
  149.     private void performScan(PathAndWord pathAndWord, String endOfWorkFileName)
  150.             throws InterruptedException {
  151.         BlockingQueue<File> queue = new LinkedBlockingQueue<File>();
  152.  
  153.         File endOfWorkFile = new File(endOfWorkFileName);
  154.         CountDownLatch latch = new CountDownLatch(2);
  155.  
  156.         FolderScan folderScan = new FolderScan(pathAndWord.path, queue, latch,
  157.                 endOfWorkFile);
  158.         FileScan fileScan = new FileScan(pathAndWord.whatFind, queue, latch,
  159.                 endOfWorkFile);
  160.  
  161.         Executor executor = Executors.newCachedThreadPool();
  162.         executor.execute(folderScan);
  163.         executor.execute(fileScan);
  164.  
  165.         latch.await();
  166.     }
  167.  
  168.     /**
  169.      * @param args
  170.      */
  171.  
  172.     public static void main(String[] args) {
  173.         long startTime = System.currentTimeMillis();
  174.  
  175.         new FileScan().askUserPathAndWord();
  176.  
  177.         long stopTime = System.currentTimeMillis();
  178.         long elapsedTime = stopTime - startTime;
  179.         System.out.println("\nRuntime time " + elapsedTime + " milliseconds.");
  180.     }
  181. }
  182.  
  183. class FolderScan implements Runnable {
  184.  
  185.     FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch,
  186.             File endOfWorkFile) {
  187.         this.path = path;
  188.         this.queue = queue;
  189.         this.latch = latch;
  190.         this.endOfWorkFile = endOfWorkFile;
  191.  
  192.         checkers = new ArrayList<Checker>(Arrays.asList(
  193.          new ExtentionCheckers(), /*new ProbeContentTypeCheker(),*/
  194.         new EncodingsCheck() ));
  195.     }
  196.  
  197.     public FolderScan() {
  198.     }
  199.  
  200.     @Override
  201.     public void run() {
  202.         findFiles(path);
  203.         queue.add(endOfWorkFile);
  204.         latch.countDown();
  205.     }
  206.  
  207.     private void findFiles(String path) {
  208.  
  209.         try {
  210.             File root = new File(path);
  211.             File[] list = root.listFiles();
  212.             for (File currentFile : list) {
  213.                 boolean checksFailed = true;
  214.                 if (currentFile.isDirectory()) {
  215.                     findFiles(currentFile.getAbsolutePath());
  216.                 } else {
  217. //                   if (currentFile.getAbsolutePath().equals(
  218. //                   "E:\\Document\\!Nazar\\BOOKS\\)\\PDF\\readme.txt")) {
  219. //                   System.out.println();
  220. //                   }
  221.                     for (Checker currentChecker : checkers) {
  222.                         if (!currentChecker.check(currentFile)) {
  223.                             checksFailed = false;
  224.                             break;
  225.                         }
  226.                     }
  227.  
  228.                     if (checksFailed) {
  229.                         queue.put(currentFile);
  230.                     }
  231.                 }
  232.             }
  233.         } catch (InterruptedException e) {
  234.             e.printStackTrace();
  235.         }
  236.  
  237.     }
  238.  
  239.     private String path;
  240.     private BlockingQueue<File> queue;
  241.     private CountDownLatch latch;
  242.     private File endOfWorkFile;
  243.     private List<Checker> checkers;
  244. }
  245.  
  /**
   * Immutable holder for the two values the user is asked for: the directory
   * to search and the word to look for.
   */
  class PathAndWord {
      final String path;
      final String whatFind;

      /**
       * @param path     directory to search; may be {@code null}
       * @param whatFind word to look for; may be {@code null}
       */
      PathAndWord(String path, String whatFind) {
          this.whatFind = whatFind;
          this.path = path;
      }

      /**
       * @return {@code true} only when neither value is {@code null}
       */
      boolean isProperlyInitialized() {
          if (path == null) {
              return false;
          }
          return whatFind != null;
      }
  }
  259.  
  260. class ExtentionCheckers implements Checker {
  261.     private String fileName;
  262.  
  263.     @Override
  264.     public boolean check(File currentFile) {
  265.         fileName = currentFile.getName().toLowerCase();
  266.         Set<String> extensions = new HashSet<String>(Arrays.asList(".txt",
  267.                 ".pdf", ".doc", ".docx", ".html", ".htm", ".xml", ".djvu",
  268.                 ".djv", ".rar", ".rtf"));
  269.  
  270.         if (extensions.contains(fileName.substring(fileName.lastIndexOf(".")))) {
  271.             return true;
  272.         }
  273.  
  274.         return false;
  275.     }
  276. }
  277.  
  278. class EncodingsCheck implements Checker {
  279.  
  280.     @Override
  281.     public boolean check(File currentFile) {
  282.         return isUTF8(currentFile);
  283.     }
  284.  
  285.     public static boolean isUTF8(File file) {
  286.         // validate input
  287.         if (null == file) {
  288.             throw new IllegalArgumentException("input file can't be null");
  289.         }
  290.         if (file.isDirectory()) {
  291.             throw new IllegalArgumentException(
  292.                     "input file refers to a directory");
  293.         }
  294.  
  295.         // read input file
  296.         byte[] buffer;
  297.         try {
  298.             buffer = readUTFHeaderBytes(file);
  299.         } catch (IOException e) {
  300.             throw new IllegalArgumentException(
  301.                     "Can't read input file, error = " + e.getLocalizedMessage());
  302.         }
  303.  
  304.         if (0 == (buffer[0] & 0x80)) {
  305.             return true; // ASCII subset character, fast path
  306.         } else if (0xF0 == (buffer[0] & 0xF8)) { // start of 4-byte sequence
  307.             if (buffer[3] >= buffer.length) {
  308.                 return false;
  309.             }
  310.             if ((0x80 == (buffer[1] & 0xC0)) && (0x80 == (buffer[2] & 0xC0))
  311.                     && (0x80 == (buffer[3] & 0xC0)))
  312.                 return true;
  313.         } else if (0xE0 == (buffer[0] & 0xF0)) { // start of 3-byte sequence
  314.             if (buffer[2] >= buffer.length) {
  315.                 return false;
  316.             }
  317.             if ((0x80 == (buffer[1] & 0xC0)) && (0x80 == (buffer[2] & 0xC0))) {
  318.                 return true;
  319.             }
  320.         } else if (0xC0 == (buffer[0] & 0xE0)) { // start of 2-byte sequence
  321.             if (buffer[1] >= buffer.length) {
  322.                 return false;
  323.             }
  324.             if (0x80 == (buffer[1] & 0xC0)) {
  325.                 return true;
  326.             }
  327.         }
  328.  
  329.         return false;
  330.     }
  331.  
  332.     private static byte[] readUTFHeaderBytes(File input) throws IOException {
  333.         // read data
  334.         FileInputStream fileInputStream = new FileInputStream(input);
  335.         try{
  336.             byte firstBytes[] = new byte[4];
  337.             int count = fileInputStream.read(firstBytes);
  338.             if(count < 4){
  339.                 throw new IOException("Empty file");
  340.             }
  341.             return firstBytes;
  342.         } finally {
  343.             fileInputStream.close();
  344.         }
  345.     }
  346. }
  347.  
  348. class ProbeContentTypeCheker implements Checker {
  349.  
  350.     @Override
  351.     public boolean check(File currentFile) {
  352.         String mimeType = null;
  353.         try {
  354.             Path path = Paths.get(currentFile.getAbsolutePath());
  355.             byte[] data = Files.readAllBytes(path);
  356.             MagicMatch match = Magic.getMagicMatch(data);
  357.             mimeType = match.getMimeType();
  358.         } catch (MagicParseException | MagicMatchNotFoundException
  359.                 | MagicException | IOException e) {
  360.             e.printStackTrace();
  361.         }
  362.  
  363.         if (null != mimeType) {
  364.             return true;
  365.         }
  366.  
  367.         return false;
  368.     }
  369. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement