Want more features on Pastebin? Sign Up, it's FREE!
Guest

pHash-like image hash for java

By: a guest on Sep 13th, 2011  |  syntax: Java  |  size: 5.01 KB  |  views: 5,751  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
import java.awt.Graphics2D;
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.IOException;
import java.io.InputStream;

import javax.imageio.ImageIO;
/*
 * pHash-like image hash.
 * Author: Elliot Shepherd (elliot@jarofworms.com)
 * Based On: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
 */
  13. public class ImagePHash {
  14.  
  15.         private int size = 32;
  16.         private int smallerSize = 8;
  17.        
  18.         public ImagePHash() {
  19.                 initCoefficients();
  20.         }
  21.        
  22.         public ImagePHash(int size, int smallerSize) {
  23.                 this.size = size;
  24.                 this.smallerSize = smallerSize;
  25.                
  26.                 initCoefficients();
  27.         }
  28.        
  29.         public int distance(String s1, String s2) {
  30.                 int counter = 0;
  31.                 for (int k = 0; k < s1.length();k++) {
  32.                         if(s1.charAt(k) != s2.charAt(k)) {
  33.                                 counter++;
  34.                         }
  35.                 }
  36.                 return counter;
  37.         }
  38.        
  39.         // Returns a 'binary string' (like. 001010111011100010) which is easy to do a hamming distance on.
  40.         public String getHash(InputStream is) throws Exception {
  41.                 BufferedImage img = ImageIO.read(is);
  42.                
  43.                 /* 1. Reduce size.
  44.                  * Like Average Hash, pHash starts with a small image.
  45.                  * However, the image is larger than 8x8; 32x32 is a good size.
  46.                  * This is really done to simplify the DCT computation and not
  47.                  * because it is needed to reduce the high frequencies.
  48.                  */
  49.                 img = resize(img, size, size);
  50.                
  51.                 /* 2. Reduce color.
  52.                  * The image is reduced to a grayscale just to further simplify
  53.                  * the number of computations.
  54.                  */
  55.                 img = grayscale(img);
  56.                
  57.                 double[][] vals = new double[size][size];
  58.                
  59.                 for (int x = 0; x < img.getWidth(); x++) {
  60.                         for (int y = 0; y < img.getHeight(); y++) {
  61.                                 vals[x][y] = getBlue(img, x, y);
  62.                         }
  63.                 }
  64.                
  65.                 /* 3. Compute the DCT.
  66.                  * The DCT separates the image into a collection of frequencies
  67.                  * and scalars. While JPEG uses an 8x8 DCT, this algorithm uses
  68.                  * a 32x32 DCT.
  69.                  */
  70.                 long start = System.currentTimeMillis();
  71.                 double[][] dctVals = applyDCT(vals);
  72.                 System.out.println("DCT: " + (System.currentTimeMillis() - start));
  73.                
  74.                 /* 4. Reduce the DCT.
  75.                  * This is the magic step. While the DCT is 32x32, just keep the
  76.                  * top-left 8x8. Those represent the lowest frequencies in the
  77.                  * picture.
  78.                  */
  79.                 /* 5. Compute the average value.
  80.                  * Like the Average Hash, compute the mean DCT value (using only
  81.                  * the 8x8 DCT low-frequency values and excluding the first term
  82.                  * since the DC coefficient can be significantly different from
  83.                  * the other values and will throw off the average).
  84.                  */
  85.                 double total = 0;
  86.                
  87.                 for (int x = 0; x < smallerSize; x++) {
  88.                         for (int y = 0; y < smallerSize; y++) {
  89.                                 total += dctVals[x][y];
  90.                         }
  91.                 }
  92.                 total -= dctVals[0][0];
  93.                
  94.                 double avg = total / (double) ((smallerSize * smallerSize) - 1);
  95.        
  96.                 /* 6. Further reduce the DCT.
  97.                  * This is the magic step. Set the 64 hash bits to 0 or 1
  98.                  * depending on whether each of the 64 DCT values is above or
  99.                  * below the average value. The result doesn't tell us the
  100.                  * actual low frequencies; it just tells us the very-rough
  101.                  * relative scale of the frequencies to the mean. The result
  102.                  * will not vary as long as the overall structure of the image
  103.                  * remains the same; this can survive gamma and color histogram
  104.                  * adjustments without a problem.
  105.                  */
  106.                 String hash = "";
  107.                
  108.                 for (int x = 0; x < smallerSize; x++) {
  109.                         for (int y = 0; y < smallerSize; y++) {
  110.                                 if (x != 0 && y != 0) {
  111.                                         hash += (dctVals[x][y] > avg?"1":"0");
  112.                                 }
  113.                         }
  114.                 }
  115.                
  116.                 return hash;
  117.         }
  118.        
  119.         private BufferedImage resize(BufferedImage image, int width,    int height) {
  120.                 BufferedImage resizedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
  121.                 Graphics2D g = resizedImage.createGraphics();
  122.                 g.drawImage(image, 0, 0, width, height, null);
  123.                 g.dispose();
  124.                 return resizedImage;
  125.         }
  126.        
  127.         private ColorConvertOp colorConvert = new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null);
  128.  
  129.         private BufferedImage grayscale(BufferedImage img) {
  130.         colorConvert.filter(img, img);
  131.         return img;
  132.     }
  133.        
  134.         private static int getBlue(BufferedImage img, int x, int y) {
  135.                 return (img.getRGB(x, y)) & 0xff;
  136.         }
  137.        
  138.         // DCT function stolen from http://stackoverflow.com/questions/4240490/problems-with-dct-and-idct-algorithm-in-java
  139.  
  140.         private double[] c;
  141.         private void initCoefficients() {
  142.                 c = new double[size];
  143.                
  144.         for (int i=1;i<size;i++) {
  145.             c[i]=1;
  146.         }
  147.         c[0]=1/Math.sqrt(2.0);
  148.     }
  149.        
  150.         private double[][] applyDCT(double[][] f) {
  151.                 int N = size;
  152.                
  153.         double[][] F = new double[N][N];
  154.         for (int u=0;u<N;u++) {
  155.           for (int v=0;v<N;v++) {
  156.             double sum = 0.0;
  157.             for (int i=0;i<N;i++) {
  158.               for (int j=0;j<N;j++) {
  159.                 sum+=Math.cos(((2*i+1)/(2.0*N))*u*Math.PI)*Math.cos(((2*j+1)/(2.0*N))*v*Math.PI)*(f[i][j]);
  160.               }
  161.             }
  162.             sum*=((c[u]*c[v])/4.0);
  163.             F[u][v] = sum;
  164.           }
  165.         }
  166.         return F;
  167.     }
  168.  
  169. }
clone this paste RAW Paste Data