Guest User

pHash-like image hash for java

a guest
Sep 13th, 2011
13,504
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import java.awt.Graphics2D;
  2. import java.awt.color.ColorSpace;
  3. import java.awt.image.BufferedImage;
  4. import java.awt.image.ColorConvertOp;
  5. import java.io.InputStream;
  6.  
  7. import javax.imageio.ImageIO;
  8. /*
  9.  * pHash-like image hash.
  10.  * Author: Elliot Shepherd (elliot@jarofworms.com)
  11.  * Based On: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
  12.  */
  /**
   * Perceptual ("pHash-like") image hash.
   *
   * <p>An image is scaled to {@code size x size}, converted to gray, transformed
   * with a 2-D DCT, and the low-frequency {@code smallerSize x smallerSize}
   * corner of the transform is thresholded against its own mean. The hash is
   * returned as a string of {@code '0'}/{@code '1'} characters so that two hashes
   * can be compared with {@link #distance(String, String)}.
   *
   * <p>Only the DC coefficient (0,0) is left out of the hash, so a hash has
   * {@code smallerSize * smallerSize - 1} characters (63 with the defaults).
   * Earlier revisions of this class also dropped the first row and column and
   * produced shorter hashes; those are not comparable with the ones produced
   * here.
   */
  public class ImagePHash {

      /** Edge length, in pixels, the input image is scaled to before the DCT. */
      private final int size;

      /** Edge length of the low-frequency DCT corner that feeds the hash. */
      private final int smallerSize;

      /** DCT normalisation factors: 1/sqrt(2) for index 0, 1 for every other index. */
      private final double[] c;

      /**
       * Cosine table for the DCT: {@code cosines[u][i] = cos(((2i+1)/(2*size)) * u * PI)}
       * for the {@code smallerSize} frequencies that are actually used.
       */
      private final double[][] cosines;

      /** Converts an image to the gray colour space. */
      private final ColorConvertOp colorConvert =
              new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null);

      /** Creates a hasher with the usual parameters: 32x32 image, 8x8 DCT corner. */
      public ImagePHash() {
          this(32, 8);
      }

      /**
       * Creates a hasher with custom dimensions.
       *
       * @param size        edge length the image is scaled to before the DCT
       * @param smallerSize edge length of the low-frequency DCT corner used for
       *                    the hash; must be in {@code [2, size]}
       * @throws IllegalArgumentException if {@code smallerSize} is smaller than 2
       *                                  (the hash would be empty) or larger than
       *                                  {@code size}
       */
      public ImagePHash(int size, int smallerSize) {
          if (smallerSize < 2 || smallerSize > size) {
              throw new IllegalArgumentException(
                      "smallerSize must be in [2, size] but got size=" + size
                              + ", smallerSize=" + smallerSize);
          }
          this.size = size;
          this.smallerSize = smallerSize;
          this.c = new double[size];
          this.cosines = new double[smallerSize][size];

          initCoefficients();
      }

      /**
       * Hamming distance between two hashes of equal length.
       *
       * @param s1 first hash
       * @param s2 second hash
       * @return number of positions at which the two strings differ
       * @throws IllegalArgumentException if the strings differ in length
       * @throws NullPointerException     if either argument is {@code null}
       */
      public int distance(String s1, String s2) {
          if (s1.length() != s2.length()) {
              throw new IllegalArgumentException(
                      "Hashes must have equal length: " + s1.length() + " vs " + s2.length());
          }
          int counter = 0;
          for (int k = 0; k < s1.length(); k++) {
              if (s1.charAt(k) != s2.charAt(k)) {
                  counter++;
              }
          }
          return counter;
      }

      /**
       * Computes the hash of the image read from {@code is}.
       *
       * <p>The result is a 'binary string' (e.g. {@code 001010111011100010}) which
       * is easy to do a Hamming distance on. Bits are emitted for x = 0..smallerSize-1
       * (outer) and y = 0..smallerSize-1 (inner), skipping (0,0). The stream is not
       * closed by this method.
       *
       * @param is stream holding an image in a format {@link ImageIO} can decode
       * @return the hash, {@code smallerSize * smallerSize - 1} characters long
       * @throws java.io.IOException if reading fails or no image reader can decode
       *                             the stream
       * @throws Exception           declared for compatibility with existing callers
       */
      public String getHash(InputStream is) throws Exception {
          BufferedImage img = ImageIO.read(is);
          if (img == null) {
              // ImageIO.read signals "unknown format" with null; hashing the blank
              // canvas that resize() would produce from it is never what a caller wants.
              throw new java.io.IOException("Stream does not contain a decodable image");
          }

          /* 1. Reduce size.
           * Like Average Hash, pHash starts with a small image.
           * However, the image is larger than 8x8; 32x32 is a good size.
           * This is really done to simplify the DCT computation and not
           * because it is needed to reduce the high frequencies.
           */
          img = resize(img, size, size);

          /* 2. Reduce color.
           * The image is reduced to a grayscale just to further simplify
           * the number of computations.
           */
          img = grayscale(img);

          double[][] vals = new double[size][size];
          for (int x = 0; x < size; x++) {
              for (int y = 0; y < size; y++) {
                  vals[x][y] = getBlue(img, x, y);
              }
          }

          /* 3./4. Compute and reduce the DCT.
           * Only the top-left smallerSize x smallerSize coefficients are
           * computed; those represent the lowest frequencies in the picture
           * and are the only ones the hash looks at.
           */
          double[][] dctVals = applyDCT(vals);

          /* 5. Compute the average value.
           * Mean of the low-frequency values excluding the first term, since
           * the DC coefficient can be significantly different from the other
           * values and would throw off the average.
           */
          double total = 0;
          for (int x = 0; x < smallerSize; x++) {
              for (int y = 0; y < smallerSize; y++) {
                  total += dctVals[x][y];
              }
          }
          total -= dctVals[0][0];

          double avg = total / (double) ((smallerSize * smallerSize) - 1);

          /* 6. Further reduce the DCT.
           * Set each hash bit to 0 or 1 depending on whether the DCT value is
           * above the average. Exactly the values that entered the average are
           * used, i.e. everything except the DC term at (0,0).
           */
          StringBuilder hash = new StringBuilder(smallerSize * smallerSize - 1);
          for (int x = 0; x < smallerSize; x++) {
              for (int y = 0; y < smallerSize; y++) {
                  if (x != 0 || y != 0) {
                      hash.append(dctVals[x][y] > avg ? '1' : '0');
                  }
              }
          }

          return hash.toString();
      }

      /** Draws {@code image} scaled to {@code width x height} onto a new ARGB image. */
      private BufferedImage resize(BufferedImage image, int width, int height) {
          BufferedImage resizedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
          Graphics2D g = resizedImage.createGraphics();
          g.drawImage(image, 0, 0, width, height, null);
          g.dispose();
          return resizedImage;
      }

      /** Converts {@code img} to gray in place and returns the same instance. */
      private BufferedImage grayscale(BufferedImage img) {
          colorConvert.filter(img, img);
          return img;
      }

      /**
       * Returns the low 8 bits (blue channel) of the pixel at (x, y).
       * After the gray conversion the colour channels are expected to carry the
       * same value, so reading one of them is presumably enough.
       */
      private static int getBlue(BufferedImage img, int x, int y) {
          return (img.getRGB(x, y)) & 0xff;
      }

      // DCT function adapted from
      // http://stackoverflow.com/questions/4240490/problems-with-dct-and-idct-algorithm-in-java

      /** Fills the normalisation factors and the cosine table used by the DCT. */
      private void initCoefficients() {
          for (int i = 1; i < size; i++) {
              c[i] = 1;
          }
          c[0] = 1 / Math.sqrt(2.0);

          for (int u = 0; u < smallerSize; u++) {
              for (int i = 0; i < size; i++) {
                  cosines[u][i] = Math.cos(((2 * i + 1) / (2.0 * size)) * u * Math.PI);
              }
          }
      }

      /**
       * Computes the top-left {@code smallerSize x smallerSize} coefficients of the
       * 2-D DCT of {@code f}.
       *
       * <p>The terms are multiplied and accumulated in the same order as a direct
       * evaluation of the formula, only with the cosines taken from the table.
       *
       * @param f {@code size x size} input samples
       * @return the low-frequency DCT coefficients, indexed {@code [u][v]}
       */
      private double[][] applyDCT(double[][] f) {
          int N = size;

          double[][] F = new double[smallerSize][smallerSize];
          for (int u = 0; u < smallerSize; u++) {
              for (int v = 0; v < smallerSize; v++) {
                  double sum = 0.0;
                  for (int i = 0; i < N; i++) {
                      for (int j = 0; j < N; j++) {
                          sum += cosines[u][i] * cosines[v][j] * (f[i][j]);
                      }
                  }
                  sum *= ((c[u] * c[v]) / 4.0);
                  F[u][v] = sum;
              }
          }
          return F;
      }

  }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×