Advertisement
Guest User

Untitled

a guest
Jan 16th, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.05 KB | None | 0 0
  1. `train_ID =
  2. 14 13 46 35 11 7 43 27 10 45 47 32 36 17 19 44 48 15 21 28 34 16 49 31 42 25 29 9 8 20 18 33 6 38 2 22 41 37 4 23'
  3.  
  4. %                exp. # of good pairs inside hamming ball of radius <= (n-1)
  5. % precision(n) = --------------------------------------------------------------
  6. %                exp. # of total pairs inside hamming ball of radius <= (n-1)
  7. %
  8. %             exp. # of good pairs inside hamming ball of radius <= (n-1)
  9. % recall(n) = --------------------------------------------------------------
  10. %                          exp. # of total good pairs
  11.  
  % Demo script: rank training points by Hamming distance to each test point
  % and report precision / recall at a few cut-off ranks.
  % Relies on the helper functions distMat and hammingDist.

  % split up into training and test set
  clear   % variables only; 'clear all' would also flush functions and breakpoints
  db_data = rand(50,10);
  [ndata, D] = size(db_data);
  num_test = 10;
  R = randperm(ndata);
  test_ID = R(1:num_test);            % rows of db_data used as queries
  train_ID = R(num_test+1:end);       % remaining rows form the training set
  test_data = db_data(test_ID, :);
  train_data = db_data(train_ID, :);
  num_training = size(train_data, 1);
  averageNumberNeighbors = 5;

  % define ground-truth neighbors (this is only used for the evaluation):
  % sample (up to) 10 training points and use the mean distance to their
  % averageNumberNeighbors-th closest training point as the neighbor radius
  num_samples = min(10, num_training);
  sample_idx = randperm(num_training);
  Dsample = distMat(train_data(sample_idx(1:num_samples), :), train_data);
  Dsample = sort(Dsample, 2);         % sort each row ascending
  Dball = mean(Dsample(:, averageNumberNeighbors));
  clear Dsample;

  % scale data so that the target distance is 1
  train_data = train_data / Dball;
  test_data = test_data / Dball;
  Dball = 1;

  % threshold to define ground truth
  DtrueTestTraining = distMat(test_data, train_data);
  WtrueTestTraining = DtrueTestTraining < Dball;
  clear DtrueTestTraining;

  % stack the training and test split into one data matrix
  XX = [train_data; test_data];

  % Binarize each dimension at the training mean. The data comes from rand and
  % is therefore strictly positive, so thresholding at 0 would turn every code
  % into all ones and every Hamming distance into 0.
  code_threshold = mean(train_data, 1);
  B_trn = bsxfun(@gt, train_data, code_threshold);
  B_tst = bsxfun(@gt, test_data, code_threshold);
  Dhamm = hammingDist(B_tst, B_trn);

  % reorder every ground-truth row by increasing Hamming distance to the query
  for i = 1:size(Dhamm,1)
      [~, order] = sort(Dhamm(i,:), 'ascend');
      WtrueTestTraining(i,:) = WtrueTestTraining(i,order);
  end
  total_good_pairs = sum(WtrueTestTraining(:));

  % cut-off ranks at which precision / recall are evaluated
  % (the original copy named 'grid' is dropped: it shadowed the built-in grid)
  pos = [1,10,20,30,40];
  pos = pos(pos <= num_training);     % never index past the training set

  num_queries = size(WtrueTestTraining, 1);
  recall = zeros(1, numel(pos));
  precision = zeros(1, numel(pos));
  for i = 1:numel(pos)
      g = pos(i);
      retrieved_good_pairs = sum(sum(WtrueTestTraining(:,1:g)));
      total_pairs = num_queries * g;  % every query retrieves its g nearest codes
      recall(i) = retrieved_good_pairs / total_good_pairs;
      precision(i) = retrieved_good_pairs / total_pairs;
  end
  presicion = precision;              % misspelled legacy name, kept for existing users
  63.  
  64.  
  function Dh=hammingDist(B1, B2)
  %
  % Compute hamming distance between two sets of samples (B1, B2)
  %
  % Dh=hammingDist(B1, B2);
  %
  % Input
  %    B1, B2: compact bit vectors. Each datapoint is one row and each
  %            column holds one word with a value in 0..255.
  %    size(B1) = [ndatapoints1, nwords]
  %    size(B2) = [ndatapoints2, nwords]
  %    It is faster if ndatapoints1 < ndatapoints2
  %
  % Output
  %    Dh = hamming distance (uint16).
  %    size(Dh) = [ndatapoints1, ndatapoints2]
  %
  % Example query
  %    Dhamm = hammingDist(B2, B1);
  %    this will give the same result as:
  %    Dhamm = distMat(U2>0, U1>0).^2;
  %    the size of the distance matrix is:
  %    size(Dhamm) = [Ntest x Ntraining]

  % look-up table: bit_in_char(k+1) is the number of set bits in k, k = 0..255
  bit_in_char = uint16([...
      0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 1 2 2 3 2 3 ...
      3 4 2 3 3 4 3 4 4 5 1 2 2 3 2 3 3 4 2 3 3 4 ...
      3 4 4 5 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 1 2 ...
      2 3 2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 ...
      3 4 4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 ...
      5 6 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 1 2 2 3 ...
      2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 3 4 ...
      4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 ...
      3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 2 3 3 4 3 4 ...
      4 5 3 4 4 5 4 5 5 6 3 4 4 5 4 5 5 6 4 5 5 6 ...
      5 6 6 7 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 4 5 ...
      5 6 5 6 6 7 5 6 6 7 6 7 7 8]);

  n1 = size(B1,1);
  [n2, nwords] = size(B2);

  Dh = zeros([n1 n2], 'uint16');
  for j = 1:n1
      for n = 1:nwords
          y = bitxor(B1(j,n), B2(:,n));
          % Convert to double BEFORE adding 1. Integer arithmetic saturates,
          % so for uint8 words 255+1 stays 255 and a fully-differing byte
          % would be looked up as 7 differing bits instead of 8.
          idx = double(y(:).') + 1;
          Dh(j,:) = Dh(j,:) + bit_in_char(idx);
      end
  end
  113.  
  function D=distMat(P1, P2)
  %
  % Pairwise Euclidean distances between row vectors.
  %
  % D = distMat(P1, P2)   D(i,j) is the distance between P1(i,:) and P2(j,:)
  % D = distMat(P1)       D(i,j) is the distance between P1(i,:) and P1(j,:)
  %
  % Inputs are converted to double. The distances come from the expansion
  % |a-b|^2 = |a|^2 + |b|^2 - 2*a*b'; real() drops the imaginary part that
  % sqrt produces when round-off makes a squared distance slightly negative.

  P1 = double(P1);
  sqnorm1 = sum(P1.^2, 2);            % squared length of every row of P1

  if nargin ~= 2
      % single-set form: distances among the rows of P1
      gram = P1*P1';
      sqdist = bsxfun(@plus, sqnorm1, sqnorm1') - 2*gram;
      D = real(sqrt(sqdist));
      return
  end

  % two-set form: distances from each row of P1 to each row of P2
  P2 = double(P2);
  sqnorm2 = sum(P2.^2, 2);            % squared length of every row of P2
  cross = P1*P2';
  D = real(sqrt(bsxfun(@plus, sqnorm1, sqnorm2') - 2*cross));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement