Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- % Example value of train_ID (pasted output):
- % train_ID = 14 13 46 35 11 7 43 27 10 45 47 32 36 17 19 44 48 15 21 28 34 16 49 31 42 25 29 9 8 20 18 33 6 38 2 22 41 37 4 23
- %                  exp. # of good pairs inside hamming ball of radius <= (n-1)
- %  precision(n) = --------------------------------------------------------------
- %                  exp. # of total pairs inside hamming ball of radius <= (n-1)
- %
- %                  exp. # of good pairs inside hamming ball of radius <= (n-1)
- %  recall(n)    = --------------------------------------------------------------
- %                  exp. # of total good pairs
- % Evaluation script: ranks training points by Hamming distance to each test
- % point and reports precision/recall at several rank cut-offs, using
- % Euclidean neighbors (distance below a sampled threshold) as ground truth.
- %
- % split up into training and test set
- clear % variables only; 'clear all' would also flush breakpoints and cached functions
- db_data = rand(50,10);
- [ndata, D] = size(db_data);
- num_test = 10;
- R = randperm(ndata);
- test_ID = R(1:num_test);
- test_data = db_data(test_ID, :);
- R(1:num_test) = [];
- train_ID = R;
- train_data = db_data(train_ID, :);
- num_training = size(train_data, 1);
- averageNumberNeighbors = 5;
- % define ground-truth neighbors (this is only used for the evaluation):
- R = randperm(num_training);
- DtrueTraining = distMat(train_data(R(1:10), :), train_data); % sample 10 points to find a threshold
- Dball = sort(DtrueTraining, 2); % sort each row in ascending order
- clear DtrueTraining;
- % mean distance found in column averageNumberNeighbors of the sorted rows
- Dball = mean(Dball(:, averageNumberNeighbors));
- % scale data so that the target distance is 1
- train_data = train_data / Dball;
- test_data = test_data / Dball;
- Dball = 1;
- % threshold to define ground truth
- DtrueTestTraining = distMat(test_data, train_data);
- WtrueTestTraining = DtrueTestTraining < Dball;
- clear DtrueTestTraining;
- % Binarize each dimension at its training mean. The data come from rand and
- % are strictly positive, so thresholding at 0 would set every bit to 1 and
- % make all Hamming distances zero. Bits are stored as uint8, one bit per word.
- bit_threshold = mean(train_data, 1);
- B_trn = uint8(bsxfun(@gt, train_data, bit_threshold));
- B_tst = uint8(bsxfun(@gt, test_data, bit_threshold));
- Dhamm = hammingDist(B_tst, B_trn);
- % reorder every ground-truth row by increasing Hamming distance to the test point
- for i = 1:size(Dhamm,1)
- [~, order] = sort(Dhamm(i,:), 'ascend');
- WtrueTestTraining(i,:) = WtrueTestTraining(i,order);
- end
- % rank cut-offs at which precision and recall are evaluated
- pos = [1,10,20,30,40];
- total_good_pairs = sum(WtrueTestTraining(:));
- if total_good_pairs == 0
- warning('hammingEval:noGoodPairs', 'No ground-truth neighbor pairs found; recall will be NaN.');
- end
- recall = zeros(1, numel(pos));
- precision = zeros(1, numel(pos));
- for i = 1:numel(pos)
- g = pos(i);
- % good pairs among the g top-ranked training points of every test point
- retrieved_good_pairs = sum(sum(WtrueTestTraining(:,1:g)));
- total_pairs = size(WtrueTestTraining,1) * g;
- recall(i) = retrieved_good_pairs / total_good_pairs;
- precision(i) = retrieved_good_pairs / total_pairs;
- end
- presicion = precision; % original (misspelled) variable name, kept for compatibility
- function Dh=hammingDist(B1, B2)
- %
- % Compute hamming distance between two sets of samples (B1, B2)
- %
- % Dh=hammingDist(B1, B2);
- %
- % Input
- % B1, B2: compact bit vectors. Each datapoint is one row.
- % size(B1) = [ndatapoints1, nwords]
- % size(B2) = [ndatapoints2, nwords]
- % Every word must hold a value in 0..255 (one byte).
- % It is faster if ndatapoints1 < ndatapoints2
- %
- % Output
- % Dh = hamming distance (uint16).
- % size(Dh) = [ndatapoints1, ndatapoints2]
- %
- % Errors if B1 and B2 do not have the same number of words.
- %
- % example query
- % Dhamm = hammingDist(B2, B1);
- % this will give the same result as:
- % Dhamm = distMat(U2>0, U1>0).^2;
- % the size of the distance matrix is:
- % size(Dhamm) = [Ntest x Ntraining]
- % look-up table: bit_in_char(v+1) is the number of set bits in byte v
- bit_in_char = uint16([...
- 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 1 2 2 3 2 3 ...
- 3 4 2 3 3 4 3 4 4 5 1 2 2 3 2 3 3 4 2 3 3 4 ...
- 3 4 4 5 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 1 2 ...
- 2 3 2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 ...
- 3 4 4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 ...
- 5 6 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 1 2 2 3 ...
- 2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 3 4 ...
- 4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 ...
- 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 2 3 3 4 3 4 ...
- 4 5 3 4 4 5 4 5 5 6 3 4 4 5 4 5 5 6 4 5 5 6 ...
- 5 6 6 7 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 4 5 ...
- 5 6 5 6 6 7 5 6 6 7 6 7 7 8]);
- n1 = size(B1,1);
- [n2, nwords] = size(B2);
- if size(B1,2) ~= nwords
- error('hammingDist:sizeMismatch', 'B1 and B2 must have the same number of words (columns).');
- end
- Dh = zeros([n1 n2], 'uint16');
- for j = 1:n1
- for n=1:nwords
- y = bitxor(B1(j,n),B2(:,n));
- % Convert to double before adding 1: with uint8 words, y+1 saturates at 255,
- % so a fully different byte (y == 255) would be counted as 7 bits instead of 8.
- % Indexing the row-vector table with a vector yields a row, matching Dh(j,:).
- Dh(j,:) = Dh(j,:) + bit_in_char(double(y) + 1);
- end
- end
- end
- function D=distMat(P1, P2)
- %
- % Euclidean distances between vectors
- % each vector is one row
- %
- % D = distMat(P1, P2) returns a size(P1,1)-by-size(P2,1) matrix with
- % D(i,j) = norm(P1(i,:) - P2(j,:)).
- % D = distMat(P1) returns the pairwise distances between the rows of P1.
- %
- % Inputs are converted to double before the computation.
- P1 = double(P1);
- sq1 = sum(P1.^2, 2); % squared norm of every row of P1
- if nargin == 2
- P2 = double(P2);
- sq2 = sum(P2.^2, 2);
- G = P1*P2';
- else
- sq2 = sq1;
- G = P1*P1';
- end
- % ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a*b'; bsxfun expands the two norm vectors
- % without building repmat copies. Rounding can make the squared distance
- % slightly negative; real() maps the resulting imaginary root to 0.
- D = real(sqrt(bsxfun(@plus, sq1, sq2') - 2*G));
- end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement