Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
%% Deceptive-review classifier: 5-fold cross-validated Naive Bayes
%
% The data lives under one root folder that contains fold1 ... fold5. Each
% fold folder holds one file per document, one term per line, named
% t_*.oneline (truthful) or d_*.oneline (deceptive).
%
% Result: after the script finishes, the workspace variable `count` holds
% the number of test documents, summed over all five folds, whose predicted
% class matches the class implied by the file-name prefix.

%% Stage 1 (disabled): build the per-fold term-count tables
% This pass is kept commented out. For every fold it writes
% trueFold<k>.mat (variable truthTable) and decepFold<k>.mat (variable
% falseTable) into the root folder; each table has a 'Terms' key column
% followed by one count column per document. Stage 2 below loads those
% files, so re-enable this pass only when they have to be regenerated.
%
% for thisFold = 1:5
%     cd('C:\Users\ryan fitchett\Desktop\IR cw');
%     %define directory
%     thisDir = ['fold' num2str(thisFold)];
%     cmd = ['cd ' thisDir];
%     eval(cmd);
%
%     %load training for given fold
%     temp = dir('Counts-Folds*.mat');
%     countsFile = temp.name;
%     load(countsFile);
%
%     % get names of true and deceptive files.
%     trueFiles = dir('t_*.oneline');
%     decepFiles = dir('d_*.oneline');
%
%     truthTable = table(trainTerms', 'VariableNames', {'Terms'});
%     falseTable = table(trainTerms', 'VariableNames', {'Terms'});
%
%     for f = 1:size(trueFiles, 1)
%         % get the filename
%         thisFile = trueFiles(f).name;
%
%         % open the file
%         fileName = fopen(thisFile);
%
%         % get the first line(one term per line)
%         line = fgetl(fileName);
%
%         % add first term to vector
%         trueTerms = {};
%         trueTerms{end+1} = line;
%
%         % while there are more lines(terms) in the file
%         while ischar(line)
%             % get the next line(term)
%             line = fgetl(fileName);
%
%             % if the line isn't blank and the end hasn't been reached
%             % add the term to the term vector.
%             if(ischar(line) & (line ~= ' '))
%                 trueTerms{end+1} = line;
%             end
%         end
%
%         [trueTerms, ~, a] = unique(trueTerms);
%         occurrenceTrue = accumarray(a,1);
%         tempTable = table(trueTerms', occurrenceTrue, 'VariableNames', {'Terms', 'Count'});
%         truthTable = outerjoin(truthTable, tempTable, 'Keys', {'Terms'}, 'MergeKeys', true);
%
%         %close the file
%         fclose(fileName);
%     end
%
%     for f = 1:size(decepFiles, 1)
%         % get the filename
%         thisFile = decepFiles(f).name;
%
%         % open the file
%         fileName = fopen(thisFile);
%
%         % get the first line(one term per line)
%         line = fgetl(fileName);
%
%         % add first term to vector
%         falseTerms = {};
%         falseTerms{end+1} = line;
%
%         % while there are more lines(terms) in the file
%         while ischar(line)
%             % get the next line(term)
%             line = fgetl(fileName);
%
%             % if the line isn't blank and the end hasn't been reached
%             % add the term to the term vector.
%             if(ischar(line) & (line ~= ' '))
%                 falseTerms{end+1} = line;
%             end
%         end
%
%         [falseTerms, ~, a] = unique(falseTerms);
%         occurrenceFalse = accumarray(a,1);
%         tempTable = table(falseTerms', occurrenceFalse, 'VariableNames', {'Terms', 'Count'});
%         falseTable = outerjoin(falseTable, tempTable, 'Keys', {'Terms'}, 'MergeKeys', true);
%
%         %close the file
%         fclose(fileName);
%     end
%
%     cd('C:\Users\ryan fitchett\Desktop\IR cw');
%     fileName = ['trueFold' num2str(thisFold) '.mat'];
%     save(fileName, 'truthTable');
%     fileName = ['decepFold' num2str(thisFold) '.mat'];
%     save(fileName, 'falseTable');
% end
% clear

%% Stage 2 (active): cross-validated evaluation
%
% For fold k the documents in <root>/fold<k> are the test set and the
% tables saved for the other four folds are the training set.
%
% Model: for every class the document frequency df(term) is the number of
% training documents of that class with a non-zero count for the term, and
% N is the number of training documents of that class. Each term line of a
% test document contributes log((df + 1) / (N + 2)) to the class score; a
% term never seen in that class simply has df = 0.
%
% Differences from the earlier revision of this script (all deliberate):
%   * The same scoring rule is applied to every test document. Previously
%     truthful files used 1 - p for unseen terms while deceptive files used
%     p, so the score depended on the label being predicted.
%   * N is the full number of training documents. Previously 1 was
%     subtracted after the 'Terms' column had already been removed, which
%     undercounted N by one.
%   * Scores are sums of logs rather than products of probabilities, so
%     long documents no longer underflow to 0 for both classes.
%   * One decision rule is used: truthful when its score is >= the
%     deceptive score. Previously a tie counted as correct for both classes.
%   * Blank lines are skipped with an explicit whitespace test, applied to
%     every line including the first.
%   * Files are addressed with full paths; the current folder is no longer
%     changed and no eval is used.

rootDir  = 'C:\Users\ryan fitchett\Desktop\IR cw';
numFolds = 5;

% Per-class configuration. Index 1 is the truthful class, 2 the deceptive.
matPrefix   = {'trueFold',    'decepFold'};     % <prefix><fold>.mat in rootDir
tableVar    = {'truthTable',  'falseTable'};    % table variable inside that file
filePattern = {'t_*.oneline', 'd_*.oneline'};   % test documents of the class
numClasses  = numel(matPrefix);

count = 0;
for thisFold = 1:numFolds
    % Ascending list of the other folds, e.g. [1 3 4 5] when thisFold is 2.
    trainFolds = setdiff(1:numFolds, thisFold);

    % ---- Training: document frequency of every term, per class ----------
    vocab   = cell(1, numClasses);   % sorted unique terms of the class
    docFreq = cell(1, numClasses);   % docFreq{c}(i) belongs to vocab{c}{i}
    numDocs = zeros(1, numClasses);  % training documents per class

    for c = 1:numClasses
        termPool = cell(0, 1);
        freqPool = zeros(0, 1);

        for k = trainFolds
            matFile   = fullfile(rootDir, sprintf('%s%d.mat', matPrefix{c}, k));
            stored    = load(matFile, tableVar{c});
            foldTable = stored.(tableVar{c});

            % Everything except the key column is a per-document count.
            isDocColumn = ~strcmp(foldTable.Properties.VariableNames, 'Terms');
            perDoc = foldTable{:, isDocColumn};
            % The outer joins of Stage 1 leave NaN where a document lacks
            % the term; treat that as a zero count.
            perDoc(isnan(perDoc)) = 0;

            termPool   = [termPool; foldTable.Terms(:)];      %#ok<AGROW>
            freqPool   = [freqPool; sum(perDoc ~= 0, 2)];     %#ok<AGROW>
            numDocs(c) = numDocs(c) + size(perDoc, 2);
        end

        % A term can occur in several folds: add up its per-fold frequencies.
        [vocab{c}, ~, group] = unique(termPool);
        docFreq{c} = accumarray(group(:), freqPool(:));
    end

    % ---- Testing: classify every document of the held-out fold ----------
    foldDir = fullfile(rootDir, ['fold' num2str(thisFold)]);

    for actual = 1:numClasses
        docs = dir(fullfile(foldDir, filePattern{actual}));

        for f = 1:numel(docs)
            % One term per line; fileread opens and closes the file itself.
            rawText = fileread(fullfile(foldDir, docs(f).name));
            terms   = regexp(rawText, '\r\n|\r|\n', 'split');
            isBlank = cellfun(@(t) isempty(strtrim(t)), terms);
            terms   = terms(~isBlank);

            logScore = zeros(1, numClasses);
            for c = 1:numClasses
                [known, where] = ismember(terms, vocab{c});
                seenIn = zeros(size(terms));          % df = 0 for unknown terms
                seenIn(known) = docFreq{c}(where(known));
                logScore(c) = sum(log((seenIn + 1) ./ (numDocs(c) + 2)));
            end

            % Ties go to the truthful class.
            if logScore(1) >= logScore(2)
                predicted = 1;
            else
                predicted = 2;
            end

            if predicted == actual
                count = count + 1;
            end
        end
    end
end

% Leave only the result behind, as the earlier revision of this script did.
clearvars -except count
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement