Guest User

Untitled

a guest
Jan 4th, 2018
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.85 KB | None | 0 0
  1. import Jama.EigenVectorValue;
  2. import Jama.EigenvalueDecomposition;
  3. import Jama.Matrix;
  4.  
  5. import java.io.*;
  6. import java.util.*;
  7. import java.util.stream.Collectors;
  8.  
  9.  
  10. public class EcoliTest {
  11.  
  12. private List<List<Double>> normalizedAttributes;
  13. private List<Double> averages;
  14. private double[][] finalData;
  15. private List<ReducedEntry> extractedAttributes;
  16. private Set<ReducedEntry> trainSet;
  17. private Set<ReducedEntry> testSet;
  18.  
  19. // fix this to return list of ReducedEntry
  20. private void getReducedEntries() {
  21. List<Entry> entries = parseData("data/ecoli_data.txt");
  22.  
  23. prepareData(entries);
  24.  
  25. double[][] covarianceMatrix = getCovarianceMatrix(normalizedAttributes, averages);
  26.  
  27. this.finalData = getFinalData(covarianceMatrix);
  28.  
  29. writeFinalDataToFile();
  30.  
  31. visualizeData();
  32.  
  33. extractAttributes();
  34.  
  35. divideSet();
  36. }
  37.  
  38.  
  39. private double[][] getFinalData(double[][] covarianceMatrix) {
  40. EigenvalueDecomposition eigenvalueDecomposition = new EigenvalueDecomposition(new Matrix(covarianceMatrix));
  41. List<EigenVectorValue> eigenVectorValues = new ArrayList<>();
  42.  
  43. double[][] eigenVectors = eigenvalueDecomposition.getV().getArray();
  44. double[] eigenValues = eigenvalueDecomposition.getRealEigenvalues();
  45.  
  46. for (int j = 0; j<eigenVectors.length; j++){
  47. eigenVectorValues.add(new EigenVectorValue(eigenVectors, j, eigenValues[j]));
  48. }
  49.  
  50. Collections.sort(eigenVectorValues, Comparator.comparing(EigenVectorValue::getEigenValue).reversed());
  51. eigenVectorValues = eigenVectorValues.stream().limit(3).collect(Collectors.toList());
  52.  
  53. int m = 3;
  54. int n = eigenValues.length;
  55. double[][] featureVector = new double[m][n]; // 8 x 3
  56.  
  57. for (int i = 0; i<m; i++){
  58. for (int j = 0; j<n; j++){
  59. featureVector[i][j] = eigenVectorValues.get(i).getEigenVector()[j];
  60. }
  61. }
  62.  
  63. Matrix featureMatrix = new Matrix(featureVector).transpose();
  64. Matrix dataAdjust = getDataAdjust().transpose();
  65. Matrix finalData = dataAdjust.times(featureMatrix);
  66.  
  67. return finalData.getArray();
  68. }
  69.  
  70. private Matrix getDataAdjust() {
  71. int m = normalizedAttributes.size();
  72. int n = normalizedAttributes.get(0).size();
  73. double[][] temp = new double[m][n];
  74.  
  75. for (int i = 0; i<m; i++){
  76. for (int j = 0; j<n; j++){
  77. temp[i][j] = normalizedAttributes.get(i).get(j);
  78. }
  79. }
  80.  
  81. return new Matrix(temp);
  82. }
  83.  
  84. private void printMatrix(double[][] matrix, int m, int n) {
  85. System.out.println();
  86. for (int i = 0; i<m; i++){
  87. for (int j = 0; j<n; j++){
  88. System.out.printf("%.2f\t\t\t\t", matrix[i][j]);
  89. }
  90. System.out.println();
  91. }
  92. }
  93.  
  94. private List<Entry> parseData(String fileName){
  95. try(BufferedReader bufferedReader =
  96. new BufferedReader(new InputStreamReader(new FileInputStream(fileName))) ) {
  97.  
  98. return bufferedReader.lines()
  99. .map(line -> {
  100. String[] parts = line.split("\\s+");
  101. return new Entry(parts[0],
  102. Double.parseDouble(parts[1]),
  103. Double.parseDouble(parts[2]),
  104. Double.parseDouble(parts[3]),
  105. Double.parseDouble(parts[4]),
  106. Double.parseDouble(parts[5]),
  107. Double.parseDouble(parts[6]),
  108. Double.parseDouble(parts[7]),
  109. parts[8]);})
  110. .collect(Collectors.toList());
  111.  
  112. } catch (IOException e) {
  113. e.printStackTrace();
  114. return new ArrayList<>();
  115. }
  116. }
  117.  
  118. private void prepareData(List<Entry> entries) {
  119. List<List<Double>> attributes = new ArrayList<>();
  120. List<Double> averagesPerVector = new ArrayList<>();
  121. double temporaryAverage;
  122.  
  123. List<Double> mcgList = entries.stream()
  124. .map(Entry::getMcg)
  125. .collect(Collectors.toList());
  126.  
  127. temporaryAverage = getAverage(mcgList);
  128. averagesPerVector.add(temporaryAverage);
  129. attributes.add(normalizeVector(mcgList, temporaryAverage));
  130.  
  131.  
  132. List<Double> gvhList = entries.stream()
  133. .map(Entry::getGvh)
  134. .collect(Collectors.toList());
  135.  
  136. temporaryAverage = getAverage(gvhList);
  137. averagesPerVector.add(temporaryAverage);
  138. attributes.add(normalizeVector(gvhList, temporaryAverage));
  139.  
  140.  
  141. List<Double> lipList = entries.stream()
  142. .map(Entry::getLip)
  143. .collect(Collectors.toList());
  144.  
  145. temporaryAverage = getAverage(lipList);
  146. averagesPerVector.add(temporaryAverage);
  147. attributes.add(normalizeVector(lipList, temporaryAverage));
  148.  
  149.  
  150. List<Double> chgList = entries.stream()
  151. .map(Entry::getChg)
  152. .collect(Collectors.toList());
  153.  
  154. temporaryAverage = getAverage(chgList);
  155. averagesPerVector.add(temporaryAverage);
  156. attributes.add(normalizeVector(chgList, temporaryAverage));
  157.  
  158.  
  159. List<Double> aacList = entries.stream()
  160. .map(Entry::getAac)
  161. .collect(Collectors.toList());
  162.  
  163. temporaryAverage = getAverage(aacList);
  164. averagesPerVector.add(temporaryAverage);
  165. attributes.add(normalizeVector(aacList, temporaryAverage));
  166.  
  167.  
  168. List<Double> a1m1List = entries.stream()
  169. .map(Entry::getA1m1)
  170. .collect(Collectors.toList());
  171.  
  172. temporaryAverage = getAverage(a1m1List);
  173. averagesPerVector.add(temporaryAverage);
  174. attributes.add(normalizeVector(a1m1List, temporaryAverage));
  175.  
  176.  
  177. List<Double> a1m2List = entries.stream()
  178. .map(Entry::getA1m2)
  179. .collect(Collectors.toList());
  180.  
  181. temporaryAverage = getAverage(a1m2List);
  182. averagesPerVector.add(temporaryAverage);
  183. attributes.add(normalizeVector(a1m2List, temporaryAverage));
  184.  
  185. this.normalizedAttributes = attributes;
  186. this.averages = averagesPerVector;
  187. }
  188.  
  189. private List<Double> normalizeVector(List<Double> vector, double average) {
  190. return vector
  191. .stream()
  192. .map(number -> number - average)
  193. .collect(Collectors.toList());
  194. }
  195.  
  196. private double getAverage(List<Double> vector){
  197. return vector
  198. .stream()
  199. .mapToDouble(number -> (double) number)
  200. .average()
  201. .orElse(0.0);
  202. }
  203.  
  204. private double[][] getCovarianceMatrix(List<List<Double>> attributes, List<Double> averages) {
  205. int size = attributes.size();
  206. double[][] covariance = new double[size][size];
  207.  
  208. for (int i = 0; i<size; i++){
  209. for (int j = i; j<size; j++){
  210. covariance[i][j] = calculateCovariance(attributes.get(i), attributes.get(j), averages.get(i), averages.get(j));
  211. }
  212. }
  213.  
  214. return covariance;
  215. }
  216.  
  217. private double calculateCovariance(List<Double> firstAttribute, List<Double> secondAttribute,
  218. Double firstAttributeAverage, Double secondAttributeAverage) {
  219.  
  220. int n = firstAttribute.size();
  221. double sum = 0;
  222.  
  223. for (int i = 0; i < n; i++){
  224. sum += (firstAttribute.get(i) - firstAttributeAverage) * (secondAttribute.get(i) - secondAttributeAverage);
  225. }
  226. sum /= (n-1);
  227.  
  228. return sum;
  229. }
  230.  
  231. private void writeFinalDataToFile() {
  232. int m = finalData.length;
  233. int n = finalData[0].length;
  234.  
  235. try(BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("pyScripts/ecoli_data_transformed.txt"))){
  236. for (int i = 0; i<m; i++){
  237. for (int j = 0; j<n; j++){
  238. bufferedWriter.write(finalData[i][j] + "");
  239. if (j < n-1) {
  240. bufferedWriter.write("\t");
  241. }
  242. }
  243. if (i < m-1) {
  244. bufferedWriter.write("\n");
  245. }
  246. }
  247. } catch (IOException e) {
  248. e.printStackTrace();
  249. }
  250. }
  251.  
  252. private void visualizeData() {
  253. try {
  254. Runtime.getRuntime().exec("python pyScripts/visualize.py");
  255. } catch (IOException e) {
  256. e.printStackTrace();
  257. }
  258. }
  259.  
  260. private void extractAttributes() {
  261. extractedAttributes = new ArrayList<>();
  262. for (int i = 0; i<finalData.length; i++){
  263. extractedAttributes.add(new ReducedEntry(finalData[i][0], finalData[i][1], finalData[i][2]));
  264. }
  265. }
  266.  
  267. private void divideSet() {
  268. trainSet = new HashSet<>();
  269. testSet = new HashSet<>();
  270. Random random = new Random();
  271. int n = extractedAttributes.size();
  272.  
  273. int testSetSize = (int) (n * 0.3);
  274.  
  275. while (testSet.size() < testSetSize){
  276. testSet.add(extractedAttributes.get(random.nextInt(n)));
  277. }
  278.  
  279. System.out.println("TEST SET: " + testSet.size());
  280. testSet.forEach(System.out::println);
  281.  
  282. for (int i = 0; i<n; i++){
  283. ReducedEntry re = extractedAttributes.get(i);
  284. if (!testSet.contains(re)){
  285. trainSet.add(re);
  286. }
  287. }
  288.  
  289. System.out.println("TEST SET: " + trainSet.size());
  290. trainSet.forEach(System.out::println);
  291.  
  292. }
  293.  
  294.  
  295. public static void main(String[] args) {
  296. EcoliTest ecoliTest = new EcoliTest();
  297. ecoliTest.getReducedEntries();
  298. }
  299. }
Advertisement
Add Comment
Please, Sign In to add comment