Advertisement
Guest User

Untitled

a guest
Oct 20th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.78 KB | None | 0 0
  1. package org.hsbo.bigdata.operator;
  2.  
  3. import java.util.LinkedList;
  4. import java.util.List;
  5. import java.util.ArrayList;
  6. import org.hsbo.bigdata.model.KMeansModel;
  7.  
  8. import com.rapidminer.example.Attribute;
  9. import com.rapidminer.example.Attributes;
  10. import com.rapidminer.example.Example;
  11. import com.rapidminer.example.ExampleSet;
  12. import com.rapidminer.operator.Operator;
  13. import com.rapidminer.operator.OperatorDescription;
  14. import com.rapidminer.operator.OperatorException;
  15. import com.rapidminer.operator.ports.InputPort;
  16. import com.rapidminer.operator.ports.OutputPort;
  17. import com.rapidminer.operator.ports.metadata.AttributeAddingExampleSetPassThroughRule;
  18. import com.rapidminer.operator.ports.metadata.AttributeMetaData;
  19. import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
  20. import com.rapidminer.operator.ports.metadata.SimplePrecondition;
  21. import com.rapidminer.parameter.ParameterType;
  22. import com.rapidminer.parameter.ParameterTypeInt;
  23. import com.rapidminer.tools.Ontology;
  24. import org.hsbo.bigdata.operator.Datenpunkt;
  25.  
  26. public class KMeansOperator extends Operator {
  27.  
  28. private static final String PARAMETER_K = "k";
  29.  
  30. private InputPort dataIn = getInputPorts().createPort("data");
  31. private OutputPort dataOut = getOutputPorts().createPort("data");
  32. private OutputPort modelOut = getOutputPorts().createPort("model");
  33.  
  34. // DO NOT STORE ANYTHING IN AN OPERATOR, EXCEPT PORTS! EVERY VARIABLE FOR THE
  35. // ALGORITHM NEEDS TO BE LOCAL! Otherwise it will be persistent from one
  36. // execution to the other.
  37.  
  38. public KMeansOperator(OperatorDescription description) {
  39. super(description);
  40. System.out.println(description.toString());
  41. /**
  42. * The following is only needed for UX First we add a precondition to the input
  43. * port, so that the user knows what kind of object is expected to be connected.
  44. */
  45. dataIn.addPrecondition(new SimplePrecondition(dataIn, new ExampleSetMetaData()));
  46. System.out.println(dataIn.toString());
  47. /**
  48. * Here we add transformer, that are simulating this operators behavior, so that
  49. * the structure of the output is known. First we just tell it, that there is
  50. * coming out a model, the second rule specifies that the original data set is
  51. * returned on the dataOut port with a new Attirbute named Cluster added.
  52. */
  53. getTransformer().addGenerationRule(modelOut, KMeansModel.class);
  54. getTransformer().addRule(new AttributeAddingExampleSetPassThroughRule(dataIn, dataOut,
  55. new AttributeMetaData("cluster", Ontology.POLYNOMINAL)));
  56. }
  57.  
  58. /**
  59. * Euklidische Distanz
  60. */
  61. private double berechneDistanzEuk(Datenpunkt dp0, Datenpunkt dp1)
  62. {
  63. //Vier Dimensionen a1, ..., a4
  64. double dsumme = 0.0;
  65. for(int i=0;i< dp0.getKoordinaten().size(); i++)
  66. {
  67. dsumme = dsumme + Math.pow((dp0.getKoordinaten().get(i)-dp1.getKoordinaten().get(i)),2.0);
  68. }
  69. return Math.sqrt(dsumme);
  70. }
  71.  
  72.  
  73.  
  74. /**
  75. * Within this method you have to implement the operators actual actions. You
  76. * need to get the parameters to know how to do kMeans, you need to get the data
  77. * from the port. Once you have all the inputs, you can implement the algorithm.
  78. * Finally you need to wrap the results into a model object.
  79. *
  80. * There are already a few lines of code to show you around within the framework
  81. * and give you the tools you need for implementing kMeans.
  82. */
  83. @Override
  84. public void doWork() throws OperatorException {
  85. // getting the parameters
  86. int k = getParameterAsInt(PARAMETER_K);
  87.  
  88. // getting the input data
  89. ExampleSet data = dataIn.getData(ExampleSet.class);
  90. //System.out.println(data.toString());
  91. /**
  92. * Here you need to implement the initialization and the optimization as
  93. * discussed during the lecture. This block contains some examples of useful
  94. * methods, that you will need
  95. */
  96.  
  97. Attributes attributes = data.getAttributes();
  98. // this way you can loop over examples
  99. int h = 0;
  100. ArrayList<Datenpunkt> dp = new ArrayList<Datenpunkt>();
  101. for (Example example : data) {
  102.  
  103. dp.add(new Datenpunkt()); // Index H
  104.  
  105. // here we loop over all (regular) attributes (=colums of the data set). Regular
  106. // attributes are the ones marked to be used by algorithms like this
  107.  
  108. for (Attribute attribute : data.getAttributes())
  109. {
  110. // we can access properties of the attribute with the object like this:
  111. if (attribute.isNumerical()) {
  112. // in this case we migh use it for euclidean distance
  113. double value = example.getNumericalValue(attribute);
  114. dp.get(h).getKoordinaten().add(value);
  115. //berechneDistanzEuk(value);
  116. } else if (attribute.isNominal()) {
  117.  
  118. // perhaps usefull for another exercise?
  119. }
  120. String attributeName = attribute.getName();
  121. //System.out.println(attributeName);
  122.  
  123. // here's how we can get numerical and nominal values:
  124.  
  125. //System.out.println(h);
  126. double value = example.getNumericalValue(attribute);
  127. //System.out.println("NumVal: " + value);
  128. String valueString = example.getValueAsString(attribute);
  129. //System.out.println("StrVal: " + valueString);
  130.  
  131. }
  132. if(h >= 1)
  133. {
  134. System.out.println("Distanz h-1 zw. h: " + berechneDistanzEuk(dp.get(h-1),dp.get(h)));
  135. }
  136.  
  137. System.out.println(h);
  138. System.out.println(dp.get(h).getKoordinaten().toString());
  139. /**
  140. * You must not use examples for storing data. It's purely a view onto the data,
  141. * you shouldn't change them as you would change the underlying data. Here is
  142. * how you could extrac the data into a simple double[].
  143. */
  144.  
  145. double[] rawNumericalData = new double[attributes.size()];
  146. int j = 0;
  147. for (Attribute attribute : attributes) {
  148. rawNumericalData[j++] = example.getValue(attribute);
  149. }
  150. h = h+1;
  151.  
  152. }
  153. // alternatively you can loop using the size of the example set and random
  154. // access on the examples like that:
  155. for (int i = 0; i < data.size(); i++) {
  156. Example example = data.getExample(i);
  157. }
  158.  
  159. /**
  160. * After we finished the optimiization, create the model with the results. You
  161. * will need to implement KMeansModel as well.
  162. */
  163. KMeansModel model = new KMeansModel(data, k);
  164.  
  165. /**
  166. * We are delivering the output below
  167. */
  168. if (dataOut.isConnected()) {
  169. // we only copy the data and apply the model if it is connected. otherwise the
  170. // effort is wasted.
  171. ExampleSet outputData = model.apply((ExampleSet) data.copy());
  172. dataOut.deliver(outputData);
  173. }
  174. modelOut.deliver(model);
  175. }
  176.  
  177. /**
  178. * This is the method you need to add parameters to. Do we need additional ones?
  179. * This is the same for all operators.
  180. */
  181. @Override
  182. public List<ParameterType> getParameterTypes() {
  183. List<ParameterType> types = new LinkedList<>();
  184. types.add(new ParameterTypeInt(PARAMETER_K, "The number clusters", 2, Integer.MAX_VALUE, 2));
  185. return types;
  186. }
  187.  
  188. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement