Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package org.hsbo.bigdata.operator;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.ArrayList;
- import org.hsbo.bigdata.model.KMeansModel;
- import com.rapidminer.example.Attribute;
- import com.rapidminer.example.Attributes;
- import com.rapidminer.example.Example;
- import com.rapidminer.example.ExampleSet;
- import com.rapidminer.operator.Operator;
- import com.rapidminer.operator.OperatorDescription;
- import com.rapidminer.operator.OperatorException;
- import com.rapidminer.operator.ports.InputPort;
- import com.rapidminer.operator.ports.OutputPort;
- import com.rapidminer.operator.ports.metadata.AttributeAddingExampleSetPassThroughRule;
- import com.rapidminer.operator.ports.metadata.AttributeMetaData;
- import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
- import com.rapidminer.operator.ports.metadata.SimplePrecondition;
- import com.rapidminer.parameter.ParameterType;
- import com.rapidminer.parameter.ParameterTypeInt;
- import com.rapidminer.tools.Ontology;
- import org.hsbo.bigdata.operator.Datenpunkt;
- public class KMeansOperator extends Operator {
- private static final String PARAMETER_K = "k";
- private InputPort dataIn = getInputPorts().createPort("data");
- private OutputPort dataOut = getOutputPorts().createPort("data");
- private OutputPort modelOut = getOutputPorts().createPort("model");
- // DO NOT STORE ANYTHING IN AN OPERATOR, EXCEPT PORTS! EVERY VARIABLE FOR THE
- // ALGORITHM NEEDS TO BE LOCAL! Otherwise it will be persistent from one
- // execution to the other.
- public KMeansOperator(OperatorDescription description) {
- super(description);
- System.out.println(description.toString());
- /**
- * The following is only needed for UX First we add a precondition to the input
- * port, so that the user knows what kind of object is expected to be connected.
- */
- dataIn.addPrecondition(new SimplePrecondition(dataIn, new ExampleSetMetaData()));
- System.out.println(dataIn.toString());
- /**
- * Here we add transformer, that are simulating this operators behavior, so that
- * the structure of the output is known. First we just tell it, that there is
- * coming out a model, the second rule specifies that the original data set is
- * returned on the dataOut port with a new Attirbute named Cluster added.
- */
- getTransformer().addGenerationRule(modelOut, KMeansModel.class);
- getTransformer().addRule(new AttributeAddingExampleSetPassThroughRule(dataIn, dataOut,
- new AttributeMetaData("cluster", Ontology.POLYNOMINAL)));
- }
- /**
- * Euklidische Distanz
- */
- private double berechneDistanzEuk(Datenpunkt dp0, Datenpunkt dp1)
- {
- //Vier Dimensionen a1, ..., a4
- double dsumme = 0.0;
- for(int i=0;i< dp0.getKoordinaten().size(); i++)
- {
- dsumme = dsumme + Math.pow((dp0.getKoordinaten().get(i)-dp1.getKoordinaten().get(i)),2.0);
- }
- return Math.sqrt(dsumme);
- }
- /**
- * Within this method you have to implement the operators actual actions. You
- * need to get the parameters to know how to do kMeans, you need to get the data
- * from the port. Once you have all the inputs, you can implement the algorithm.
- * Finally you need to wrap the results into a model object.
- *
- * There are already a few lines of code to show you around within the framework
- * and give you the tools you need for implementing kMeans.
- */
- @Override
- public void doWork() throws OperatorException {
- // getting the parameters
- int k = getParameterAsInt(PARAMETER_K);
- // getting the input data
- ExampleSet data = dataIn.getData(ExampleSet.class);
- //System.out.println(data.toString());
- /**
- * Here you need to implement the initialization and the optimization as
- * discussed during the lecture. This block contains some examples of useful
- * methods, that you will need
- */
- Attributes attributes = data.getAttributes();
- // this way you can loop over examples
- int h = 0;
- ArrayList<Datenpunkt> dp = new ArrayList<Datenpunkt>();
- for (Example example : data) {
- dp.add(new Datenpunkt()); // Index H
- // here we loop over all (regular) attributes (=colums of the data set). Regular
- // attributes are the ones marked to be used by algorithms like this
- for (Attribute attribute : data.getAttributes())
- {
- // we can access properties of the attribute with the object like this:
- if (attribute.isNumerical()) {
- // in this case we migh use it for euclidean distance
- double value = example.getNumericalValue(attribute);
- dp.get(h).getKoordinaten().add(value);
- //berechneDistanzEuk(value);
- } else if (attribute.isNominal()) {
- // perhaps usefull for another exercise?
- }
- String attributeName = attribute.getName();
- //System.out.println(attributeName);
- // here's how we can get numerical and nominal values:
- //System.out.println(h);
- double value = example.getNumericalValue(attribute);
- //System.out.println("NumVal: " + value);
- String valueString = example.getValueAsString(attribute);
- //System.out.println("StrVal: " + valueString);
- }
- if(h >= 1)
- {
- System.out.println("Distanz h-1 zw. h: " + berechneDistanzEuk(dp.get(h-1),dp.get(h)));
- }
- System.out.println(h);
- System.out.println(dp.get(h).getKoordinaten().toString());
- /**
- * You must not use examples for storing data. It's purely a view onto the data,
- * you shouldn't change them as you would change the underlying data. Here is
- * how you could extrac the data into a simple double[].
- */
- double[] rawNumericalData = new double[attributes.size()];
- int j = 0;
- for (Attribute attribute : attributes) {
- rawNumericalData[j++] = example.getValue(attribute);
- }
- h = h+1;
- }
- // alternatively you can loop using the size of the example set and random
- // access on the examples like that:
- for (int i = 0; i < data.size(); i++) {
- Example example = data.getExample(i);
- }
- /**
- * After we finished the optimiization, create the model with the results. You
- * will need to implement KMeansModel as well.
- */
- KMeansModel model = new KMeansModel(data, k);
- /**
- * We are delivering the output below
- */
- if (dataOut.isConnected()) {
- // we only copy the data and apply the model if it is connected. otherwise the
- // effort is wasted.
- ExampleSet outputData = model.apply((ExampleSet) data.copy());
- dataOut.deliver(outputData);
- }
- modelOut.deliver(model);
- }
- /**
- * This is the method you need to add parameters to. Do we need additional ones?
- * This is the same for all operators.
- */
- @Override
- public List<ParameterType> getParameterTypes() {
- List<ParameterType> types = new LinkedList<>();
- types.add(new ParameterTypeInt(PARAMETER_K, "The number clusters", 2, Integer.MAX_VALUE, 2));
- return types;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement