Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- package org.datavec.transform.basic;
- import org.datavec.api.records.reader.RecordReader;
- import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
- import org.datavec.api.split.FileSplit;
- import org.datavec.api.transform.TransformProcess;
- import org.datavec.api.transform.schema.Schema;
- import org.datavec.api.transform.transform.sequence.SequenceOffsetTransform;
- import org.datavec.api.writable.Writable;
- import org.datavec.local.transforms.LocalTransformExecutor;
- import org.joda.time.DateTimeZone;
- import org.nd4j.linalg.io.ClassPathResource;
- import java.io.File;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.List;
- public class myExample {
- public static void main(String[] args) throws Exception {
- Schema inputDataSchema = new Schema.Builder()
- .addColumnString("DateTimeString")
- .addColumnsString("CustomerID", "MerchantID")
- .addColumnInteger("NumItemsInTransaction")
- .addColumnCategorical("MerchantCountryCode", Arrays.asList("USA","CAN","FR","MX"))
- .addColumnDouble("TransactionAmountUSD",0.0,null,false,false) //$0.0 or more, no maximum limit, no NaN and no Infinite values
- .addColumnCategorical("FraudLabel", Arrays.asList("Fraud","Legit"))
- .build();
- TransformProcess tp = new TransformProcess.Builder(inputDataSchema)
- .removeAllColumnsExceptFor("DateTimeString","TransactionAmountUSD")
- .stringToTimeTransform("DateTimeString","YYYY-MM-DD HH:mm:ss.SSS", DateTimeZone.UTC)
- .offsetSequence(Arrays.asList("TransactionAmountUSD"),1, SequenceOffsetTransform.OperationType.NewColumn)
- .build();
- File inputFile = new ClassPathResource("BasicDataVecExample/exampledata.csv").getFile();
- //Define input reader and output writer:
- RecordReader rr = new CSVRecordReader(0, ',');
- //RecordReader rr = new CSVVariableSlidingWindowRecordReader(5, 2);
- rr.initialize(new FileSplit(inputFile));
- //Process the data:
- List<List<Writable>> originalData = new ArrayList<>();
- while(rr.hasNext()){
- originalData.add(rr.next());
- }
- List<List<Writable>> processedData = LocalTransformExecutor.execute(originalData, tp);
- int numRows = 5;
- System.out.println("=== BEFORE ===");
- for (int i=0;i<=numRows;i++) {
- System.out.println(originalData.get(i));
- }
- System.out.println("=== AFTER ===");
- for (int i=0;i<=numRows;i++) {
- System.out.println(processedData.get(i));
- }
- }
- }
- /* ERROR I GET:
- Exception in thread "main" java.lang.UnsupportedOperationException: SequenceOffsetTransform cannot be applied to non-sequence data
- at org.datavec.api.transform.transform.sequence.SequenceOffsetTransform.map(SequenceOffsetTransform.java:159)
- at org.datavec.local.transforms.transform.LocalTransformFunction.apply(LocalTransformFunction.java:48)
- at org.datavec.local.transforms.transform.LocalTransformFunction.apply(LocalTransformFunction.java:32)
- at org.datavec.local.transforms.LocalTransformExecutor.lambda$execute$3(LocalTransformExecutor.java:340)
- at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
- at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1382)
- at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
- at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
- at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
- at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
- at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
- at org.datavec.local.transforms.LocalTransformExecutor.execute(LocalTransformExecutor.java:340)
- at org.datavec.local.transforms.LocalTransformExecutor.execute(LocalTransformExecutor.java:93)
- at org.datavec.transform.basic.myExample.main(myExample.java:51)
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement