Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- public class Logistic {
- private static double[] theta = new double[1601];
- public static double a = 0.01;
- public static class mapper extends TableMapper<IntWritable, DoubleWritable>{
- private static double[] theta = new double[1601];
- private static double[] grad = new double[1601];
- @Override
- public void setup(Context ct){
- Configuration cg = ct.getConfiguration();
- for(int i = 0; i < 1601; ++i){
- theta[i] = cg.getDouble("theta"+i, 0.0);
- }
- }
- @Override
- public void map(ImmutableBytesWritable row, Result value, Context context) {
- String usuario = new String(row.get());
- double y = 0.0;
- char tipo = 'a';
- Map<byte[], byte[]> caracteristicas = new HashMap<byte[], byte[]>();
- caracteristicas = value.getFamilyMap("a".getBytes());
- if(caracteristicas.isEmpty()){
- caracteristicas = value.getFamilyMap("b".getBytes());
- tipo = 'b';
- }
- if(tipo == 'b'){
- y = 1.0;
- }
- List<byte[]> cs = new ArrayList<>(caracteristicas.keySet());
- List<byte[]> vs = new ArrayList<>(caracteristicas.values());
- double[] x = new double[1601];
- x[0] = 1;
- for(int i = 0; i < vs.size();++i){
- int j = Integer.parseInt((new String(cs.get(i))).substring(1));
- x[j] = Double.parseDouble(new String(vs.get(i)));
- }
- gradX(x, y);
- }
- private static void gradX(double[] x, double y){
- double aux = 0;
- for(int i = 0; i < 1601;++i){
- aux += theta[i] * x[i];
- }
- double g = (1 / (1 + java.lang.Math.exp(-aux))) - y;
- for(int i = 0; i < 1601;++i){
- grad[i] += g * x[i];
- }
- }
- @Override
- protected void cleanup (Context context) throws IOException, InterruptedException {
- for(int i = 0; i < 1601;++i){
- context.write(new IntWritable(i), new DoubleWritable(grad[i]));
- }
- }
- }
- public static class reducer
- extends Reducer<IntWritable,DoubleWritable,IntWritable,DoubleWritable> {
- public void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
- throws IOException, InterruptedException {
- double sum = 0;
- for(DoubleWritable v: values){
- sum += v.get();
- }
- context.write(key,new DoubleWritable(sum));
- }
- }
- private static double g(double[] x){
- double aux = 0;
- for(int i = 0; i < 1601;++i){
- aux += theta[i] * x[i];
- }
- return 1 / (1 + java.lang.Math.exp(-aux));
- }
- private static double[] grad(double[][] x, double[] y){
- double aux = 0;
- double[] grad = new double[1601];
- for(int j = 0; j < 1601;++j){
- grad[j] = 0;
- }
- int row = x.length;
- int col = x[0].length;
- for(int j = 0; j < 1601;++j){
- for(int i = 0; i < row;++i){
- aux += g(x[i]) * x[i][j];
- }
- grad[j] = aux;
- aux = 0;
- }
- return grad;
- }
- private static void calcularModelo(){
- for(int i = 0; i < 1601;++i){
- theta[i] = 0;
- }
- double[] grad = new double[1601];
- for(int i = 0; i < 20; ++i){
- Configuration conf = HBaseConfiguration.create();
- for(int j=0;j<1601;j++){
- //theta[j]= conf.getDouble("tetha"+j,0.0);
- conf.setDouble("tetha"+j,theta[j]);
- }
- try{
- Job job = Job.getInstance(conf,"Regresion logistica");
- Scan scan = new Scan();
- scan.setCaching(500);
- //scan.addColumn("t".getBytes(), "a".getBytes());
- //scan.addColumn("t".getBytes(), "b".getBytes());
- job.setJarByClass(Logistic.class);
- job.setJarByClass(Logistic.class);
- TableMapReduceUtil.initTableMapperJob("p", scan,mapper.class, IntWritable.class, DoubleWritable.class, job);
- job.setReducerClass(reducer.class);
- job.setNumReduceTasks(6);
- job.setOutputKeyClass(IntWritable.class);
- job.setOutputValueClass(DoubleWritable.class);
- FileOutputFormat.setOutputPath(job, new Path("out"));
- if (!job.waitForCompletion(true))
- return;
- /*for(int j=0;j<theta.length;j++){
- theta[j]= conf.getDouble("tetha"+j,0.0);
- }*/
- }catch(Exception e){
- System.out.println(e.getMessage());
- }
- try{
- Scanner out = new Scanner(new File("out/part-r-00000"));
- while(out.hasNextLine()){
- int j = out.nextInt();
- double valor = out.nextDouble();
- theta[j] = theta[j] - 0.01*valor;
- out.nextLine();
- System.out.println(theta[j]);
- }
- }
- catch(Exception e){}
- }
- }
- public static void main(String[] args) throws Exception {
- calcularModelo();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement