Advertisement
libjack

Cassandra CF "Census" using Hector

Aug 31st, 2011
460
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 5 4.38 KB | None | 0 0
  1. import me.prettyprint.cassandra.serializers.LongSerializer;
  2. import me.prettyprint.hector.api.Cluster;
  3. import me.prettyprint.hector.api.Keyspace;
  4. import me.prettyprint.hector.api.beans.ColumnSlice;
  5. import me.prettyprint.hector.api.beans.OrderedRows;
  6. import me.prettyprint.hector.api.beans.Row;
  7. import me.prettyprint.hector.api.factory.HFactory;
  8. import me.prettyprint.hector.api.query.QueryResult;
  9. import me.prettyprint.hector.api.query.RangeSlicesQuery;
  10. import me.prettyprint.hector.api.query.SliceQuery;
  11.  
  12. import com.google.common.collect.Iterables;
  13.  
  14.  
  15. public class LongDataCensus {
  16.     // no magic here, just set big chunks, can probably go much bigger is increase jvm memory
  17.     public static final int ROW_COUNT_REQUEST = 50001;
  18.     public static final int COLUMN_RANGE_REQUEST = 5001;
  19.    
  20.     public static <K, N, V> int getColumnCount(Row<K, N, V> row, SliceQuery<K, N, V> sliceQuery, String columnFamilyName, int requestSize) {
  21.         int size = 0;
  22.         int sliceSize;
  23.  
  24.         ColumnSlice<N,V> slice = row.getColumnSlice();
  25.  
  26.         // if we got as many as we asked for, we might have more available, so keep doing that until we get shorted..
  27.         while ((sliceSize = slice.getColumns().size()) == requestSize) {
  28.             // need to get just the additional columns for this key...
  29.             N start = slice.getColumns().get(sliceSize - 1).getName();
  30.             size += sliceSize-1; // don't count last entry... as will be used as start for next range get
  31.             sliceQuery.setColumnFamily(columnFamilyName).setKey(row.getKey()).setRange(start, null, false, requestSize);
  32.             slice =  sliceQuery.execute().get();
  33.         }
  34.         // add in the "shorted" row
  35.         size += sliceSize;
  36.        
  37.         return size;
  38.     }
  39.    
  40.     public static void main( String[] args ) {
  41.         String hostPort = "localhost:9160";
  42.         String clusterName = "mycluster";
  43.         String keyspaceName = "mykeyspace";
  44.         String columnFamilyName = "Data";
  45.        
  46.        
  47.         Cluster cluster = HFactory.getOrCreateCluster(clusterName,hostPort);
  48.         Keyspace ks = HFactory.createKeyspace(keyspaceName, cluster);
  49.        
  50.         RangeSlicesQuery<Long,Long,Long> rangeSlicesQuery =
  51.             HFactory.createRangeSlicesQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());
  52.         // need this to find the rest of the columns for each row
  53.         SliceQuery<Long, Long, Long> sliceQuery = HFactory.createSliceQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());
  54.        
  55.         Long lastEnd = null;
  56.        
  57.         int keyCount = 0;
  58.         long totalColCount = 0;
  59.         int maxColCount = 0;
  60.         Long maxColCountKey = null;    
  61.         long startTime = -1 * System.currentTimeMillis()/1000L;
  62.        
  63.         boolean allDone = false;
  64.         do {
  65.             rangeSlicesQuery.setColumnFamily(columnFamilyName).setKeys(lastEnd, null).setRowCount(ROW_COUNT_REQUEST)
  66.                             .setRange(null, null, false, COLUMN_RANGE_REQUEST);
  67.            
  68.             QueryResult<OrderedRows<Long, Long, Long>> result = rangeSlicesQuery.execute();
  69.             OrderedRows<Long, Long, Long> resultRows = result.get();
  70.            
  71.             if (resultRows.getCount() < 1) {
  72.                 allDone = true;
  73.             } else {
  74.                 Iterable<Row<Long, Long, Long>> rowIterable;
  75.                 // if we get exact #rows as we asked, then we need to skip last to use that as start of next slice
  76.                 // else if just a small slice, then assume it is last, and then we can process and then quit
  77.                 if (resultRows.getCount() == ROW_COUNT_REQUEST) {
  78.                     rowIterable= Iterables.limit(resultRows, resultRows.getCount() - 1);
  79.                 } else {
  80.                     rowIterable = resultRows;
  81.                     allDone = true;
  82.                 }
  83.                
  84.                 for (Row<Long,Long,Long> row: rowIterable) {
  85.                     keyCount++;
  86.                     int size = getColumnCount(row, sliceQuery, columnFamilyName, COLUMN_RANGE_REQUEST);
  87.  
  88.                     if (size >= maxColCount) {
  89.                         maxColCount = size;
  90.                         maxColCountKey = row.getKey();
  91.                     }
  92.                    
  93.                     totalColCount += size;
  94.                    
  95.                     if (keyCount % 100000 == 0) {
  96.                         long time = startTime + System.currentTimeMillis()/1000L;
  97.                         System.out.println(time + "," + keyCount + "," + keyCount/(time+1));
  98.                     }
  99.                 }
  100.                 lastEnd = resultRows.peekLast().getKey();
  101.             }
  102.  
  103.         } while (!allDone);
  104.        
  105.         long time = startTime + System.currentTimeMillis()/1000L;
  106.         System.out.println(time + "," + keyCount + "," + keyCount/(time+1));
  107.        
  108.         System.out.println("Key count: " + keyCount);
  109.         System.out.println("Col count: " + totalColCount);
  110.        
  111.         System.out.println("Max col count: " + maxColCount + " - (last) at: " + maxColCountKey);
  112.        
  113.        
  114.         cluster.getConnectionManager().shutdown();
  115.         System.exit(0);
  116.        
  117.     }
  118. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement