Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import me.prettyprint.cassandra.serializers.LongSerializer;
- import me.prettyprint.hector.api.Cluster;
- import me.prettyprint.hector.api.Keyspace;
- import me.prettyprint.hector.api.beans.ColumnSlice;
- import me.prettyprint.hector.api.beans.OrderedRows;
- import me.prettyprint.hector.api.beans.Row;
- import me.prettyprint.hector.api.factory.HFactory;
- import me.prettyprint.hector.api.query.QueryResult;
- import me.prettyprint.hector.api.query.RangeSlicesQuery;
- import me.prettyprint.hector.api.query.SliceQuery;
- import com.google.common.collect.Iterables;
- public class LongDataCensus {
- // no magic here, just set big chunks, can probably go much bigger is increase jvm memory
- public static final int ROW_COUNT_REQUEST = 50001;
- public static final int COLUMN_RANGE_REQUEST = 5001;
- public static <K, N, V> int getColumnCount(Row<K, N, V> row, SliceQuery<K, N, V> sliceQuery, String columnFamilyName, int requestSize) {
- int size = 0;
- int sliceSize;
- ColumnSlice<N,V> slice = row.getColumnSlice();
- // if we got as many as we asked for, we might have more available, so keep doing that until we get shorted..
- while ((sliceSize = slice.getColumns().size()) == requestSize) {
- // need to get just the additional columns for this key...
- N start = slice.getColumns().get(sliceSize - 1).getName();
- size += sliceSize-1; // don't count last entry... as will be used as start for next range get
- sliceQuery.setColumnFamily(columnFamilyName).setKey(row.getKey()).setRange(start, null, false, requestSize);
- slice = sliceQuery.execute().get();
- }
- // add in the "shorted" row
- size += sliceSize;
- return size;
- }
- public static void main( String[] args ) {
- String hostPort = "localhost:9160";
- String clusterName = "mycluster";
- String keyspaceName = "mykeyspace";
- String columnFamilyName = "Data";
- Cluster cluster = HFactory.getOrCreateCluster(clusterName,hostPort);
- Keyspace ks = HFactory.createKeyspace(keyspaceName, cluster);
- RangeSlicesQuery<Long,Long,Long> rangeSlicesQuery =
- HFactory.createRangeSlicesQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());
- // need this to find the rest of the columns for each row
- SliceQuery<Long, Long, Long> sliceQuery = HFactory.createSliceQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());
- Long lastEnd = null;
- int keyCount = 0;
- long totalColCount = 0;
- int maxColCount = 0;
- Long maxColCountKey = null;
- long startTime = -1 * System.currentTimeMillis()/1000L;
- boolean allDone = false;
- do {
- rangeSlicesQuery.setColumnFamily(columnFamilyName).setKeys(lastEnd, null).setRowCount(ROW_COUNT_REQUEST)
- .setRange(null, null, false, COLUMN_RANGE_REQUEST);
- QueryResult<OrderedRows<Long, Long, Long>> result = rangeSlicesQuery.execute();
- OrderedRows<Long, Long, Long> resultRows = result.get();
- if (resultRows.getCount() < 1) {
- allDone = true;
- } else {
- Iterable<Row<Long, Long, Long>> rowIterable;
- // if we get exact #rows as we asked, then we need to skip last to use that as start of next slice
- // else if just a small slice, then assume it is last, and then we can process and then quit
- if (resultRows.getCount() == ROW_COUNT_REQUEST) {
- rowIterable= Iterables.limit(resultRows, resultRows.getCount() - 1);
- } else {
- rowIterable = resultRows;
- allDone = true;
- }
- for (Row<Long,Long,Long> row: rowIterable) {
- keyCount++;
- int size = getColumnCount(row, sliceQuery, columnFamilyName, COLUMN_RANGE_REQUEST);
- if (size >= maxColCount) {
- maxColCount = size;
- maxColCountKey = row.getKey();
- }
- totalColCount += size;
- if (keyCount % 100000 == 0) {
- long time = startTime + System.currentTimeMillis()/1000L;
- System.out.println(time + "," + keyCount + "," + keyCount/(time+1));
- }
- }
- lastEnd = resultRows.peekLast().getKey();
- }
- } while (!allDone);
- long time = startTime + System.currentTimeMillis()/1000L;
- System.out.println(time + "," + keyCount + "," + keyCount/(time+1));
- System.out.println("Key count: " + keyCount);
- System.out.println("Col count: " + totalColCount);
- System.out.println("Max col count: " + maxColCount + " - (last) at: " + maxColCountKey);
- cluster.getConnectionManager().shutdown();
- System.exit(0);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement