Cassandra CF "Census" using Hector

import me.prettyprint.cassandra.serializers.LongSerializer;
import me.prettyprint.hector.api.Cluster;
import me.prettyprint.hector.api.Keyspace;
import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.OrderedRows;
import me.prettyprint.hector.api.beans.Row;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.query.QueryResult;
import me.prettyprint.hector.api.query.RangeSlicesQuery;
import me.prettyprint.hector.api.query.SliceQuery;

import com.google.common.collect.Iterables;


public class LongDataCensus {
    // no magic here, just set big chunks, can probably go much bigger is increase jvm memory
    public static final int ROW_COUNT_REQUEST = 50001;
    public static final int COLUMN_RANGE_REQUEST = 5001;

    public static <K, N, V> int getColumnCount(Row<K, N, V> row, SliceQuery<K, N, V> sliceQuery, String columnFamilyName, int requestSize) {
        int size = 0;
        int sliceSize;

        ColumnSlice<N,V> slice = row.getColumnSlice();

        // if we got as many as we asked for, we might have more available, so keep doing that until we get shorted..
        while ((sliceSize = slice.getColumns().size()) == requestSize) {
            // need to get just the additional columns for this key...
            N start = slice.getColumns().get(sliceSize - 1).getName();
            size += sliceSize-1; // don't count last entry... as will be used as start for next range get
            sliceQuery.setColumnFamily(columnFamilyName).setKey(row.getKey()).setRange(start, null, false, requestSize);
            slice =  sliceQuery.execute().get();
        }
        // add in the "shorted" row
        size += sliceSize;

        return size;
    }

    public static void main( String[] args ) {
        String hostPort = "localhost:9160";
        String clusterName = "mycluster";
        String keyspaceName = "mykeyspace";
        String columnFamilyName = "Data";


        Cluster cluster = HFactory.getOrCreateCluster(clusterName,hostPort);
        Keyspace ks = HFactory.createKeyspace(keyspaceName, cluster);

        RangeSlicesQuery<Long,Long,Long> rangeSlicesQuery =
            HFactory.createRangeSlicesQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());
        // need this to find the rest of the columns for each row
        SliceQuery<Long, Long, Long> sliceQuery = HFactory.createSliceQuery(ks, LongSerializer.get(), LongSerializer.get(), LongSerializer.get());

        Long lastEnd = null;

        int keyCount = 0;
        long totalColCount = 0;
        int maxColCount = 0;
        Long maxColCountKey = null;
        long startTime = -1 * System.currentTimeMillis()/1000L;

        boolean allDone = false;
        do {
            rangeSlicesQuery.setColumnFamily(columnFamilyName).setKeys(lastEnd, null).setRowCount(ROW_COUNT_REQUEST)
                            .setRange(null, null, false, COLUMN_RANGE_REQUEST);

            QueryResult<OrderedRows<Long, Long, Long>> result = rangeSlicesQuery.execute();
            OrderedRows<Long, Long, Long> resultRows = result.get();

            if (resultRows.getCount() < 1) {
                allDone = true;
            } else {
                Iterable<Row<Long, Long, Long>> rowIterable;
                // if we get exact #rows as we asked, then we need to skip last to use that as start of next slice
                // else if just a small slice, then assume it is last, and then we can process and then quit
                if (resultRows.getCount() == ROW_COUNT_REQUEST) {
                    rowIterable= Iterables.limit(resultRows, resultRows.getCount() - 1);
                } else {
                    rowIterable = resultRows;
                    allDone = true;
                }

                for (Row<Long,Long,Long> row: rowIterable) {
                    keyCount++;
                    int size = getColumnCount(row, sliceQuery, columnFamilyName, COLUMN_RANGE_REQUEST);

                    if (size >= maxColCount) {
                        maxColCount = size;
                        maxColCountKey = row.getKey();
                    }

                    totalColCount += size;

                    if (keyCount % 100000 == 0) {
                        long time = startTime + System.currentTimeMillis()/1000L;
                        System.out.println(time + "," + keyCount + "," + keyCount/(time+1));
                    }
                }
                lastEnd = resultRows.peekLast().getKey();
            }

        } while (!allDone);

        long time = startTime + System.currentTimeMillis()/1000L;
        System.out.println(time + "," + keyCount + "," + keyCount/(time+1));

        System.out.println("Key count: " + keyCount);
        System.out.println("Col count: " + totalColCount);

        System.out.println("Max col count: " + maxColCount + " - (last) at: " + maxColCountKey);


        cluster.getConnectionManager().shutdown();
        System.exit(0);

    }
}