#
# This is an example YAML profile for cassandra-stress
#
# The general form of the command line is as follows:
#
# cassandra-stress user profile=<profile.yaml> ops([insert|<read-op>]=<op-ratio>, ...) n=<partition-ops>
#
# cassandra-stress will then run multiple parallel consumers (controlled by the
# -rate threads=<consumers> option):
#
# * Each consumer draws an operation at random from the list of ops;
# * The distribution of the ops is controlled by the <op-ratio> parameter;
# * Each op will then generate cassandra queries. When limited by the
#   <partition-ops> value:
#   * Each <read-op> will generate a single SELECT query, decrementing
#     <partition-ops> by 1;
#   * Each insert will generate multiple UPDATE queries, decrementing
#     <partition-ops> by the number of unique partitions inserted;
#   * When <partition-ops> is exhausted, cassandra-stress stops.
#
# insert data:
# cassandra-stress user profile=/home/jake/stress1.yaml ops(insert=1)
#
# read, using query simple1:
# cassandra-stress profile=/home/jake/stress1.yaml ops(simple1=1)
#
# mixed workload (90/10):
# cassandra-stress user profile=/home/jake/stress1.yaml ops(insert=1,simple1=9)

#
# Keyspace info
#
keyspace: stresscql

#
# The CQL for creating a keyspace (optional if it already exists)
#
keyspace_definition: |
  CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};

#
# Table info
#
table: typestest

#
# The CQL for creating a table you wish to stress (optional if it already exists)
#
table_definition: |
  CREATE TABLE typestest (
        name text,
        choice boolean,
        date timestamp,
        address inet,
        dbl double,
        lval bigint,
        ival int,
        uid timeuuid,
        value blob,
        PRIMARY KEY((name,choice), date, address, dbl, lval, ival, uid)
  )
  WITH compaction = { 'class':'LeveledCompactionStrategy' }
#   AND compression = { 'sstable_compression' : '' }
#   AND comment='A table of many types to test wide rows'

#
# Optional meta information on the generated columns in the above table
# The min and max only apply to text and blob types
# The distribution field represents the total unique population
# distribution of that column across rows. Supported types are
#
#      EXP(min..max)                  An exponential distribution over the range [min..max]
#      EXTREME(min..max,shape)        An extreme value (Weibull) distribution over the range [min..max]
#      GAUSSIAN(min..max,stdvrng)     A gaussian/normal distribution, where mean=(min+max)/2, and stdev is (mean-min)/stdvrng
#      GAUSSIAN(min..max,mean,stdev)  A gaussian/normal distribution, with explicitly defined mean and stdev
#      UNIFORM(min..max)              A uniform distribution over the range [min, max]
#      FIXED(val)                     A fixed distribution, always returning the same value
#      SEQ(min..max)                  A fixed sequence, returning values in the range min to max sequentially (starting based on seed), wrapping if necessary.
#      Aliases: extr, gauss, normal, norm, weibull
#
#      If preceded by ~, the distribution is inverted
#
# Defaults for all columns are size: uniform(4..8), population: uniform(1..100B), cluster: fixed(1)
#
columnspec:
  - name: name
    size: uniform(1..10)
    population: uniform(1..1B)  # the range of unique values to select for the field (default is 100Billion)
  - name: date
    cluster: uniform(20..40)
  - name: lval
    population: gaussian(1..1000)
    cluster: uniform(1..4)

#
# The insert operation
#
insert:
  partitions: uniform(1..50)       # Number of unique partitions to update in a single insert op.
                                   # Defaults to fixed(1)
  partitions-per-batch: MULTIPLE   # SINGLE or MULTIPLE partitions per-batch; multiple partitions in a
                                   # single batch is a pessimization, but it's allowed. Defaults to SINGLE.
  max-rows-per-batch: uniform(1..100)
                                   # Maximum size of a batch. Rows are inserted in batches of up to
                                   # max-rows-per-batch, and after each batch is sent max-rows-per-batch
                                   # is regenerated from this distribution. If the generated value is 0,
                                   # then each partition (for SINGLE partitions-per-batch) or all partitions
                                   # (for MULTIPLE partitions-per-batch) is inserted in a single batch (so if you
                                   # always require this behaviour, use fixed(0)). Defaults to fixed(100).
  batchtype: LOGGED                # Type of batch to use: LOGGED, UNLOGGED or COUNTER
  select: uniform(1..10)/10        # Proportion of rows that will be generated in each partition. The number of rows
                                   # per-partition will be determined by <select-value> * <partition-size>, where
                                   # <partition-size> is the number of possible rows in a partition, as determined by
                                   # the columns with cluster keys in the columnspec above. This will be
                                   # generated for each partition in a single insert op.
                                   # Defaults to fixed(1)/1
  row-population: fixed(1)/1       # Proportion of populated columns in a row.
                                   # Defaults to fixed(1)/1

#
# A list of queries you wish to run against the schema
#
queries:
  simple1:
    cql: select * from typestest where name = ? and choice = ? LIMIT 100
    fields: samerow   # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)
  range1:
    cql: select * from typestest where name = ? and choice = ? and date >= ? LIMIT 100
    fields: multirow  # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)

#
# A list of bulk read queries that analytics tools may perform against the schema
# Each query will sweep an entire token range, page by page.
#
token_range_queries:
  all_columns_tr_query:
    columns: '*'
    page_size: 5000
  value_tr_query:
    columns: value
    # NOTE(review): source was truncated here — upstream examples also set
    # page_size for this query; confirm against the original profile.