#
# This is an example YAML profile for cassandra-stress
#
# The general form of the command line is as follows:
#
# cassandra-stress user profile=<profile.yaml> ops([insert|<read-op>]=<op-ratio>, ...) n=<partition-ops>
#
# cassandra-stress will then run multiple parallel consumers (controlled by the
# -rate threads=<consumers> option):
#
# * Each consumer draws an operation at random from the list of ops;
# * The distribution of the ops is controlled by the <op-ratio> parameter;
# * Each op will then generate cassandra queries. When limited by the
#   <partition-ops> value:
#   * Each <read-op> will generate a single SELECT query, decrementing
#     <partition-ops> by 1;
#   * Each insert will generate multiple UPDATE queries, decrementing
#     <partition-ops> by the number of unique partitions inserted;
#   * When <partition-ops> is exhausted, cassandra-stress stops.
#
# insert data:
# cassandra-stress user profile=/home/jake/stress1.yaml ops(insert=1)
#
# read, using query simple1:
# cassandra-stress profile=/home/jake/stress1.yaml ops(simple1=1)
#
# mixed workload (90/10):
# cassandra-stress user profile=/home/jake/stress1.yaml ops(insert=1,simple1=9)

#
# Keyspace info
#
keyspace: stresscql

#
# The CQL for creating a keyspace (optional if it already exists)
#
keyspace_definition: |
  CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};

#
# Table info
#
table: typestest

#
# The CQL for creating a table you wish to stress (optional if it already exists)
#
table_definition: |
  CREATE TABLE typestest (
        name text,
        choice boolean,
        date timestamp,
        address inet,
        dbl double,
        lval bigint,
        ival int,
        uid timeuuid,
        value blob,
        PRIMARY KEY((name,choice), date, address, dbl, lval, ival, uid)
  )
  WITH compaction = { 'class':'LeveledCompactionStrategy' }
#   AND compression = { 'sstable_compression' : '' }
#   AND comment='A table of many types to test wide rows'

#
# Optional meta information on the generated columns in the above table
# The min and max only apply to text and blob types
# The distribution field represents the total unique population
# distribution of that column across rows. Supported types are
#
#      EXP(min..max)                  An exponential distribution over the range [min..max]
#      EXTREME(min..max,shape)        An extreme value (Weibull) distribution over the range [min..max]
#      GAUSSIAN(min..max,stdvrng)     A gaussian/normal distribution, where mean=(min+max)/2, and stdev is (mean-min)/stdvrng
#      GAUSSIAN(min..max,mean,stdev)  A gaussian/normal distribution, with explicitly defined mean and stdev
#      UNIFORM(min..max)              A uniform distribution over the range [min, max]
#      FIXED(val)                     A fixed distribution, always returning the same value
#      SEQ(min..max)                  A fixed sequence, returning values in the range min to max sequentially (starting based on seed), wrapping if necessary.
#      Aliases: extr, gauss, normal, norm, weibull
#
#      If preceded by ~, the distribution is inverted
#
# Defaults for all columns are size: uniform(4..8), population: uniform(1..100B), cluster: fixed(1)
#
columnspec:
  - name: name
    size: uniform(1..10)
    population: uniform(1..1B)  # the range of unique values to select for the field (default is 100Billion)
  - name: date
    cluster: uniform(20..40)
  - name: lval
    population: gaussian(1..1000)
    cluster: uniform(1..4)

#
# The insert operation
#
insert:
  partitions: uniform(1..50)       # Number of unique partitions to update in a single insert op.
                                   # Defaults to fixed(1)
  partitions-per-batch: MULTIPLE   # SINGLE or MULTIPLE partitions per-batch; multiple partitions in a
                                   # single batch is a pessimization, but it's allowed. Defaults to SINGLE.
  max-rows-per-batch: uniform(1..100)
                                   # Maximum size of a batch. Rows are inserted in batches of up to
                                   # max-rows-per-batch, and after each batch is sent max-rows-per-batch
                                   # is regenerated from this distribution. If the generated value is 0,
                                   # then each partition (for SINGLE partitions-per-batch) or all partitions
                                   # (for MULTIPLE partitions-per-batch) is inserted in a single batch (so if you
                                   # always require this behaviour, use fixed(0)). Defaults to fixed(100).
  batchtype: LOGGED                # Type of batch to use: LOGGED, UNLOGGED or COUNTER
  select: uniform(1..10)/10        # Proportion of rows that will be generated in each partition. The number of rows
                                   # per-partition will be determined by <select-value> * <partition-size>, where
                                   # <partition-size> is the number of possible rows in a partition, as determined by
                                   # the columns with cluster keys in the columnspec above. This will be
                                   # generated for each partition in a single insert op.
                                   # Defaults to fixed(1)/1
  row-population: fixed(1)/1       # Proportion of populated columns in a row.
                                   # Defaults to fixed(1)/1

#
# A list of queries you wish to run against the schema
#
queries:
  simple1:
    cql: select * from typestest where name = ? and choice = ? LIMIT 100
    fields: samerow   # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)
  range1:
    cql: select * from typestest where name = ? and choice = ? and date >= ? LIMIT 100
    fields: multirow  # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)

#
# A list of bulk read queries that analytics tools may perform against the schema
# Each query will sweep an entire token range, page by page.
#
token_range_queries:
  all_columns_tr_query:
    columns: '*'
    page_size: 5000
  value_tr_query:
    columns: value
    # NOTE(review): source was truncated here — upstream examples also set
    # page_size for this query; confirm against the original profile.