---
# Untitled
#
# Example InferenceNode resource named "my-model".
# `spec.default` declares the default predictor; the A/B testing add-on
# is configured separately under `spec.ABTest`.
apiVersion: 'serving.kubeflow.org/v1alpha2'
kind: 'InferenceNode'
metadata:
  name: 'my-model'
spec:
  # Default predictor: a TensorFlow model loaded from the given storage URI.
  default:
    predictor:
      tensorflow:
        storageUri: 'gs://mybucket/mymodel-2'

  # Add-on: A/B test configuration
  ABTest:
    # Name of the metric in which we are looking for an improvement
    # (a single log request can log multiple metrics).
    metricName: 'my_metric'

    # Percentage of users that are randomly routed to this model.
    # default: 50
    trafficPercent: 10

    # Routing requirements:
    #   - during a single A/B test, every user _must_ consistently be routed
    #     to the same model
    #   - across A/B tests, user assignments to group A vs group B must _not_
    #     be correlated
    #     (example: if user 1 and user 2 are both in group A in one A/B test,
    #      it shouldn't be guaranteed that they share a group in the next one)
    # Therefore, assuming we have 1) a user ID and 2) some unique A/B test ID,
    # the model a user is routed to may be determined by hashing both together:
    #   `hash(user_id, abtest_id) / MAX_VALUE < trafficPercent / 100` (0 -> model A, 1 -> model B)
    # default: number of seconds after epoch (it just has to be unique)
    seed: 93020191153

    # ---- STATISTICAL PARAMETERS ----

    # Metric estimation (happens before the A/B test starts; used to calculate
    # the sample size required for the A/B test):
    #   maximumPercentError  - largest possible percent error at the specified
    #                          confidence value, default: 1
    #   estimationConfidence - percent chance that our true estimation error
    #                          is <= maximumPercentError, default: 95
    maximumPercentError: 1
    estimationConfidence: 95

    # A/B testing:
    #   *note*: for convenience, define the output of our A/B test as
    #           C = (B's metric >= A's metric + minimumDetectableEffect)
    #   minimumDetectableEffect - smallest absolute improvement in B's metric
    #                             over A's that is detectable at the specified
    #                             confidence & power values
    #   confidence - percent chance that we predict C is false given that C
    #                is indeed false, default: 95
    #   power      - percent chance that we predict C is true given that C
    #                is indeed true, default: 80
    #   *note*: higher confidence/power and lower minimumDetectableEffect
    #           -> longer experiments (more samples required)
    minimumDetectableEffect: 0.01
    confidence: 95
    power: 80

    # CONSIDERING (open question):
    # should there be a timeout for sessions which do not receive a log request?

    # The second predictor
    predictor:
      tensorflow:
        storageUri: 'gs://mybucket/mymodel-3'