SHARE
TWEET

Untitled

a guest Mar 20th, 2017 61 in 4 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2.  * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  3.  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4.  *
  5.  * This code is free software; you can redistribute it and/or modify it
  6.  * under the terms of the GNU General Public License version 2 only, as
  7.  * published by the Free Software Foundation.  Oracle designates this
  8.  * particular file as subject to the "Classpath" exception as provided
  9.  * by Oracle in the LICENSE file that accompanied this code.
  10.  *
  11.  * This code is distributed in the hope that it will be useful, but WITHOUT
  12.  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13.  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14.  * version 2 for more details (a copy is included in the LICENSE file that
  15.  * accompanied this code).
  16.  *
  17.  * You should have received a copy of the GNU General Public License version
  18.  * 2 along with this work; if not, write to the Free Software Foundation,
  19.  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20.  *
  21.  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22.  * or visit www.oracle.com if you need additional information or have any
  23.  * questions.
  24.  */
  25. package org.openjdk.jmh.infra;
  26.  
  27. import org.openjdk.jmh.util.Utils;
  28.  
  29. import java.lang.ref.WeakReference;
  30. import java.util.Random;
  31.  
  32. /*
  33.     See the rationale for BlackholeL1..BlackholeL4 classes below.
  34.  */
  35.  
  36. abstract class BlackholeL0 { // root of the Blackhole class chain; declares only the leading marker field
  37.     private int markerBegin; // not read or written anywhere in this file; presumably marks where the padded layout begins
  38. }
  39.  
  40. abstract class BlackholeL1 extends BlackholeL0 { // padding only: 16 rows of 8 private booleans (128 fields), none referenced in this file
  41.     private boolean p001, p002, p003, p004, p005, p006, p007, p008; // per the implementation notes in Blackhole, booleans are used so a dense layout leaves no gap for subclass fields
  42.     private boolean p011, p012, p013, p014, p015, p016, p017, p018;
  43.     private boolean p021, p022, p023, p024, p025, p026, p027, p028;
  44.     private boolean p031, p032, p033, p034, p035, p036, p037, p038;
  45.     private boolean p041, p042, p043, p044, p045, p046, p047, p048;
  46.     private boolean p051, p052, p053, p054, p055, p056, p057, p058;
  47.     private boolean p061, p062, p063, p064, p065, p066, p067, p068;
  48.     private boolean p071, p072, p073, p074, p075, p076, p077, p078;
  49.     private boolean p101, p102, p103, p104, p105, p106, p107, p108;
  50.     private boolean p111, p112, p113, p114, p115, p116, p117, p118;
  51.     private boolean p121, p122, p123, p124, p125, p126, p127, p128;
  52.     private boolean p131, p132, p133, p134, p135, p136, p137, p138;
  53.     private boolean p141, p142, p143, p144, p145, p146, p147, p148;
  54.     private boolean p151, p152, p153, p154, p155, p156, p157, p158;
  55.     private boolean p161, p162, p163, p164, p165, p166, p167, p168;
  56.     private boolean p171, p172, p173, p174, p175, p176, p177, p178; // these fields precede the BlackholeL2 fields in the inheritance chain
  57. }
  58.  
  59. abstract class BlackholeL2 extends BlackholeL1 { // holds every field that the Blackhole.consume(...) overloads read or write
  60.     public volatile byte b1; // *1 fields are volatile, *2 fields are plain; the constructor makes each pair differ
  61.     public volatile boolean bool1;
  62.     public volatile char c1;
  63.     public volatile short s1;
  64.     public volatile int i1;
  65.     public volatile long l1;
  66.     public volatile float f1;
  67.     public volatile double d1;
  68.     public byte b2; // plain counterparts of the volatile fields above
  69.     public boolean bool2;
  70.     public char c2;
  71.     public short s2;
  72.     public int i2;
  73.     public long l2;
  74.     public float f2;
  75.     public double d2;
  76.     public volatile Object obj1; // sink that consume(Object) occasionally writes to; set to null by evaporate()
  77.     public volatile BlackholeL2 nullBait = null; // never assigned a non-null value in this file; consume(...) dereferences it on its "never happens" path to raise an NPE
  78.     public int tlr; // state of the inline pseudo-random sequence advanced by consume(Object)
  79.     public volatile int tlrMask; // bit mask applied to tlr in consume(Object); starts at 1
  80.  
  81.     public BlackholeL2() { // fills the fields with values drawn from a time-seeded Random, then verifies each pair differs
  82.         Random r = new Random(System.nanoTime()); // seed differs from run to run
  83.         tlr = r.nextInt();
  84.         tlrMask = 1;
  85.         obj1 = new Object();
  86.  
  87.         b1 = (byte) r.nextInt(); b2 = (byte) (b1 + 1); // second value is the first plus one (wrapping through the cast)
  88.         bool1 = r.nextBoolean(); bool2 = !bool1;
  89.         c1 = (char) r.nextInt(); c2 = (char) (c1 + 1);
  90.         s1 = (short) r.nextInt(); s2 = (short) (s1 + 1);
  91.         i1 = r.nextInt(); i2 = i1 + 1;
  92.         l1 = r.nextLong(); l2 = l1 + 1;
  93.         f1 = r.nextFloat(); f2 = f1 + Math.ulp(f1); // second value is one ulp above the first
  94.         d1 = r.nextDouble(); d2 = d1 + Math.ulp(d1);
  95.  
  96.         if (b1 == b2) { // sanity checks: fail construction if any pair ended up equal
  97.             throw new IllegalStateException("byte tombstones are equal");
  98.         }
  99.         if (bool1 == bool2) {
  100.             throw new IllegalStateException("boolean tombstones are equal");
  101.         }
  102.         if (c1 == c2) {
  103.             throw new IllegalStateException("char tombstones are equal");
  104.         }
  105.         if (s1 == s2) {
  106.             throw new IllegalStateException("short tombstones are equal");
  107.         }
  108.         if (i1 == i2) {
  109.             throw new IllegalStateException("int tombstones are equal");
  110.         }
  111.         if (l1 == l2) {
  112.             throw new IllegalStateException("long tombstones are equal");
  113.         }
  114.         if (f1 == f2) {
  115.             throw new IllegalStateException("float tombstones are equal");
  116.         }
  117.         if (d1 == d2) {
  118.             throw new IllegalStateException("double tombstones are equal");
  119.         }
  120.     }
  121. }
  122.  
  123. abstract class BlackholeL3 extends BlackholeL2 { // padding only: 16 rows of 8 private booleans (128 fields), none referenced in this file
  124.     private boolean q001, q002, q003, q004, q005, q006, q007, q008; // these fields follow the BlackholeL2 fields in the inheritance chain
  125.     private boolean q011, q012, q013, q014, q015, q016, q017, q018;
  126.     private boolean q021, q022, q023, q024, q025, q026, q027, q028;
  127.     private boolean q031, q032, q033, q034, q035, q036, q037, q038;
  128.     private boolean q041, q042, q043, q044, q045, q046, q047, q048;
  129.     private boolean q051, q052, q053, q054, q055, q056, q057, q058;
  130.     private boolean q061, q062, q063, q064, q065, q066, q067, q068;
  131.     private boolean q071, q072, q073, q074, q075, q076, q077, q078;
  132.     private boolean q101, q102, q103, q104, q105, q106, q107, q108;
  133.     private boolean q111, q112, q113, q114, q115, q116, q117, q118;
  134.     private boolean q121, q122, q123, q124, q125, q126, q127, q128;
  135.     private boolean q131, q132, q133, q134, q135, q136, q137, q138;
  136.     private boolean q141, q142, q143, q144, q145, q146, q147, q148;
  137.     private boolean q151, q152, q153, q154, q155, q156, q157, q158;
  138.     private boolean q161, q162, q163, q164, q165, q166, q167, q168;
  139.     private boolean q171, q172, q173, q174, q175, q176, q177, q178;
  140. }
  141.  
  142. abstract class BlackholeL4 extends BlackholeL3 { // last class of the padding chain; declares only the trailing marker field
  143.     private int markerEnd; // not read or written anywhere in this file; presumably marks where the padded layout ends
  144. }
  145.  
  146. /**
  147.  * Black Hole.
  148.  *
  149.  * <p>Black hole "consumes" the values, conveying no information to JIT whether the
  150.  * value is actually used afterwards. This can save the computations resulting in the
  151.  * given values from dead-code elimination.</p>
  152.  */
  153. public final class Blackhole extends BlackholeL4 {
  154.  
  155.     /**
  156.      * IMPLEMENTATION NOTES:
  157.      *
  158.      * The major things to dodge with Blackholes are:
  159.      *
  160.      *   a) Dead-code elimination: the arguments should be used on every call,
  161.      *      so that compilers are unable to fold them into constants or
  162.      *      otherwise optimize them away along with the computations resulted
  163.      *      in them.
  164.      *
  165.      *   b) False sharing: reading/writing the state may disturb the cache
  166.      *      lines. We need to isolate the critical fields to achieve tolerable
  167.      *      performance.
  168.      *
  169.      *   c) Write wall: we need to ease off on writes as much as possible,
  170.      *      since it disturbs the caches, pollutes the write buffers, etc.
  171.      *      This may very well result in hitting the memory wall prematurely.
  172.      *      Reading memory is fine as long as it is cacheable.
  173.      *
  174.      * To achieve these goals, we are piggybacking on several things in the
  175.      * compilers:
  176.      *
  177.      *  1. Superclass fields are not reordered with the subclass' fields.
  178.      *     No practical VM that we are aware of is doing this. It is impractical,
  179.      *     because if the superclass fields are at the different offsets in two
  180.      *     subclasses, the VMs would then need to do the polymorphic access for
  181.      *     the superclass fields.
  182.      *
  183.      *  2. Compilers are unable to predict the value of the volatile read.
  184.      *     While the compilers can speculatively optimize until the relevant
  185.      *     volatile write happens, it is unlikely to be practical to be able to stop
  186.      *     all the threads the instant that write had happened.
  187.      *
  188.      *  3. Compilers' code motion usually respects data dependencies, and they would
  189.      *     not normally schedule the consumer block before the code that generated
  190.      *     a value.
  191.      *
  192.      *  4. Compilers are not doing aggressive inter-procedural optimizations,
  193.      *     and/or break them when the target method is forced to be non-inlineable.
  194.      *
  195.      * Observation (1) allows us to "squash" the protected fields in the inheritance
  196.      * hierarchy so that the padding in super- and sub-class are laid out right before
  197.      * and right after the protected fields. We also pad with booleans so that dense
  198.      * layout in superclass does not have the gap where runtime can fit the subclass field.
  199.      *
  200.      * Observation (2) allows us to compare the incoming primitive values against
  201.      * the relevant volatile-guarded fields. The values in those guarded fields are
  202.      * never changing, but due to (2), we should re-read the values again and again.
  203.      * Also, observation (3) requires us to use the incoming value in the computation,
  204.      * thus anchoring the Blackhole code after the generating expression.
  205.      *
  206.      * Primitives are a bit hard, because we can't predict what values we
  207.      * will be fed. But we can compare the incoming value with *two* distinct
  208.      * known values, and both checks will never be true at the same time.
  209.      * Note the bitwise AND in all the predicates: both to spare additional
  210.      * branch, and also to provide more uniformity in the performance. Where possible,
  211.      * we are using a specific code shape to force generating a single branch, e.g.
  212.      * making compiler to evaluate the predicate in full, not speculate on components.
  213.      *
  214.      * Objects should normally abide the Java's referential semantics, i.e. the
  215.      * incoming objects will never be equal to the distinct object we have, and
  216.      * volatile read will break the speculation about what we compare with.
  217.      * However, smart compilers may deduce that the distinct non-escaped object
  218.      * on the other side is not equal to anything we have, and fold the comparison
  219.      * to "false". We do inlined thread-local random to get those objects escaped
  220.      * with infinitesimal probability. Then again, smart compilers may skip from
  221.      * generating the slow path, and apply the previous logic to constant-fold
  222.      * the condition to "false". We are warming up the slow-path in the beginning
  223.      * to evade that effect. Some caution needs to be exercised not to retain the
  224.      * captured objects forever: this is normally achieved by calling evaporate()
  225.      * regularly, but we also additionally protect with retaining the object on
  226.      * weak reference (contrary to phantom-ref, publishing object still has to
  227.      * happen, because reference users might need to discover the object).
  228.      *
  229.      * Observation (4) provides us with an opportunity to create a safety net in case
  230.      * either (1), (2) or (3) fails. This is why Blackhole methods are prohibited from
  231.      * being inlined. This is treated specially in JMH runner code (see CompilerHints).
  232.      * Conversely, both (1), (2), (3) are covering in case (4) fails. This provides
  233.      * a defense in depth for Blackhole implementation, where a point failure is a
  234.      * performance nuisance, but not a correctness catastrophe.
  235.      *
  236.      * In all cases, consumes do the volatile reads to have a consistent memory
  237.      * semantics across all consume methods.
  238.      *
  239.      * Utmost caution should be exercised when changing the Blackhole code. Nominally,
  240.      * the JMH Core Benchmarks should be run on multiple platforms (and their generated code
  241.      * examined) to check the effects are still in place, and the overheads are not prohibitive.
  242.      * Or, in other words:
  243.      *
  244.      * IMPLEMENTING AN EFFICIENT / CORRECT BLACKHOLE IS NOT A SIMPLE TASK YOU CAN
  245.      * DO OVERNIGHT. IT REQUIRES A SIGNIFICANT JVM/COMPILER/PERFORMANCE EXPERTISE,
  246.      * AND LOTS OF TIME OVER THAT. ADJUST YOUR PLANS ACCORDINGLY.
  247.      */
  248.  
  249.     static { // runs once, when the Blackhole class is initialized
  250.         Utils.check(Blackhole.class, "b1", "b2"); // NOTE(review): Utils.check is defined outside this file; presumably it validates how the named fields are laid out - confirm
  251.         Utils.check(Blackhole.class, "bool1", "bool2"); // one call per volatile/plain field pair declared in BlackholeL2
  252.         Utils.check(Blackhole.class, "c1", "c2");
  253.         Utils.check(Blackhole.class, "s1", "s2");
  254.         Utils.check(Blackhole.class, "i1", "i2");
  255.         Utils.check(Blackhole.class, "l1", "l2");
  256.         Utils.check(Blackhole.class, "f1", "f2");
  257.         Utils.check(Blackhole.class, "d1", "d2");
  258.         Utils.check(Blackhole.class, "obj1"); // obj1 has no paired field, so it is checked on its own
  259.     }
  260.  
  261.     public Blackhole(String challengeResponse) { // challengeResponse must equal the exact phrase checked below, otherwise IllegalStateException is thrown
  262.         /*
  263.          * Prevent instantiation by user code. Without additional countermeasures
  264.          * to properly escape Blackhole, its magic is not working. The instances
  265.          * of Blackholes which are injected into benchmark methods are treated by JMH,
  266.          * and users are supposed to only use the injected instances.
  267.          *
  268.          * It only *seems* simple to make the constructor non-public, but then
  269.          * there is a lot of infrastructure code which assumes @State has a default
  270.          * constructor. One might suggest doing the internal factory method to instantiate,
  271.          * but that does not help when extending the Blackhole. There is a *messy* way to
  272.          * special-case most of these problems within the JMH code, but it does not seem
  273.          * to be worth the effort.
  274.          *
  275.          * Therefore, we choose to fail at runtime. It will only affect the users who thought
  276.          * "new Blackhole()" is a good idea, and these users are rare. If you are reading this
  277.          * comment, you might be one of those users. Stay cool! Don't instantiate Blackholes
  278.          * directly though.
  279.          */
  280.         // Constant-first equals: a null response is rejected with the same IllegalStateException instead of a bare NPE.
  281.         if (!"Today's password is swordfish. I understand instantiating Blackholes directly is dangerous.".equals(challengeResponse)) {
  282.             throw new IllegalStateException("Blackholes should not be instantiated directly.");
  283.         }
  284.     }
  285.  
  286.     /**
  287.      * Make any consumed data begone by clearing the reference held in {@code obj1}.
  288.      * WARNING: to be called only by the infrastructure code, in clearly understood cases; although public, it is not meant for users.
  289.      * @param challengeResponse must equal the exact phrase checked below; any other value, including {@code null}, is rejected
  290.      * @throws IllegalStateException if the challenge response does not match
  291.      */
  292.     public void evaporate(String challengeResponse) {
  293.         if (!"Yes, I am Stephen Hawking, and know a thing or two about black holes.".equals(challengeResponse)) { // constant-first: null yields the ISE below, not an NPE
  294.             throw new IllegalStateException("Who are you?");
  295.         }
  296.         obj1 = null; // volatile write; drops whatever consume(Object) last stored
  297.     }
  298.  
  299.     /**
  300.      * Consume object. This call provides a side effect preventing the JIT from eliminating dependent computations.
  301.      *
  302.      * @param obj object to consume.
  303.      */
  304.     public final void consume(Object obj) {
  305.         int tlrMask = this.tlrMask; // volatile read
  306.         int tlr = (this.tlr = (this.tlr * 1664525 + 1013904223)); // advance the inline pseudo-random state (multiply-add step) and keep the new value
  307.         if ((tlr & tlrMask) == 0) { // taken only when every bit selected by the mask is zero
  308.             // SHOULD ALMOST NEVER HAPPEN IN MEASUREMENT
  309.             this.obj1 = new WeakReference<>(obj); // publish obj through the volatile field, held only via a WeakReference
  310.             this.tlrMask = (tlrMask << 1) + 1; // add one more low bit to the mask, so this branch gets rarer each time it is taken
  311.         }
  312.     }
  313.  
  314.     /**
  315.      * Consume a byte value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  316.      *
  317.      * @param b byte value to consume.
  318.      */
  319.     public final void consume(byte b) {
  320.         byte b1 = this.b1; // volatile read
  321.         byte b2 = this.b2; // plain read
  322.         if ((b ^ b1) == (b ^ b2)) { // both sides mix in the argument; they can be equal only if b1 == b2
  323.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects b1 == b2
  324.             nullBait.b1 = b; // implicit null pointer exception
  325.         }
  326.     }
  327.  
  328.     /**
  329.      * Consume a boolean value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  330.      *
  331.      * @param bool boolean value to consume.
  332.      */
  333.     public final void consume(boolean bool) {
  334.         boolean bool1 = this.bool1; // volatile read
  335.         boolean bool2 = this.bool2; // plain read
  336.         if ((bool ^ bool1) == (bool ^ bool2)) { // both sides mix in the argument; they can be equal only if bool1 == bool2
  337.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects bool1 == bool2
  338.             nullBait.bool1 = bool; // implicit null pointer exception
  339.         }
  340.     }
  341.  
  342.     /**
  343.      * Consume a char value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  344.      *
  345.      * @param c char value to consume.
  346.      */
  347.     public final void consume(char c) {
  348.         char c1 = this.c1; // volatile read
  349.         char c2 = this.c2; // plain read
  350.         if ((c ^ c1) == (c ^ c2)) { // both sides mix in the argument; they can be equal only if c1 == c2
  351.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects c1 == c2
  352.             nullBait.c1 = c; // implicit null pointer exception
  353.         }
  354.     }
  355.  
  356.     /**
  357.      * Consume a short value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  358.      *
  359.      * @param s short value to consume.
  360.      */
  361.     public final void consume(short s) {
  362.         short s1 = this.s1; // volatile read
  363.         short s2 = this.s2; // plain read
  364.         if ((s ^ s1) == (s ^ s2)) { // both sides mix in the argument; they can be equal only if s1 == s2
  365.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects s1 == s2
  366.             nullBait.s1 = s; // implicit null pointer exception
  367.         }
  368.     }
  369.  
  370.     /**
  371.      * Consume an int value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  372.      *
  373.      * @param i int value to consume.
  374.      */
  375.     public final void consume(int i) {
  376.         int i1 = this.i1; // volatile read
  377.         int i2 = this.i2; // plain read
  378.         if ((i ^ i1) == (i ^ i2)) { // both sides mix in the argument; they can be equal only if i1 == i2
  379.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects i1 == i2
  380.             nullBait.i1 = i; // implicit null pointer exception
  381.         }
  382.     }
  383.  
  384.     /**
  385.      * Consume a long value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  386.      *
  387.      * @param l long value to consume.
  388.      */
  389.     public final void consume(long l) {
  390.         long l1 = this.l1; // volatile read
  391.         long l2 = this.l2; // plain read
  392.         if ((l ^ l1) == (l ^ l2)) { // both sides mix in the argument; they can be equal only if l1 == l2
  393.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects l1 == l2
  394.             nullBait.l1 = l; // implicit null pointer exception
  395.         }
  396.     }
  397.  
  398.     /**
  399.      * Consume a float value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  400.      *
  401.      * @param f float value to consume.
  402.      */
  403.     public final void consume(float f) {
  404.         float f1 = this.f1; // volatile read
  405.         float f2 = this.f2; // plain read
  406.         if (f == f1 & f == f2) { // non-short-circuit AND: both comparisons are always evaluated; true only if f equals both fields
  407.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects f1 == f2
  408.             nullBait.f1 = f; // implicit null pointer exception
  409.         }
  410.     }
  411.  
  412.     /**
  413.      * Consume a double value. This call provides a side effect preventing the JIT from eliminating dependent computations.
  414.      *
  415.      * @param d double value to consume.
  416.      */
  417.     public final void consume(double d) {
  418.         double d1 = this.d1; // volatile read
  419.         double d2 = this.d2; // plain read
  420.         if (d == d1 & d == d2) { // non-short-circuit AND: both comparisons are always evaluated; true only if d equals both fields
  421.             // SHOULD NEVER HAPPEN: the BlackholeL2 constructor rejects d1 == d2
  422.             nullBait.d1 = d; // implicit null pointer exception
  423.         }
  424.     }
  425.  
  426.     private static volatile long consumedCPU = System.nanoTime(); // start value for the consumeCPU loop; written again only on the t == 42 path below
  427.  
  428.     /**
  429.      * Consume some amount of time tokens.
  430.      *
  431.      * This method does the CPU work almost linear to the number of tokens.
  432.      * The token cost may vary from system to system, and may change in
  433.      * future. (Translation: it is as reliable as we can get, but not absolutely
  434.      * reliable).
  435.      *
  436.      * See JMH samples for the complete demo, and core benchmarks for
  437.      * the performance assessments.
  438.      *
  439.      * @param tokens CPU tokens to consume; the loop body does not run for values of zero or less
  440.      */
  441.     public static void consumeCPU(long tokens) {
  442.         // If you are looking at this code trying to understand
  443.         // the non-linearity on low token counts, know this:
  444.         // we are pretty sure the generated assembly for almost all
  445.         // cases is the same, and the only explanation for the
  446.         // performance difference is hardware-specific effects.
  447.         // Be wary of wasting more time on this. If you know a more
  448.         // advanced and clever option to implement consumeCPU, let us
  449.         // know.
  450.  
  451.         // Randomize start so that JIT could not memoize; this helps
  452.         // to break the loop optimizations if the method is called
  453.         // from the external loop body.
  454.         long t = consumedCPU; // volatile read
  455.  
  456.         // One of the rare cases when counting backwards is meaningful:
  457.         // for the forward loop HotSpot/x86 generates "cmp" with immediate
  458.         // on the hot path, while the backward loop tests against zero
  459.         // with "test". The immediate can have different lengths, which
  460.         // contribute to different machine code for different cases. We
  461.         // counter that with always counting backwards. We also mix the
  462.         // induction variable in, so that reversing the loop is the
  463.         // non-trivial optimization.
  464.         for (long i = tokens; i > 0; i--) {
  465.             t += (t * 0x5DEECE66DL + 0xBL + i) & (0xFFFFFFFFFFFFL); // multiply-add step masked to the low 48 bits; multiplier and addend match java.util.Random's generator
  466.         }
  467.  
  468.         // Need to guarantee side-effect on the result, but can't afford
  469.         // contention; make sure we update the shared state only in the
  470.         // unlikely case, so not to do the furious writes, but still
  471.         // dodge DCE.
  472.         if (t == 42) {
  473.             consumedCPU += t; // read-modify-write on a volatile (not atomic); reached only when the loop result is exactly 42
  474.         }
  475.     }
  476.  
  477. }
RAW Paste Data
Top