Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package org.acme.logfilter;
- import java.io.UnsupportedEncodingException;
- /**
- * Comparison of implementations removing multiple zero bytes (four or more)
- * from an input array.
- *
- * removeDuplicates1() was proposed by another answer in this question:
- *
- * Related to https://stackoverflow.com/questions/1387027/j
- */
- public class StackoverflowTestcase {
- // Implementation from https://stackoverflow.com/a/1387184/1078886
- private static byte[] removeDuplicates1(byte[] inBytes) throws UnsupportedEncodingException {
- String s0 = new String(inBytes, "ISO-8859-1");
- String s1 = s0.replaceAll("\\x00{4,}", "");
- return s1.getBytes("ISO-8859-1");
- }
- /**
- * Remove four or more zero byte sequences from the input array.
- *
- * @param inBytes the input array
- * @return a new array with four or more zero bytes removed form the input array
- */
- private static byte[] removeDuplicates2(byte[] inBytes) {
- int size = inBytes.length;
- // Use an array with the same size in the first place
- byte[] newBytes = new byte[size];
- byte value;
- int newIdx = 0;
- int zeroCounter = 0;
- for (int i = 0; i < size; i++) {
- value = inBytes[i];
- if (value == 0) {
- zeroCounter++;
- } else {
- if (zeroCounter >= 4) {
- // Rewind output buffer index
- newIdx -= zeroCounter;
- }
- zeroCounter = 0;
- }
- newBytes[newIdx] = value;
- newIdx++;
- }
- if (zeroCounter >= 4) {
- // Rewind output buffer index for four zero bytes at the end too
- newIdx -= zeroCounter;
- }
- // Copy data into an array that has the correct length
- byte[] finalOut = new byte[newIdx];
- System.arraycopy(newBytes, 0, finalOut, 0, newIdx);
- return finalOut;
- }
- public static void test1(byte[] inBytes) throws UnsupportedEncodingException {
- for (int i = 0; i < ITERATIONS; i++) {
- removeDuplicates1(inBytes);
- }
- }
- public static void test2(byte[] inBytes) {
- for (int i = 0; i < ITERATIONS; i++) {
- removeDuplicates2(inBytes);
- }
- }
- // Test multiple times to average out fluctuations
- private static final int ITERATIONS = 10000;
- public static void main(String[] args) throws UnsupportedEncodingException {
- byte[] bytes;
- // Prepare the test data with some zero byte patterns added
- // Buffer size
- int size = 32768 * 8;
- // Enable bad case: 0101... vs. 1111111...000
- boolean alternate = true;
- // Add four or more zeros to the array (actually necessary for this test case)
- boolean addZeroSequences = true;
- // Increase the zero sequence length by one with each addition (add variation)
- boolean increaseZeroSequences = false;
- // Controls how frequent zero sequences are spread
- // (The lower the more often zero sequences appear)
- int zeroSequenceCount = 8192; // 32768;
- // Increased with each occurrence
- int zeroSequenceLen = 4;
- bytes = new byte[size];
- int zeroSeqCount;
- for (int i = 0; i < size; i++) {
- if (addZeroSequences && (i % zeroSequenceCount) == 0) {
- zeroSeqCount = zeroSequenceLen;
- // Increase length with every sequence
- if (increaseZeroSequences) {
- zeroSequenceLen++;
- }
- while (zeroSeqCount > 0 && i < size) {
- bytes[i] = 0;
- zeroSeqCount--;
- i++;
- }
- } else {
- if (alternate) {
- bytes[i] = ((i % 2 == 0) ? (byte) 1 : (byte) 0);
- } else {
- bytes[i] = 1;
- }
- }
- }
- long t0;
- float d1, d2, d3;
- t0 = System.currentTimeMillis();
- test1(bytes);
- d1 = (System.currentTimeMillis() - t0);
- System.out.println(d1 + "ms");
- t0 = System.currentTimeMillis();
- test2(bytes);
- d2 = (System.currentTimeMillis() - t0);
- System.out.println(d2 + "ms");
- System.out.println(String.format("Algorithm #1 is %.02fx slower than #2", (d1 / d2)));
- }
- }
Add Comment
Please, Sign In to add comment