Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- namespace Lucene.Net.Util
- {
- /// <summary>An "open" BitSet implementation that allows direct access to the array of words
- /// storing the bits.
- /// <p/>
- /// Unlike java.util.bitset, the fact that bits are packed into an array of longs
- /// is part of the interface. This allows efficient implementation of other algorithms
- /// by someone other than the author. It also allows one to efficiently implement
- /// alternate serialization or interchange formats.
- /// <p/>
- /// <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
- /// and *much* faster at calculating cardinality of sets and results of set operations.
- /// It can also handle sets of larger cardinality (up to 64 * 2**32-1)
- /// <p/>
- /// The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
- /// maximum code reuse. Extra safety and encapsulation
- /// may always be built on top, but if that's built in, the cost can never be removed (and
- /// hence people re-implement their own version in order to get better performance).
- /// If you want a "safe", totally encapsulated (and slower and limited) BitSet
- /// class, use <code>java.util.BitSet</code>.
- /// <p/>
- /// <h3>Performance Results</h3>
- ///
- /// Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
- /// <br/>BitSet size = 1,000,000
- /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
- /// <table border="1">
- /// <tr>
- /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
- /// </tr>
- /// <tr>
- /// <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
- /// </tr>
- /// <tr>
- /// <th>1% full</th> <td>3.31</td> <td>3.90</td> <td> </td> <td>1.04</td> <td> </td> <td>0.99</td>
- /// </tr>
- /// </table>
- /// <br/>
- /// Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
- /// <br/>BitSet size = 1,000,000
- /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
- /// <table border="1">
- /// <tr>
- /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
- /// </tr>
- /// <tr>
- /// <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
- /// </tr>
- /// <tr>
- /// <th>1% full</th> <td>2.51</td> <td>3.49</td> <td> </td> <td>1.00</td> <td> </td> <td>1.02</td>
- /// </tr>
- /// </table>
- /// </summary>
- /// <version> $Id$
- /// </version>
- [Serializable]
- public class OpenBitSet : System.ICloneable
- {
- protected internal long[] bits;
- protected internal int wlen; // number of words (elements) used in the array
- /// <summary>Constructs an OpenBitSet large enough to hold numBits.
- ///
- /// </summary>
- /// <param name="numBits">
- /// </param>
- public OpenBitSet(long numBits)
- {
- bits = new long[Bits2words(numBits)];
- wlen = bits.Length;
- }
- public OpenBitSet()
- : this(64)
- {
- }
- /// <summary>Constructs an OpenBitSet from an existing long[].
- /// <br/>
- /// The first 64 bits are in long[0],
- /// with bit index 0 at the least significant bit, and bit index 63 at the most significant.
- /// Given a bit index,
- /// the word containing it is long[index/64], and it is at bit number index%64 within that word.
- /// <p/>
- /// numWords are the number of elements in the array that contain
- /// set bits (non-zero longs).
- /// numWords should be <= bits.length, and
- /// any existing words in the array at position >= numWords should be zero.
- ///
- /// </summary>
- public OpenBitSet(long[] bits, int numWords)
- {
- this.bits = bits;
- this.wlen = numWords;
- }
- /// <summary>This DocIdSet implementation is cacheable. </summary>
- public bool IsCacheable()
- {
- return true;
- }
- /// <summary>Returns the current capacity in bits (1 greater than the index of the last bit) </summary>
- public virtual long Capacity()
- {
- return bits.Length << 6;
- }
- /// <summary> Returns the current capacity of this set. Included for
- /// compatibility. This is *not* equal to {@link #cardinality}
- /// </summary>
- public virtual long Size()
- {
- return Capacity();
- }
- /// <summary>Returns true if there are no set bits </summary>
- public virtual bool IsEmpty()
- {
- return Cardinality() == 0;
- }
- /// <summary>Expert: returns the long[] storing the bits </summary>
- public virtual long[] GetBits()
- {
- return bits;
- }
- /// <summary>Expert: sets a new long[] to use as the bit storage </summary>
- public virtual void SetBits(long[] bits)
- {
- this.bits = bits;
- }
- /// <summary>Expert: gets the number of longs in the array that are in use </summary>
- public virtual int GetNumWords()
- {
- return wlen;
- }
- /// <summary>Expert: sets the number of longs in the array that are in use </summary>
- public virtual void SetNumWords(int nWords)
- {
- this.wlen = nWords;
- }
- /// <summary>Returns true or false for the specified bit index. </summary>
- public virtual bool Get(int index)
- {
- int i = index >> 6; // div 64
- // signed shift will keep a negative index and force an
- // array-index-out-of-bounds-exception, removing the need for an explicit check.
- if (i >= bits.Length)
- return false;
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- return (bits[i] & bitmask) != 0;
- }
- /// <summary>Returns true or false for the specified bit index.
- /// The index should be less than the OpenBitSet size
- /// </summary>
- public virtual bool FastGet(int index)
- {
- int i = index >> 6; // div 64
- // signed shift will keep a negative index and force an
- // array-index-out-of-bounds-exception, removing the need for an explicit check.
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- return (bits[i] & bitmask) != 0;
- }
- /// <summary>Returns true or false for the specified bit index</summary>
- public virtual bool Get(long index)
- {
- int i = (int)(index >> 6); // div 64
- if (i >= bits.Length)
- return false;
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- return (bits[i] & bitmask) != 0;
- }
- /// <summary>Returns true or false for the specified bit index.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual bool FastGet(long index)
- {
- int i = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- return (bits[i] & bitmask) != 0;
- }
- /*
- // alternate implementation of get()
- public boolean get1(int index) {
- int i = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- return ((bits[i]>>>bit) & 0x01) != 0;
- // this does a long shift and a bittest (on x86) vs
- // a long shift, and a long AND, (the test for zero is prob a no-op)
- // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
- }
- */
- /// <summary>returns 1 if the bit is set, 0 if not.
- /// The index should be less than the OpenBitSet size
- /// </summary>
- public virtual int GetBit(int index)
- {
- int i = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- return ((int)((ulong)(bits[i]) >> bit)) & 0x01;
- }
- /*
- public boolean get2(int index) {
- int word = index >> 6; // div 64
- int bit = index & 0x0000003f; // mod 64
- return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
- // we could right shift and check for parity bit, if it was available to us.
- }
- */
- /// <summary>sets a bit, expanding the set size if necessary </summary>
- public virtual void Set(long index)
- {
- int wordNum = ExpandingWordNum(index);
- int bit = (int)index & 0x3f;
- long bitmask = 1L << bit;
- bits[wordNum] |= bitmask;
- }
- /// <summary>Sets the bit at the specified index.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastSet(int index)
- {
- int wordNum = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] |= bitmask;
- }
- /// <summary>Sets the bit at the specified index.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastSet(long index)
- {
- int wordNum = (int)(index >> 6);
- int bit = (int)index & 0x3f;
- long bitmask = 1L << bit;
- bits[wordNum] |= bitmask;
- }
- /// <summary>Sets a range of bits, expanding the set size if necessary
- ///
- /// </summary>
- /// <param name="startIndex">lower index
- /// </param>
- /// <param name="endIndex">one-past the last bit to set
- /// </param>
- public virtual void Set(long startIndex, long endIndex)
- {
- if (endIndex <= startIndex)
- return;
- int startWord = (int)(startIndex >> 6);
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = ExpandingWordNum(endIndex - 1);
- long startmask = -1L << (int)startIndex;
- long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
- if (startWord == endWord)
- {
- bits[startWord] |= (startmask & endmask);
- return;
- }
- bits[startWord] |= startmask;
- for (int i = startWord + 1; i < endWord; i++)
- bits[i] = -1L;
- bits[endWord] |= endmask;
- }
- protected internal virtual int ExpandingWordNum(long index)
- {
- int wordNum = (int)(index >> 6);
- if (wordNum >= wlen)
- {
- EnsureCapacity(index + 1);
- wlen = wordNum + 1;
- }
- return wordNum;
- }
- /// <summary>clears a bit.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastClear(int index)
- {
- int wordNum = index >> 6;
- int bit = index & 0x03f;
- long bitmask = 1L << bit;
- bits[wordNum] &= ~bitmask;
- // hmmm, it takes one more instruction to clear than it does to set... any
- // way to work around this? If there were only 63 bits per word, we could
- // use a right shift of 10111111...111 in binary to position the 0 in the
- // correct place (using sign extension).
- // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
- // by the JVM into a native instruction.
- // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
- }
- /// <summary>clears a bit.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastClear(long index)
- {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] &= ~bitmask;
- }
- /// <summary>clears a bit, allowing access beyond the current set size without changing the size.</summary>
- public virtual void Clear(long index)
- {
- int wordNum = (int)(index >> 6); // div 64
- if (wordNum >= wlen)
- return;
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] &= ~bitmask;
- }
- /// <summary>Clears a range of bits. Clearing past the end does not change the size of the set.
- ///
- /// </summary>
- /// <param name="startIndex">lower index
- /// </param>
- /// <param name="endIndex">one-past the last bit to clear
- /// </param>
- public virtual void Clear(int startIndex, int endIndex)
- {
- if (endIndex <= startIndex)
- return;
- int startWord = (startIndex >> 6);
- if (startWord >= wlen)
- return;
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = ((endIndex - 1) >> 6);
- long startmask = -1L << startIndex;
- long endmask = (long)(0xffffffffffffffffUL >> -endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
- // invert masks since we are clearing
- startmask = ~startmask;
- endmask = ~endmask;
- if (startWord == endWord)
- {
- bits[startWord] &= (startmask | endmask);
- return;
- }
- bits[startWord] &= startmask;
- int middle = System.Math.Min(wlen, endWord);
- for (int i = startWord + 1; i < middle; i++)
- bits[i] = 0L;
- if (endWord < wlen)
- {
- bits[endWord] &= endmask;
- }
- }
- /// <summary>Clears a range of bits. Clearing past the end does not change the size of the set.
- ///
- /// </summary>
- /// <param name="startIndex">lower index
- /// </param>
- /// <param name="endIndex">one-past the last bit to clear
- /// </param>
- public virtual void Clear(long startIndex, long endIndex)
- {
- if (endIndex <= startIndex)
- return;
- int startWord = (int)(startIndex >> 6);
- if (startWord >= wlen)
- return;
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = (int)((endIndex - 1) >> 6);
- long startmask = -1L << (int)startIndex;
- long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
- // invert masks since we are clearing
- startmask = ~startmask;
- endmask = ~endmask;
- if (startWord == endWord)
- {
- bits[startWord] &= (startmask | endmask);
- return;
- }
- bits[startWord] &= startmask;
- int middle = System.Math.Min(wlen, endWord);
- for (int i = startWord + 1; i < middle; i++)
- bits[i] = 0L;
- if (endWord < wlen)
- {
- bits[endWord] &= endmask;
- }
- }
- /// <summary>Sets a bit and returns the previous value.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual bool GetAndSet(int index)
- {
- int wordNum = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bool val = (bits[wordNum] & bitmask) != 0;
- bits[wordNum] |= bitmask;
- return val;
- }
- /// <summary>Sets a bit and returns the previous value.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual bool GetAndSet(long index)
- {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bool val = (bits[wordNum] & bitmask) != 0;
- bits[wordNum] |= bitmask;
- return val;
- }
- /// <summary>flips a bit.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastFlip(int index)
- {
- int wordNum = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] ^= bitmask;
- }
- /// <summary>flips a bit.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual void FastFlip(long index)
- {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] ^= bitmask;
- }
- /// <summary>flips a bit, expanding the set size if necessary </summary>
- public virtual void Flip(long index)
- {
- int wordNum = ExpandingWordNum(index);
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] ^= bitmask;
- }
- /// <summary>flips a bit and returns the resulting bit value.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual bool FlipAndGet(int index)
- {
- int wordNum = index >> 6; // div 64
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] ^= bitmask;
- return (bits[wordNum] & bitmask) != 0;
- }
- /// <summary>flips a bit and returns the resulting bit value.
- /// The index should be less than the OpenBitSet size.
- /// </summary>
- public virtual bool FlipAndGet(long index)
- {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum] ^= bitmask;
- return (bits[wordNum] & bitmask) != 0;
- }
- /// <summary>Flips a range of bits, expanding the set size if necessary
- ///
- /// </summary>
- /// <param name="startIndex">lower index
- /// </param>
- /// <param name="endIndex">one-past the last bit to flip
- /// </param>
- public virtual void Flip(long startIndex, long endIndex)
- {
- if (endIndex <= startIndex)
- return;
- int startWord = (int)(startIndex >> 6);
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = ExpandingWordNum(endIndex - 1);
- /*** Grrr, java shifting wraps around so -1L>>>64 == -1
- * for that reason, make sure not to use endmask if the bits to flip will
- * be zero in the last word (redefine endWord to be the last changed...)
- long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
- long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
- ***/
- long startmask = -1L << (int)startIndex;
- long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
- if (startWord == endWord)
- {
- bits[startWord] ^= (startmask & endmask);
- return;
- }
- bits[startWord] ^= startmask;
- for (int i = startWord + 1; i < endWord; i++)
- {
- bits[i] = ~bits[i];
- }
- bits[endWord] ^= endmask;
- }
- /*
- public static int pop(long v0, long v1, long v2, long v3) {
- // derived from pop_array by setting last four elems to 0.
- // exchanges one pop() call for 10 elementary operations
- // saving about 7 instructions... is there a better way?
- long twosA=v0 & v1;
- long ones=v0^v1;
- long u2=ones^v2;
- long twosB =(ones&v2)|(u2&v3);
- ones=u2^v3;
- long fours=(twosA&twosB);
- long twos=twosA^twosB;
- return (pop(fours)<<2)
- + (pop(twos)<<1)
- + pop(ones);
- }
- */
- /// <returns> the number of set bits
- /// </returns>
- public virtual long Cardinality()
- {
- return BitUtil.Pop_array(bits, 0, wlen);
- }
- /// <summary>Returns the popcount or cardinality of the intersection of the two sets.
- /// Neither set is modified.
- /// </summary>
- public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
- {
- return BitUtil.Pop_intersect(a.bits, b.bits, 0, System.Math.Min(a.wlen, b.wlen));
- }
- /// <summary>Returns the popcount or cardinality of the union of the two sets.
- /// Neither set is modified.
- /// </summary>
- public static long UnionCount(OpenBitSet a, OpenBitSet b)
- {
- long tot = BitUtil.Pop_union(a.bits, b.bits, 0, System.Math.Min(a.wlen, b.wlen));
- if (a.wlen < b.wlen)
- {
- tot += BitUtil.Pop_array(b.bits, a.wlen, b.wlen - a.wlen);
- }
- else if (a.wlen > b.wlen)
- {
- tot += BitUtil.Pop_array(a.bits, b.wlen, a.wlen - b.wlen);
- }
- return tot;
- }
- /// <summary>Returns the popcount or cardinality of "a and not b"
- /// or "intersection(a, not(b))".
- /// Neither set is modified.
- /// </summary>
- public static long AndNotCount(OpenBitSet a, OpenBitSet b)
- {
- long tot = BitUtil.Pop_andnot(a.bits, b.bits, 0, System.Math.Min(a.wlen, b.wlen));
- if (a.wlen > b.wlen)
- {
- tot += BitUtil.Pop_array(a.bits, b.wlen, a.wlen - b.wlen);
- }
- return tot;
- }
- /// <summary>Returns the popcount or cardinality of the exclusive-or of the two sets.
- /// Neither set is modified.
- /// </summary>
- public static long XorCount(OpenBitSet a, OpenBitSet b)
- {
- long tot = BitUtil.Pop_xor(a.bits, b.bits, 0, System.Math.Min(a.wlen, b.wlen));
- if (a.wlen < b.wlen)
- {
- tot += BitUtil.Pop_array(b.bits, a.wlen, b.wlen - a.wlen);
- }
- else if (a.wlen > b.wlen)
- {
- tot += BitUtil.Pop_array(a.bits, b.wlen, a.wlen - b.wlen);
- }
- return tot;
- }
- /// <summary>Returns the index of the first set bit starting at the index specified.
- /// -1 is returned if there are no more set bits.
- /// </summary>
- public virtual int NextSetBit(int index)
- {
- int i = index >> 6;
- if (i >= wlen)
- return -1;
- int subIndex = index & 0x3f; // index within the word
- long word = bits[i] >> subIndex; // skip all the bits to the right of index
- if (word != 0)
- {
- return (i << 6) + subIndex + BitUtil.Ntz(word);
- }
- while (++i < wlen)
- {
- word = bits[i];
- if (word != 0)
- return (i << 6) + BitUtil.Ntz(word);
- }
- return -1;
- }
- /// <summary>Returns the index of the first set bit starting at the index specified.
- /// -1 is returned if there are no more set bits.
- /// </summary>
- public virtual long NextSetBit(long index)
- {
- int i = (int)(index >> 6);
- if (i >= wlen)
- return -1;
- int subIndex = (int)index & 0x3f; // index within the word
- long word = (long)((ulong)bits[i] >> subIndex); // skip all the bits to the right of index
- if (word != 0)
- {
- return (((long)i) << 6) + (subIndex + BitUtil.Ntz(word));
- }
- while (++i < wlen)
- {
- word = bits[i];
- if (word != 0)
- return (((long)i) << 6) + BitUtil.Ntz(word);
- }
- return -1;
- }
- public virtual System.Object Clone()
- {
- try
- {
- OpenBitSet obs = new OpenBitSet((long[])bits.Clone(), wlen);
- //obs.bits = new long[obs.bits.Length];
- //obs.bits.CopyTo(obs.bits, 0); // hopefully an array clone is as fast(er) than arraycopy
- return obs;
- }
- catch (System.Exception e)
- {
- throw new System.SystemException(e.Message, e);
- }
- }
- /// <summary>this = this AND other </summary>
- public virtual void Intersect(OpenBitSet other)
- {
- int newLen = System.Math.Min(this.wlen, other.wlen);
- long[] thisArr = this.bits;
- long[] otherArr = other.bits;
- // testing against zero can be more efficient
- int pos = newLen;
- while (--pos >= 0)
- {
- thisArr[pos] &= otherArr[pos];
- }
- if (this.wlen > newLen)
- {
- // fill zeros from the new shorter length to the old length
- for (int i = newLen; i < this.wlen; i++)
- bits[i] = 0L;
- }
- this.wlen = newLen;
- }
- /// <summary>this = this OR other </summary>
- public virtual void Union(OpenBitSet other)
- {
- int newLen = System.Math.Max(wlen, other.wlen);
- EnsureCapacityWords(newLen);
- long[] thisArr = this.bits;
- long[] otherArr = other.bits;
- int pos = System.Math.Min(wlen, other.wlen);
- while (--pos >= 0)
- {
- thisArr[pos] |= otherArr[pos];
- }
- if (this.wlen < newLen)
- {
- Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
- }
- this.wlen = newLen;
- }
- /// <summary>Remove all elements set in other. this = this AND_NOT other </summary>
- public virtual void Remove(OpenBitSet other)
- {
- int idx = System.Math.Min(wlen, other.wlen);
- long[] thisArr = this.bits;
- long[] otherArr = other.bits;
- while (--idx >= 0)
- {
- thisArr[idx] &= ~otherArr[idx];
- }
- }
- /// <summary>this = this XOR other </summary>
- public virtual void Xor(OpenBitSet other)
- {
- int newLen = System.Math.Max(wlen, other.wlen);
- EnsureCapacityWords(newLen);
- long[] thisArr = this.bits;
- long[] otherArr = other.bits;
- int pos = System.Math.Min(wlen, other.wlen);
- while (--pos >= 0)
- {
- thisArr[pos] ^= otherArr[pos];
- }
- if (this.wlen < newLen)
- {
- Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
- }
- this.wlen = newLen;
- }
- // some BitSet compatability methods
- //** see {@link intersect} */
- public virtual void And(OpenBitSet other)
- {
- Intersect(other);
- }
- //** see {@link union} */
- public virtual void Or(OpenBitSet other)
- {
- Union(other);
- }
- //** see {@link andNot} */
- public virtual void AndNot(OpenBitSet other)
- {
- Remove(other);
- }
- /// <summary>returns true if the sets have any elements in common </summary>
- public virtual bool Intersects(OpenBitSet other)
- {
- int pos = System.Math.Min(this.wlen, other.wlen);
- long[] thisArr = this.bits;
- long[] otherArr = other.bits;
- while (--pos >= 0)
- {
- if ((thisArr[pos] & otherArr[pos]) != 0)
- return true;
- }
- return false;
- }
- /// <summary>Expand the long[] with the size given as a number of words (64 bit longs).
- /// getNumWords() is unchanged by this call.
- /// </summary>
- public virtual void EnsureCapacityWords(int numWords)
- {
- if (bits.Length < numWords)
- {
- bits = ArrayUtil.Grow(bits, numWords);
- }
- }
- /// <summary>Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
- /// getNumWords() is unchanged by this call.
- /// </summary>
- public virtual void EnsureCapacity(long numBits)
- {
- EnsureCapacityWords(Bits2words(numBits));
- }
- /// <summary>Lowers numWords, the number of words in use,
- /// by checking for trailing zero words.
- /// </summary>
- public virtual void TrimTrailingZeros()
- {
- int idx = wlen - 1;
- while (idx >= 0 && bits[idx] == 0)
- idx--;
- wlen = idx + 1;
- }
- /// <summary>returns the number of 64 bit words it would take to hold numBits </summary>
- public static int Bits2words(long numBits)
- {
- return (int)((((numBits - 1) >> 6)) + 1);
- }
- /// <summary>returns true if both sets have the same bits set </summary>
- public override bool Equals(System.Object o)
- {
- if (this == o)
- return true;
- if (!(o is OpenBitSet))
- return false;
- OpenBitSet a;
- OpenBitSet b = (OpenBitSet)o;
- // make a the larger set.
- if (b.wlen > this.wlen)
- {
- a = b; b = this;
- }
- else
- {
- a = this;
- }
- // check for any set bits out of the range of b
- for (int i = a.wlen - 1; i >= b.wlen; i--)
- {
- if (a.bits[i] != 0)
- return false;
- }
- for (int i = b.wlen - 1; i >= 0; i--)
- {
- if (a.bits[i] != b.bits[i])
- return false;
- }
- return true;
- }
- public override int GetHashCode()
- {
- // Start with a zero hash and use a mix that results in zero if the input is zero.
- // This effectively truncates trailing zeros without an explicit check.
- long h = 0;
- for (int i = bits.Length; --i >= 0; )
- {
- h ^= bits[i];
- h = (h << 1) | (Support.URShift(h, 63)); // rotate left
- }
- // fold leftmost bits into right and add a constant to prevent
- // empty sets from returning 0, which is too common.
- return (int)(((h >> 32) ^ h) + 0x98761234);
- }
- }
- // from org.apache.solr.util rev 555343
- /// <summary>A variety of high efficiencly bit twiddling routines.
- ///
- /// </summary>
- /// <version> $Id$
- /// </version>
- public class BitUtil
- {
- /// <summary>Returns the number of bits set in the long </summary>
- public static int Pop(long x)
- {
- /* Hacker's Delight 32 bit pop function:
- * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
- *
- int pop(unsigned x) {
- x = x - ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x + (x >> 4)) & 0x0F0F0F0F;
- x = x + (x >> 8);
- x = x + (x >> 16);
- return x & 0x0000003F;
- }
- ***/
- // 64 bit java version of the C function from above
- x = x - ((Support.URShift(x, 1)) & 0x5555555555555555L);
- x = (x & 0x3333333333333333L) + ((Support.URShift(x, 2)) & 0x3333333333333333L);
- x = (x + (Support.URShift(x, 4))) & 0x0F0F0F0F0F0F0F0FL;
- x = x + (Support.URShift(x, 8));
- x = x + (Support.URShift(x, 16));
- x = x + (Support.URShift(x, 32));
- return ((int)x) & 0x7F;
- }
- /// <summary> Returns the number of set bits in an array of longs. </summary>
- public static long Pop_array(long[] A, int wordOffset, int numWords)
- {
- /*
- * Robert Harley and David Seal's bit counting algorithm, as documented
- * in the revisions of Hacker's Delight
- * http://www.hackersdelight.org/revisions.pdf
- * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
- *
- * This function was adapted to Java, and extended to use 64 bit words.
- * if only we had access to wider registers like SSE from java...
- *
- * This function can be transformed to compute the popcount of other functions
- * on bitsets via something like this:
- * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
- *
- */
- int n = wordOffset + numWords;
- long tot = 0, tot8 = 0;
- long ones = 0, twos = 0, fours = 0;
- int i;
- for (i = wordOffset; i <= n - 8; i += 8)
- {
- /*** C macro from Hacker's Delight
- #define CSA(h,l, a,b,c) \
- {unsigned u = a ^ b; unsigned v = c; \
- h = (a & b) | (u & v); l = u ^ v;}
- ***/
- long twosA, twosB, foursA, foursB, eights;
- // CSA(twosA, ones, ones, A[i], A[i+1])
- {
- long b = A[i], c = A[i + 1];
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, A[i+2], A[i+3])
- {
- long b = A[i + 2], c = A[i + 3];
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursA, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(twosA, ones, ones, A[i+4], A[i+5])
- {
- long b = A[i + 4], c = A[i + 5];
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, A[i+6], A[i+7])
- {
- long b = A[i + 6], c = A[i + 7];
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursB, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursB = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(eights, fours, fours, foursA, foursB)
- {
- long u = fours ^ foursA;
- eights = (fours & foursA) | (u & foursB);
- fours = u ^ foursB;
- }
- tot8 += Pop(eights);
- }
- // handle trailing words in a binary-search manner...
- // derived from the loop above by setting specific elements to 0.
- // the original method in Hackers Delight used a simple for loop:
- // for (i = i; i < n; i++) // Add in the last elements
- // tot = tot + pop(A[i]);
- if (i <= n - 4)
- {
- long twosA, twosB, foursA, eights;
- {
- long b = A[i], c = A[i + 1];
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long b = A[i + 2], c = A[i + 3];
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 4;
- }
- if (i <= n - 2)
- {
- long b = A[i], c = A[i + 1];
- long u = ones ^ b;
- long twosA = (ones & b) | (u & c);
- ones = u ^ c;
- long foursA = twos & twosA;
- twos = twos ^ twosA;
- long eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 2;
- }
- if (i < n)
- {
- tot += Pop(A[i]);
- }
- tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
- return tot;
- }
- /// <summary>Returns the popcount or cardinality of the two sets after an intersection.
- /// Neither array is modified.
- /// </summary>
- public static long Pop_intersect(long[] A, long[] B, int wordOffset, int numWords)
- {
- // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
- int n = wordOffset + numWords;
- long tot = 0, tot8 = 0;
- long ones = 0, twos = 0, fours = 0;
- int i;
- for (i = wordOffset; i <= n - 8; i += 8)
- {
- long twosA, twosB, foursA, foursB, eights;
- // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1]))
- {
- long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3]))
- {
- long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursA, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5]))
- {
- long b = (A[i + 4] & B[i + 4]), c = (A[i + 5] & B[i + 5]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7]))
- {
- long b = (A[i + 6] & B[i + 6]), c = (A[i + 7] & B[i + 7]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursB, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursB = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(eights, fours, fours, foursA, foursB)
- {
- long u = fours ^ foursA;
- eights = (fours & foursA) | (u & foursB);
- fours = u ^ foursB;
- }
- tot8 += Pop(eights);
- }
- if (i <= n - 4)
- {
- long twosA, twosB, foursA, eights;
- {
- long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 4;
- }
- if (i <= n - 2)
- {
- long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]);
- long u = ones ^ b;
- long twosA = (ones & b) | (u & c);
- ones = u ^ c;
- long foursA = twos & twosA;
- twos = twos ^ twosA;
- long eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 2;
- }
- if (i < n)
- {
- tot += Pop((A[i] & B[i]));
- }
- tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
- return tot;
- }
- /// <summary>Returns the popcount or cardinality of the union of two sets.
- /// Neither array is modified.
- /// </summary>
- public static long Pop_union(long[] A, long[] B, int wordOffset, int numWords)
- {
- // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g'
- int n = wordOffset + numWords;
- long tot = 0, tot8 = 0;
- long ones = 0, twos = 0, fours = 0;
- int i;
- for (i = wordOffset; i <= n - 8; i += 8)
- {
- /*** C macro from Hacker's Delight
- #define CSA(h,l, a,b,c) \
- {unsigned u = a ^ b; unsigned v = c; \
- h = (a & b) | (u & v); l = u ^ v;}
- ***/
- long twosA, twosB, foursA, foursB, eights;
- // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1]))
- {
- long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3]))
- {
- long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursA, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5]))
- {
- long b = (A[i + 4] | B[i + 4]), c = (A[i + 5] | B[i + 5]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7]))
- {
- long b = (A[i + 6] | B[i + 6]), c = (A[i + 7] | B[i + 7]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursB, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursB = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(eights, fours, fours, foursA, foursB)
- {
- long u = fours ^ foursA;
- eights = (fours & foursA) | (u & foursB);
- fours = u ^ foursB;
- }
- tot8 += Pop(eights);
- }
- if (i <= n - 4)
- {
- long twosA, twosB, foursA, eights;
- {
- long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 4;
- }
- if (i <= n - 2)
- {
- long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]);
- long u = ones ^ b;
- long twosA = (ones & b) | (u & c);
- ones = u ^ c;
- long foursA = twos & twosA;
- twos = twos ^ twosA;
- long eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 2;
- }
- if (i < n)
- {
- tot += Pop((A[i] | B[i]));
- }
- tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
- return tot;
- }
- /// <summary>Returns the popcount or cardinality of A & ~B
- /// Neither array is modified.
- /// </summary>
- public static long Pop_andnot(long[] A, long[] B, int wordOffset, int numWords)
- {
- // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& ~B[\1]\)/g'
- int n = wordOffset + numWords;
- long tot = 0, tot8 = 0;
- long ones = 0, twos = 0, fours = 0;
- int i;
- for (i = wordOffset; i <= n - 8; i += 8)
- {
- /*** C macro from Hacker's Delight
- #define CSA(h,l, a,b,c) \
- {unsigned u = a ^ b; unsigned v = c; \
- h = (a & b) | (u & v); l = u ^ v;}
- ***/
- long twosA, twosB, foursA, foursB, eights;
- // CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1]))
- {
- long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3]))
- {
- long b = (A[i + 2] & ~B[i + 2]), c = (A[i + 3] & ~B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursA, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5]))
- {
- long b = (A[i + 4] & ~B[i + 4]), c = (A[i + 5] & ~B[i + 5]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7]))
- {
- long b = (A[i + 6] & ~B[i + 6]), c = (A[i + 7] & ~B[i + 7]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursB, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursB = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(eights, fours, fours, foursA, foursB)
- {
- long u = fours ^ foursA;
- eights = (fours & foursA) | (u & foursB);
- fours = u ^ foursB;
- }
- tot8 += Pop(eights);
- }
- if (i <= n - 4)
- {
- long twosA, twosB, foursA, eights;
- {
- long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long b = (A[i + 2] & ~B[i + 2]), c = (A[i + 3] & ~B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 4;
- }
- if (i <= n - 2)
- {
- long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]);
- long u = ones ^ b;
- long twosA = (ones & b) | (u & c);
- ones = u ^ c;
- long foursA = twos & twosA;
- twos = twos ^ twosA;
- long eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 2;
- }
- if (i < n)
- {
- tot += Pop((A[i] & ~B[i]));
- }
- tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
- return tot;
- }
- public static long Pop_xor(long[] A, long[] B, int wordOffset, int numWords)
- {
- int n = wordOffset + numWords;
- long tot = 0, tot8 = 0;
- long ones = 0, twos = 0, fours = 0;
- int i;
- for (i = wordOffset; i <= n - 8; i += 8)
- {
- /*** C macro from Hacker's Delight
- #define CSA(h,l, a,b,c) \
- {unsigned u = a ^ b; unsigned v = c; \
- h = (a & b) | (u & v); l = u ^ v;}
- ***/
- long twosA, twosB, foursA, foursB, eights;
- // CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1]))
- {
- long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3]))
- {
- long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursA, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5]))
- {
- long b = (A[i + 4] ^ B[i + 4]), c = (A[i + 5] ^ B[i + 5]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- // CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7]))
- {
- long b = (A[i + 6] ^ B[i + 6]), c = (A[i + 7] ^ B[i + 7]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- //CSA(foursB, twos, twos, twosA, twosB)
- {
- long u = twos ^ twosA;
- foursB = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- //CSA(eights, fours, fours, foursA, foursB)
- {
- long u = fours ^ foursA;
- eights = (fours & foursA) | (u & foursB);
- fours = u ^ foursB;
- }
- tot8 += Pop(eights);
- }
- if (i <= n - 4)
- {
- long twosA, twosB, foursA, eights;
- {
- long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
- long u = ones ^ b;
- twosA = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]);
- long u = ones ^ b;
- twosB = (ones & b) | (u & c);
- ones = u ^ c;
- }
- {
- long u = twos ^ twosA;
- foursA = (twos & twosA) | (u & twosB);
- twos = u ^ twosB;
- }
- eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 4;
- }
- if (i <= n - 2)
- {
- long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]);
- long u = ones ^ b;
- long twosA = (ones & b) | (u & c);
- ones = u ^ c;
- long foursA = twos & twosA;
- twos = twos ^ twosA;
- long eights = fours & foursA;
- fours = fours ^ foursA;
- tot8 += Pop(eights);
- i += 2;
- }
- if (i < n)
- {
- tot += Pop((A[i] ^ B[i]));
- }
- tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3);
- return tot;
- }
- /* python code to generate ntzTable
- def ntz(val):
- if val==0: return 8
- i=0
- while (val&0x01)==0:
- i = i+1
- val >>= 1
- return i
- print ','.join([ str(ntz(i)) for i in range(256) ])
- ***/
- /// <summary>table of number of trailing zeros in a byte </summary>
- public static readonly sbyte[] ntzTable = new sbyte[] { 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
- /// <summary>Returns number of trailing zeros in a 64 bit long value. </summary>
- public static int Ntz(long val)
- {
- // A full binary search to determine the low byte was slower than
- // a linear search for nextSetBit(). This is most likely because
- // the implementation of nextSetBit() shifts bits to the right, increasing
- // the probability that the first non-zero byte is in the rhs.
- //
- // This implementation does a single binary search at the top level only
- // so that all other bit shifting can be done on ints instead of longs to
- // remain friendly to 32 bit architectures. In addition, the case of a
- // non-zero first byte is checked for first because it is the most common
- // in dense bit arrays.
- int lower = (int)val;
- int lowByte = lower & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte];
- if (lower != 0)
- {
- lowByte = (Support.URShift(lower, 8)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 8;
- lowByte = (Support.URShift(lower, 16)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 16;
- // no need to mask off low byte for the last byte in the 32 bit word
- // no need to check for zero on the last byte either.
- return ntzTable[Support.URShift(lower, 24)] + 24;
- }
- else
- {
- // grab upper 32 bits
- int upper = (int)(val >> 32);
- lowByte = upper & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 32;
- lowByte = (Support.URShift(upper, 8)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 40;
- lowByte = (Support.URShift(upper, 16)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 48;
- // no need to mask off low byte for the last byte in the 32 bit word
- // no need to check for zero on the last byte either.
- return ntzTable[Support.URShift(upper, 24)] + 56;
- }
- }
- /// <summary>Returns number of trailing zeros in a 32 bit int value. </summary>
- public static int Ntz(int val)
- {
- // This implementation does a single binary search at the top level only.
- // In addition, the case of a non-zero first byte is checked for first
- // because it is the most common in dense bit arrays.
- int lowByte = val & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte];
- lowByte = (Support.URShift(val, 8)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 8;
- lowByte = (Support.URShift(val, 16)) & 0xff;
- if (lowByte != 0)
- return ntzTable[lowByte] + 16;
- // no need to mask off low byte for the last byte.
- // no need to check for zero on the last byte either.
- return ntzTable[Support.URShift(val, 24)] + 24;
- }
- /// <summary>returns 0 based index of first set bit
- /// (only works for x!=0)
- /// <br/> This is an alternate implementation of ntz()
- /// </summary>
- public static int Ntz2(long x)
- {
- int n = 0;
- int y = (int)x;
- if (y == 0)
- {
- n += 32; y = (int)(Support.URShift(x, 32));
- } // the only 64 bit shift necessary
- if ((y & 0x0000FFFF) == 0)
- {
- n += 16; y = Support.URShift(y, 16);
- }
- if ((y & 0x000000FF) == 0)
- {
- n += 8; y = Support.URShift(y, 8);
- }
- return (ntzTable[y & 0xff]) + n;
- }
- /// <summary>returns 0 based index of first set bit
- /// <br/> This is an alternate implementation of ntz()
- /// </summary>
- public static int Ntz3(long x)
- {
- // another implementation taken from Hackers Delight, extended to 64 bits
- // and converted to Java.
- // Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc
- int n = 1;
- // do the first step as a long, all others as ints.
- int y = (int)x;
- if (y == 0)
- {
- n += 32; y = (int)(Support.URShift(x, 32));
- }
- if ((y & 0x0000FFFF) == 0)
- {
- n += 16; y = Support.URShift(y, 16);
- }
- if ((y & 0x000000FF) == 0)
- {
- n += 8; y = Support.URShift(y, 8);
- }
- if ((y & 0x0000000F) == 0)
- {
- n += 4; y = Support.URShift(y, 4);
- }
- if ((y & 0x00000003) == 0)
- {
- n += 2; y = Support.URShift(y, 2);
- }
- return n - (y & 1);
- }
- /// <summary>returns true if v is a power of two or zero</summary>
- public static bool IsPowerOfTwo(int v)
- {
- return ((v & (v - 1)) == 0);
- }
- /// <summary>returns true if v is a power of two or zero</summary>
- public static bool IsPowerOfTwo(long v)
- {
- return ((v & (v - 1)) == 0);
- }
- /// <summary>returns the next highest power of two, or the current value if it's already a power of two or zero</summary>
- public static int NextHighestPowerOfTwo(int v)
- {
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
- }
- /// <summary>returns the next highest power of two, or the current value if it's already a power of two or zero</summary>
- public static long NextHighestPowerOfTwo(long v)
- {
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v |= v >> 32;
- v++;
- return v;
- }
- }
- /// <summary> Methods for manipulating arrays.</summary>
- public sealed class ArrayUtil
- {
- /*
- Begin Apache Harmony code
- Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java
- */
- /// <summary> Parses the string argument as if it was an int value and returns the
- /// result. Throws NumberFormatException if the string does not represent an
- /// int quantity.
- ///
- /// </summary>
- /// <param name="chars">a string representation of an int quantity.
- /// </param>
- /// <returns> int the value represented by the argument
- /// </returns>
- /// <throws> NumberFormatException if the argument could not be parsed as an int quantity. </throws>
- public static int ParseInt(char[] chars)
- {
- return ParseInt(chars, 0, chars.Length, 10);
- }
- /// <summary> Parses a char array into an int.</summary>
- /// <param name="chars">the character array
- /// </param>
- /// <param name="offset">The offset into the array
- /// </param>
- /// <param name="len">The length
- /// </param>
- /// <returns> the int
- /// </returns>
- /// <throws> NumberFormatException if it can't parse </throws>
- public static int ParseInt(char[] chars, int offset, int len)
- {
- return ParseInt(chars, offset, len, 10);
- }
- /// <summary> Parses the string argument as if it was an int value and returns the
- /// result. Throws NumberFormatException if the string does not represent an
- /// int quantity. The second argument specifies the radix to use when parsing
- /// the value.
- ///
- /// </summary>
- /// <param name="chars">a string representation of an int quantity.
- /// </param>
- /// <param name="radix">the base to use for conversion.
- /// </param>
- /// <returns> int the value represented by the argument
- /// </returns>
- /// <throws> NumberFormatException if the argument could not be parsed as an int quantity. </throws>
- public static int ParseInt(char[] chars, int offset, int len, int radix)
- {
- if (chars == null || radix < 2 || radix > 36)
- {
- throw new System.FormatException();
- }
- int i = 0;
- if (len == 0)
- {
- throw new System.FormatException("chars length is 0");
- }
- bool negative = chars[offset + i] == '-';
- if (negative && ++i == len)
- {
- throw new System.FormatException("can't convert to an int");
- }
- if (negative == true)
- {
- offset++;
- len--;
- }
- return Parse(chars, offset, len, radix, negative);
- }
- private static int Parse(char[] chars, int offset, int len, int radix, bool negative)
- {
- int max = System.Int32.MinValue / radix;
- int result = 0;
- for (int i = 0; i < len; i++)
- {
- int digit = (int)System.Char.GetNumericValue(chars[i + offset]);
- if (digit == -1)
- {
- throw new System.FormatException("Unable to parse");
- }
- if (max > result)
- {
- throw new System.FormatException("Unable to parse");
- }
- int next = result * radix - digit;
- if (next > result)
- {
- throw new System.FormatException("Unable to parse");
- }
- result = next;
- }
- /*while (offset < len) {
- }*/
- if (!negative)
- {
- result = -result;
- if (result < 0)
- {
- throw new System.FormatException("Unable to parse");
- }
- }
- return result;
- }
- /*
- END APACHE HARMONY CODE
- */
- public static int GetNextSize(int targetSize)
- {
- /* This over-allocates proportional to the list size, making room
- * for additional growth. The over-allocation is mild, but is
- * enough to give linear-time amortized behavior over a long
- * sequence of appends() in the presence of a poorly-performing
- * system realloc().
- * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
- */
- return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize;
- }
- public static int GetShrinkSize(int currentSize, int targetSize)
- {
- int newSize = GetNextSize(targetSize);
- // Only reallocate if we are "substantially" smaller.
- // This saves us from "running hot" (constantly making a
- // bit bigger then a bit smaller, over and over):
- if (newSize < currentSize / 2)
- return newSize;
- else
- return currentSize;
- }
- public static int[] Grow(int[] array, int minSize)
- {
- if (array.Length < minSize)
- {
- int[] newArray = new int[GetNextSize(minSize)];
- Array.Copy(array, 0, newArray, 0, array.Length);
- return newArray;
- }
- else
- return array;
- }
- public static int[] Grow(int[] array)
- {
- return Grow(array, 1 + array.Length);
- }
- public static int[] Shrink(int[] array, int targetSize)
- {
- int newSize = GetShrinkSize(array.Length, targetSize);
- if (newSize != array.Length)
- {
- int[] newArray = new int[newSize];
- Array.Copy(array, 0, newArray, 0, newSize);
- return newArray;
- }
- else
- return array;
- }
- public static long[] Grow(long[] array, int minSize)
- {
- if (array.Length < minSize)
- {
- long[] newArray = new long[GetNextSize(minSize)];
- Array.Copy(array, 0, newArray, 0, array.Length);
- return newArray;
- }
- else
- return array;
- }
- public static long[] Grow(long[] array)
- {
- return Grow(array, 1 + array.Length);
- }
- public static long[] Shrink(long[] array, int targetSize)
- {
- int newSize = GetShrinkSize(array.Length, targetSize);
- if (newSize != array.Length)
- {
- long[] newArray = new long[newSize];
- Array.Copy(array, 0, newArray, 0, newSize);
- return newArray;
- }
- else
- return array;
- }
- public static byte[] Grow(byte[] array, int minSize)
- {
- if (array.Length < minSize)
- {
- byte[] newArray = new byte[GetNextSize(minSize)];
- Array.Copy(array, 0, newArray, 0, array.Length);
- return newArray;
- }
- else
- return array;
- }
- public static byte[] Grow(byte[] array)
- {
- return Grow(array, 1 + array.Length);
- }
- public static byte[] Shrink(byte[] array, int targetSize)
- {
- int newSize = GetShrinkSize(array.Length, targetSize);
- if (newSize != array.Length)
- {
- byte[] newArray = new byte[newSize];
- Array.Copy(array, 0, newArray, 0, newSize);
- return newArray;
- }
- else
- return array;
- }
- /// <summary> Returns hash of chars in range start (inclusive) to
- /// end (inclusive)
- /// </summary>
- public static int HashCode(char[] array, int start, int end)
- {
- int code = 0;
- for (int i = end - 1; i >= start; i--)
- code = code * 31 + array[i];
- return code;
- }
- /// <summary> Returns hash of chars in range start (inclusive) to
- /// end (inclusive)
- /// </summary>
- public static int HashCode(byte[] array, int start, int end)
- {
- int code = 0;
- for (int i = end - 1; i >= start; i--)
- code = code * 31 + array[i];
- return code;
- }
- }
- public class Support
- {
- /// <summary>
- /// Performs an unsigned bitwise right shift with the specified number
- /// </summary>
- /// <param name="number">Number to operate on</param>
- /// <param name="bits">Ammount of bits to shift</param>
- /// <returns>The resulting number from the shift operation</returns>
- public static int URShift(int number, int bits)
- {
- return (int)(((uint)number) >> bits);
- }
- /// <summary>
- /// Performs an unsigned bitwise right shift with the specified number
- /// </summary>
- /// <param name="number">Number to operate on</param>
- /// <param name="bits">Ammount of bits to shift</param>
- /// <returns>The resulting number from the shift operation</returns>
- public static long URShift(long number, int bits)
- {
- return (long)(((ulong)number) >> bits);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement