L_B

TakeOrdered

L_B
Feb 1st, 2012
644
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5.  
  6. namespace System.Linq
  7. {
  8.     public static class LuceneNetExtensions
  9.     {
  10.         /// <summary>
  11.         /// <para>
  12.         /// Replacement for list.OrderBy(?).Take(n)
  13.         /// </para>
  14.         /// <para>
  15.         /// 1- Low Memory usage (since this method requires only ~"2*n" elements be in memory)
  16.         /// </para>
  17.         /// 2- Speed up by an order of magnitude especially when the input is large.
  18.         /// </summary>
  19.         public static IEnumerable<T> TakeOrdered<T, TKey>(this IEnumerable<T> list, int n, Func<T, TKey> keySelector, bool ascending = true) where TKey : IComparable<TKey>
  20.         {
  21.  
  22.             var pq = new MyPriorityQueue<T>(n,
  23.                     ascending
  24.                         ?
  25.                         (Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) >= 0)
  26.                         :
  27.                         (Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) < 0)
  28.                 );
  29.  
  30.             Stack<T> stack = new Stack<T>();
  31.  
  32.             int count = 0;
  33.             foreach (T item in list)
  34.             {
  35.                 pq.InsertWithOverflow(item);
  36.                 count++;
  37.             }
  38.  
  39.             int min = Math.Min(count, n);
  40.  
  41.             for (int i = 0; i < min; i++)
  42.                 stack.Push(pq.Pop());
  43.  
  44.             for (int i = 0; i < min; i++)
  45.                 yield return stack.Pop();
  46.  
  47.         }
  48.  
  49.         public static IEnumerable<T> TakeOrdered<T>(this IEnumerable<T> list, int n, bool ascending = true) where T : IComparable<T>
  50.         {
  51.             return TakeOrdered(list, n, x => x,ascending);
  52.         }
  53.  
  54.  
  55.         class MyPriorityQueue<T> : Lucene.Net.Util.PriorityQueue<T>
  56.         {
  57.             Func<T, T, bool> _LessThan;
  58.  
  59.             internal MyPriorityQueue(int n, Func<T, T, bool> lessThan)
  60.             {
  61.                 _LessThan = lessThan;
  62.                 Initialize(n);
  63.             }
  64.  
  65.             public override bool LessThan(T a, T b)
  66.             {
  67.                 return _LessThan(a, b);
  68.             }
  69.         }
  70.     }
  71. }
  72.  
  73. #region Lucene.Net
  74. /// --------------------------------------------------------------------------------------
  75. /// PriorityQueue from Lucene.Net
  76. /// http://incubator.apache.org/lucene.net/
  77. ///
  78. /// Remove this part if you already have a reference to Lucene.Net
  79. /// --------------------------------------------------------------------------------------
  80.  
  81. /*
  82.  * Licensed to the Apache Software Foundation (ASF) under one or more
  83.  * contributor license agreements.  See the NOTICE file distributed with
  84.  * this work for additional information regarding copyright ownership.
  85.  * The ASF licenses this file to You under the Apache License, Version 2.0
  86.  * (the "License"); you may not use this file except in compliance with
  87.  * the License.  You may obtain a copy of the License at
  88.  *
  89.  * http://www.apache.org/licenses/LICENSE-2.0
  90.  *
  91.  * Unless required by applicable law or agreed to in writing, software
  92.  * distributed under the License is distributed on an "AS IS" BASIS,
  93.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  94.  * See the License for the specific language governing permissions and
  95.  * limitations under the License.
  96.  */
  97.  
  98. namespace Lucene.Net.Util
  99. {
  100.  
  101.     /// <summary>A PriorityQueue maintains a partial ordering of its elements such that the
  102.     /// least element can always be found in constant time.  Put()'s and pop()'s
  103.     /// require log(size) time.
  104.     ///
  105.     /// <p/><b>NOTE</b>: This class pre-allocates a full array of
  106.     /// length <code>maxSize+1</code>, in {@link #initialize}.
  107.     ///
  108.     /// </summary>
  109.     public abstract class PriorityQueue<T>
  110.     {
  111.         private int size;
  112.         private int maxSize;
  113.         protected internal T[] heap;
  114.  
  115.         /// <summary>Determines the ordering of objects in this priority queue.  Subclasses
  116.         /// must define this one method.
  117.         /// </summary>
  118.         public abstract bool LessThan(T a, T b);
  119.  
  120.         /// <summary> This method can be overridden by extending classes to return a sentinel
  121.         /// object which will be used by {@link #Initialize(int)} to fill the queue, so
  122.         /// that the code which uses that queue can always assume it's full and only
  123.         /// change the top without attempting to insert any new object.<br/>
  124.         ///
  125.         /// Those sentinel values should always compare worse than any non-sentinel
  126.         /// value (i.e., {@link #LessThan(Object, Object)} should always favor the
  127.         /// non-sentinel values).<br/>
  128.         ///
  129.         /// By default, this method returns false, which means the queue will not be
  130.         /// filled with sentinel values. Otherwise, the value returned will be used to
  131.         /// pre-populate the queue. Adds sentinel values to the queue.<br/>
  132.         ///
  133.         /// If this method is extended to return a non-null value, then the following
  134.         /// usage pattern is recommended:
  135.         ///
  136.         /// <pre>
  137.         /// // extends getSentinelObject() to return a non-null value.
  138.         /// PriorityQueue pq = new MyQueue(numHits);
  139.         /// // save the 'top' element, which is guaranteed to not be null.
  140.         /// MyObject pqTop = (MyObject) pq.top();
  141.         /// &lt;...&gt;
  142.         /// // now in order to add a new element, which is 'better' than top (after
  143.         /// // you've verified it is better), it is as simple as:
  144.         /// pqTop.change().
  145.         /// pqTop = pq.updateTop();
  146.         /// </pre>
  147.         ///
  148.         /// <b>NOTE:</b> if this method returns a non-null value, it will be called by
  149.         /// {@link #Initialize(int)} {@link #Size()} times, relying on a new object to
  150.         /// be returned and will not check if it's null again. Therefore you should
  151.         /// ensure any call to this method creates a new instance and behaves
  152.         /// consistently, e.g., it cannot return null if it previously returned
  153.         /// non-null.
  154.         ///
  155.         /// </summary>
  156.         /// <returns> the sentinel object to use to pre-populate the queue, or null if
  157.         /// sentinel objects are not supported.
  158.         /// </returns>
  159.         protected internal virtual T GetSentinelObject()
  160.         {
  161.             return default(T);
  162.         }
  163.  
  164.         /// <summary>Subclass constructors must call this. </summary>
  165.         protected internal void Initialize(int maxSize)
  166.         {
  167.             size = 0;
  168.             int heapSize;
  169.             if (0 == maxSize)
  170.                 // We allocate 1 extra to avoid if statement in top()
  171.                 heapSize = 2;
  172.             else
  173.             {
  174.                 if (maxSize == Int32.MaxValue)
  175.                 {
  176.                     // Don't wrap heapSize to -1, in this case, which
  177.                     // causes a confusing NegativeArraySizeException.
  178.                     // Note that very likely this will simply then hit
  179.                     // an OOME, but at least that's more indicative to
  180.                     // caller that this values is too big.  We don't +1
  181.                     // in this case, but it's very unlikely in practice
  182.                     // one will actually insert this many objects into
  183.                     // the PQ:
  184.                     heapSize = Int32.MaxValue;
  185.                 }
  186.                 else
  187.                 {
  188.                     // NOTE: we add +1 because all access to heap is
  189.                     // 1-based not 0-based.  heap[0] is unused.
  190.                     heapSize = maxSize + 1;
  191.                 }
  192.             }
  193.             heap = new T[heapSize];
  194.             this.maxSize = maxSize;
  195.  
  196.             // If sentinel objects are supported, populate the queue with them
  197.             T sentinel = GetSentinelObject();
  198.             if (sentinel != null && !sentinel.Equals(default(T)))
  199.             {
  200.                 heap[1] = sentinel;
  201.                 for (int i = 2; i < heap.Length; i++)
  202.                 {
  203.                     heap[i] = GetSentinelObject();
  204.                 }
  205.                 size = maxSize;
  206.             }
  207.         }
  208.  
  209.         /// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
  210.         /// more objects than maxSize from initialize a RuntimeException
  211.         /// (ArrayIndexOutOfBound) is thrown.
  212.         ///
  213.         /// </summary>
  214.         /// <deprecated> use {@link #Add(Object)} which returns the new top object,
  215.         /// saving an additional call to {@link #Top()}.
  216.         /// </deprecated>
  217.         [Obsolete("use Add(Object) which returns the new top object, saving an additional call to Top().")]
  218.         public void Put(T element)
  219.         {
  220.             size++;
  221.             heap[size] = element;
  222.             UpHeap();
  223.         }
  224.  
  225.         /// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
  226.         /// more objects than maxSize from initialize an
  227.         /// {@link ArrayIndexOutOfBoundsException} is thrown.
  228.         ///
  229.         /// </summary>
  230.         /// <returns> the new 'top' element in the queue.
  231.         /// </returns>
  232.         public T Add(T element)
  233.         {
  234.             size++;
  235.             heap[size] = element;
  236.             UpHeap();
  237.             return heap[1];
  238.         }
  239.  
  240.         /// <summary> Adds element to the PriorityQueue in log(size) time if either the
  241.         /// PriorityQueue is not full, or not lessThan(element, top()).
  242.         ///
  243.         /// </summary>
  244.         /// <param name="element">
  245.         /// </param>
  246.         /// <returns> true if element is added, false otherwise.
  247.         /// </returns>
  248.         /// <deprecated> use {@link #InsertWithOverflow(Object)} instead, which
  249.         /// encourages objects reuse.
  250.         /// </deprecated>
  251.         [Obsolete("use InsertWithOverflow(Object) instead, which encourages objects reuse.")]
  252.         public virtual bool Insert(T element)
  253.         {
  254.             return !element.Equals(InsertWithOverflow(element));
  255.         }
  256.  
  257.         /// <summary> insertWithOverflow() is the same as insert() except its
  258.         /// return value: it returns the object (if any) that was
  259.         /// dropped off the heap because it was full. This can be
  260.         /// the given parameter (in case it is smaller than the
  261.         /// full heap's minimum, and couldn't be added), or another
  262.         /// object that was previously the smallest value in the
  263.         /// heap and now has been replaced by a larger one, or null
  264.         /// if the queue wasn't yet full with maxSize elements.
  265.         /// </summary>
  266.         public virtual T InsertWithOverflow(T element)
  267.         {
  268.             if (size < maxSize)
  269.             {
  270.                 Put(element);
  271.                 return default(T);
  272.             }
  273.             else if (size > 0 && !LessThan(element, heap[1]))
  274.             {
  275.                 T ret = heap[1];
  276.                 heap[1] = element;
  277.                 AdjustTop();
  278.                 return ret;
  279.             }
  280.             else
  281.             {
  282.                 return element;
  283.             }
  284.         }
  285.  
  286.         /// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
  287.         public T Top()
  288.         {
  289.             // We don't need to check size here: if maxSize is 0,
  290.             // then heap is length 2 array with both entries null.
  291.             // If size is 0 then heap[1] is already null.
  292.             return heap[1];
  293.         }
  294.  
  295.         /// <summary>Removes and returns the least element of the PriorityQueue in log(size)
  296.         /// time.
  297.         /// </summary>
  298.         public T Pop()
  299.         {
  300.             if (size > 0)
  301.             {
  302.                 T result = heap[1]; // save first value
  303.                 heap[1] = heap[size]; // move last to first
  304.                 heap[size] = default(T); // permit GC of objects
  305.                 size--;
  306.                 DownHeap(); // adjust heap
  307.                 return result;
  308.             }
  309.             else
  310.                 return default(T);
  311.         }
  312.  
  313.         /// <summary> Should be called when the Object at top changes values. Still log(n) worst
  314.         /// case, but it's at least twice as fast to
  315.         ///
  316.         /// <pre>
  317.         /// pq.top().change();
  318.         /// pq.adjustTop();
  319.         /// </pre>
  320.         ///
  321.         /// instead of
  322.         ///
  323.         /// <pre>
  324.         /// o = pq.pop();
  325.         /// o.change();
  326.         /// pq.push(o);
  327.         /// </pre>
  328.         ///
  329.         /// </summary>
  330.         /// <deprecated> use {@link #UpdateTop()} which returns the new top element and
  331.         /// saves an additional call to {@link #Top()}.
  332.         /// </deprecated>
  333.         [Obsolete("use UpdateTop() which returns the new top element and saves an additional call to Top()")]
  334.         public void AdjustTop()
  335.         {
  336.             DownHeap();
  337.         }
  338.  
  339.         /// <summary> Should be called when the Object at top changes values. Still log(n) worst
  340.         /// case, but it's at least twice as fast to
  341.         ///
  342.         /// <pre>
  343.         /// pq.top().change();
  344.         /// pq.updateTop();
  345.         /// </pre>
  346.         ///
  347.         /// instead of
  348.         ///
  349.         /// <pre>
  350.         /// o = pq.pop();
  351.         /// o.change();
  352.         /// pq.push(o);
  353.         /// </pre>
  354.         ///
  355.         /// </summary>
  356.         /// <returns> the new 'top' element.
  357.         /// </returns>
  358.         public T UpdateTop()
  359.         {
  360.             DownHeap();
  361.             return heap[1];
  362.         }
  363.  
  364.         /// <summary>Returns the number of elements currently stored in the PriorityQueue. </summary>
  365.         public int Size()
  366.         {
  367.             return size;
  368.         }
  369.  
  370.         /// <summary>Removes all entries from the PriorityQueue. </summary>
  371.         public void Clear()
  372.         {
  373.             for (int i = 0; i <= size; i++)
  374.             {
  375.                 heap[i] = default(T);
  376.             }
  377.             size = 0;
  378.         }
  379.  
  380.         private void UpHeap()
  381.         {
  382.             int i = size;
  383.             T node = heap[i]; // save bottom node
  384.             int j = URShift(i, 1);
  385.             while (j > 0 && LessThan(node, heap[j]))
  386.             {
  387.                 heap[i] = heap[j]; // shift parents down
  388.                 i = j;
  389.                 j = URShift(j, 1);
  390.             }
  391.             heap[i] = node; // install saved node
  392.         }
  393.  
  394.         private void DownHeap()
  395.         {
  396.             int i = 1;
  397.             T node = heap[i]; // save top node
  398.             int j = i << 1; // find smaller child
  399.             int k = j + 1;
  400.             if (k <= size && LessThan(heap[k], heap[j]))
  401.             {
  402.                 j = k;
  403.             }
  404.             while (j <= size && LessThan(heap[j], node))
  405.             {
  406.                 heap[i] = heap[j]; // shift up child
  407.                 i = j;
  408.                 j = i << 1;
  409.                 k = j + 1;
  410.                 if (k <= size && LessThan(heap[k], heap[j]))
  411.                 {
  412.                     j = k;
  413.                 }
  414.             }
  415.             heap[i] = node; // install saved node
  416.         }
  417.  
  418.         public int URShift(int number, int bits)
  419.         {
  420.             return (int)(((uint)number) >> bits);
  421.         }
  422.     }
  423. }
  424. #endregion
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×