Advertisement
L_B

TakeOrdered

L_B
Feb 1st, 2012
1,103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 15.05 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5.  
  6. namespace System.Linq
  7. {
  8.     public static class LuceneNetExtensions
  9.     {
  10.         /// <summary>
  11.         /// <para>
  12.         /// Replacement for list.OrderBy(?).Take(n)
  13.         /// </para>
  14.         /// <para>
  15.         /// 1- Low Memory usage (since this method requires only ~"2*n" elements be in memory)
  16.         /// </para>
  17.         /// 2- Speed up by an order of magnitude especially when the input is large.
  18.         /// </summary>
  19.         public static IEnumerable<T> TakeOrdered<T, TKey>(this IEnumerable<T> list, int n, Func<T, TKey> keySelector, bool ascending = true) where TKey : IComparable<TKey>
  20.         {
  21.  
  22.             var pq = new MyPriorityQueue<T>(n,
  23.                     ascending
  24.                         ?
  25.                         (Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) >= 0)
  26.                         :
  27.                         (Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) < 0)
  28.                 );
  29.  
  30.             Stack<T> stack = new Stack<T>();
  31.  
  32.             int count = 0;
  33.             foreach (T item in list)
  34.             {
  35.                 pq.InsertWithOverflow(item);
  36.                 count++;
  37.             }
  38.  
  39.             int min = Math.Min(count, n);
  40.  
  41.             for (int i = 0; i < min; i++)
  42.                 stack.Push(pq.Pop());
  43.  
  44.             for (int i = 0; i < min; i++)
  45.                 yield return stack.Pop();
  46.  
  47.         }
  48.  
  49.         public static IEnumerable<T> TakeOrdered<T>(this IEnumerable<T> list, int n, bool ascending = true) where T : IComparable<T>
  50.         {
  51.             return TakeOrdered(list, n, x => x,ascending);
  52.         }
  53.  
  54.  
  55.         class MyPriorityQueue<T> : Lucene.Net.Util.PriorityQueue<T>
  56.         {
  57.             Func<T, T, bool> _LessThan;
  58.  
  59.             internal MyPriorityQueue(int n, Func<T, T, bool> lessThan)
  60.             {
  61.                 _LessThan = lessThan;
  62.                 Initialize(n);
  63.             }
  64.  
  65.             public override bool LessThan(T a, T b)
  66.             {
  67.                 return _LessThan(a, b);
  68.             }
  69.         }
  70.     }
  71. }
  72.  
  73. #region Lucene.Net
  74. /// --------------------------------------------------------------------------------------
  75. /// PriorityQueue from Lucene.Net
  76. /// http://incubator.apache.org/lucene.net/
  77. ///
  78. /// Remove this part if you already have a reference to Lucene.Net
  79. /// --------------------------------------------------------------------------------------
  80.  
  81. /*
  82.  * Licensed to the Apache Software Foundation (ASF) under one or more
  83.  * contributor license agreements.  See the NOTICE file distributed with
  84.  * this work for additional information regarding copyright ownership.
  85.  * The ASF licenses this file to You under the Apache License, Version 2.0
  86.  * (the "License"); you may not use this file except in compliance with
  87.  * the License.  You may obtain a copy of the License at
  88.  *
  89.  * http://www.apache.org/licenses/LICENSE-2.0
  90.  *
  91.  * Unless required by applicable law or agreed to in writing, software
  92.  * distributed under the License is distributed on an "AS IS" BASIS,
  93.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  94.  * See the License for the specific language governing permissions and
  95.  * limitations under the License.
  96.  */
  97.  
  98. namespace Lucene.Net.Util
  99. {
  100.  
  101.     /// <summary>A PriorityQueue maintains a partial ordering of its elements such that the
  102.     /// least element can always be found in constant time.  Put()'s and pop()'s
  103.     /// require log(size) time.
  104.     ///
  105.     /// <p/><b>NOTE</b>: This class pre-allocates a full array of
  106.     /// length <code>maxSize+1</code>, in {@link #initialize}.
  107.     ///
  108.     /// </summary>
  109.     public abstract class PriorityQueue<T>
  110.     {
  111.         private int size;
  112.         private int maxSize;
  113.         protected internal T[] heap;
  114.  
  115.         /// <summary>Determines the ordering of objects in this priority queue.  Subclasses
  116.         /// must define this one method.
  117.         /// </summary>
  118.         public abstract bool LessThan(T a, T b);
  119.  
  120.         /// <summary> This method can be overridden by extending classes to return a sentinel
  121.         /// object which will be used by {@link #Initialize(int)} to fill the queue, so
  122.         /// that the code which uses that queue can always assume it's full and only
  123.         /// change the top without attempting to insert any new object.<br/>
  124.         ///
  125.         /// Those sentinel values should always compare worse than any non-sentinel
  126.         /// value (i.e., {@link #LessThan(Object, Object)} should always favor the
  127.         /// non-sentinel values).<br/>
  128.         ///
  129.         /// By default, this method returns false, which means the queue will not be
  130.         /// filled with sentinel values. Otherwise, the value returned will be used to
  131.         /// pre-populate the queue. Adds sentinel values to the queue.<br/>
  132.         ///
  133.         /// If this method is extended to return a non-null value, then the following
  134.         /// usage pattern is recommended:
  135.         ///
  136.         /// <pre>
  137.         /// // extends getSentinelObject() to return a non-null value.
  138.         /// PriorityQueue pq = new MyQueue(numHits);
  139.         /// // save the 'top' element, which is guaranteed to not be null.
  140.         /// MyObject pqTop = (MyObject) pq.top();
  141.         /// &lt;...&gt;
  142.         /// // now in order to add a new element, which is 'better' than top (after
  143.         /// // you've verified it is better), it is as simple as:
  144.         /// pqTop.change().
  145.         /// pqTop = pq.updateTop();
  146.         /// </pre>
  147.         ///
  148.         /// <b>NOTE:</b> if this method returns a non-null value, it will be called by
  149.         /// {@link #Initialize(int)} {@link #Size()} times, relying on a new object to
  150.         /// be returned and will not check if it's null again. Therefore you should
  151.         /// ensure any call to this method creates a new instance and behaves
  152.         /// consistently, e.g., it cannot return null if it previously returned
  153.         /// non-null.
  154.         ///
  155.         /// </summary>
  156.         /// <returns> the sentinel object to use to pre-populate the queue, or null if
  157.         /// sentinel objects are not supported.
  158.         /// </returns>
  159.         protected internal virtual T GetSentinelObject()
  160.         {
  161.             return default(T);
  162.         }
  163.  
  164.         /// <summary>Subclass constructors must call this. </summary>
  165.         protected internal void Initialize(int maxSize)
  166.         {
  167.             size = 0;
  168.             int heapSize;
  169.             if (0 == maxSize)
  170.                 // We allocate 1 extra to avoid if statement in top()
  171.                 heapSize = 2;
  172.             else
  173.             {
  174.                 if (maxSize == Int32.MaxValue)
  175.                 {
  176.                     // Don't wrap heapSize to -1, in this case, which
  177.                     // causes a confusing NegativeArraySizeException.
  178.                     // Note that very likely this will simply then hit
  179.                     // an OOME, but at least that's more indicative to
  180.                     // caller that this values is too big.  We don't +1
  181.                     // in this case, but it's very unlikely in practice
  182.                     // one will actually insert this many objects into
  183.                     // the PQ:
  184.                     heapSize = Int32.MaxValue;
  185.                 }
  186.                 else
  187.                 {
  188.                     // NOTE: we add +1 because all access to heap is
  189.                     // 1-based not 0-based.  heap[0] is unused.
  190.                     heapSize = maxSize + 1;
  191.                 }
  192.             }
  193.             heap = new T[heapSize];
  194.             this.maxSize = maxSize;
  195.  
  196.             // If sentinel objects are supported, populate the queue with them
  197.             T sentinel = GetSentinelObject();
  198.             if (sentinel != null && !sentinel.Equals(default(T)))
  199.             {
  200.                 heap[1] = sentinel;
  201.                 for (int i = 2; i < heap.Length; i++)
  202.                 {
  203.                     heap[i] = GetSentinelObject();
  204.                 }
  205.                 size = maxSize;
  206.             }
  207.         }
  208.  
  209.         /// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
  210.         /// more objects than maxSize from initialize a RuntimeException
  211.         /// (ArrayIndexOutOfBound) is thrown.
  212.         ///
  213.         /// </summary>
  214.         /// <deprecated> use {@link #Add(Object)} which returns the new top object,
  215.         /// saving an additional call to {@link #Top()}.
  216.         /// </deprecated>
  217.         [Obsolete("use Add(Object) which returns the new top object, saving an additional call to Top().")]
  218.         public void Put(T element)
  219.         {
  220.             size++;
  221.             heap[size] = element;
  222.             UpHeap();
  223.         }
  224.  
  225.         /// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
  226.         /// more objects than maxSize from initialize an
  227.         /// {@link ArrayIndexOutOfBoundsException} is thrown.
  228.         ///
  229.         /// </summary>
  230.         /// <returns> the new 'top' element in the queue.
  231.         /// </returns>
  232.         public T Add(T element)
  233.         {
  234.             size++;
  235.             heap[size] = element;
  236.             UpHeap();
  237.             return heap[1];
  238.         }
  239.  
  240.         /// <summary> Adds element to the PriorityQueue in log(size) time if either the
  241.         /// PriorityQueue is not full, or not lessThan(element, top()).
  242.         ///
  243.         /// </summary>
  244.         /// <param name="element">
  245.         /// </param>
  246.         /// <returns> true if element is added, false otherwise.
  247.         /// </returns>
  248.         /// <deprecated> use {@link #InsertWithOverflow(Object)} instead, which
  249.         /// encourages objects reuse.
  250.         /// </deprecated>
  251.         [Obsolete("use InsertWithOverflow(Object) instead, which encourages objects reuse.")]
  252.         public virtual bool Insert(T element)
  253.         {
  254.             return !element.Equals(InsertWithOverflow(element));
  255.         }
  256.  
  257.         /// <summary> insertWithOverflow() is the same as insert() except its
  258.         /// return value: it returns the object (if any) that was
  259.         /// dropped off the heap because it was full. This can be
  260.         /// the given parameter (in case it is smaller than the
  261.         /// full heap's minimum, and couldn't be added), or another
  262.         /// object that was previously the smallest value in the
  263.         /// heap and now has been replaced by a larger one, or null
  264.         /// if the queue wasn't yet full with maxSize elements.
  265.         /// </summary>
  266.         public virtual T InsertWithOverflow(T element)
  267.         {
  268.             if (size < maxSize)
  269.             {
  270.                 Put(element);
  271.                 return default(T);
  272.             }
  273.             else if (size > 0 && !LessThan(element, heap[1]))
  274.             {
  275.                 T ret = heap[1];
  276.                 heap[1] = element;
  277.                 AdjustTop();
  278.                 return ret;
  279.             }
  280.             else
  281.             {
  282.                 return element;
  283.             }
  284.         }
  285.  
  286.         /// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
  287.         public T Top()
  288.         {
  289.             // We don't need to check size here: if maxSize is 0,
  290.             // then heap is length 2 array with both entries null.
  291.             // If size is 0 then heap[1] is already null.
  292.             return heap[1];
  293.         }
  294.  
  295.         /// <summary>Removes and returns the least element of the PriorityQueue in log(size)
  296.         /// time.
  297.         /// </summary>
  298.         public T Pop()
  299.         {
  300.             if (size > 0)
  301.             {
  302.                 T result = heap[1]; // save first value
  303.                 heap[1] = heap[size]; // move last to first
  304.                 heap[size] = default(T); // permit GC of objects
  305.                 size--;
  306.                 DownHeap(); // adjust heap
  307.                 return result;
  308.             }
  309.             else
  310.                 return default(T);
  311.         }
  312.  
  313.         /// <summary> Should be called when the Object at top changes values. Still log(n) worst
  314.         /// case, but it's at least twice as fast to
  315.         ///
  316.         /// <pre>
  317.         /// pq.top().change();
  318.         /// pq.adjustTop();
  319.         /// </pre>
  320.         ///
  321.         /// instead of
  322.         ///
  323.         /// <pre>
  324.         /// o = pq.pop();
  325.         /// o.change();
  326.         /// pq.push(o);
  327.         /// </pre>
  328.         ///
  329.         /// </summary>
  330.         /// <deprecated> use {@link #UpdateTop()} which returns the new top element and
  331.         /// saves an additional call to {@link #Top()}.
  332.         /// </deprecated>
  333.         [Obsolete("use UpdateTop() which returns the new top element and saves an additional call to Top()")]
  334.         public void AdjustTop()
  335.         {
  336.             DownHeap();
  337.         }
  338.  
  339.         /// <summary> Should be called when the Object at top changes values. Still log(n) worst
  340.         /// case, but it's at least twice as fast to
  341.         ///
  342.         /// <pre>
  343.         /// pq.top().change();
  344.         /// pq.updateTop();
  345.         /// </pre>
  346.         ///
  347.         /// instead of
  348.         ///
  349.         /// <pre>
  350.         /// o = pq.pop();
  351.         /// o.change();
  352.         /// pq.push(o);
  353.         /// </pre>
  354.         ///
  355.         /// </summary>
  356.         /// <returns> the new 'top' element.
  357.         /// </returns>
  358.         public T UpdateTop()
  359.         {
  360.             DownHeap();
  361.             return heap[1];
  362.         }
  363.  
  364.         /// <summary>Returns the number of elements currently stored in the PriorityQueue. </summary>
  365.         public int Size()
  366.         {
  367.             return size;
  368.         }
  369.  
  370.         /// <summary>Removes all entries from the PriorityQueue. </summary>
  371.         public void Clear()
  372.         {
  373.             for (int i = 0; i <= size; i++)
  374.             {
  375.                 heap[i] = default(T);
  376.             }
  377.             size = 0;
  378.         }
  379.  
  380.         private void UpHeap()
  381.         {
  382.             int i = size;
  383.             T node = heap[i]; // save bottom node
  384.             int j = URShift(i, 1);
  385.             while (j > 0 && LessThan(node, heap[j]))
  386.             {
  387.                 heap[i] = heap[j]; // shift parents down
  388.                 i = j;
  389.                 j = URShift(j, 1);
  390.             }
  391.             heap[i] = node; // install saved node
  392.         }
  393.  
  394.         private void DownHeap()
  395.         {
  396.             int i = 1;
  397.             T node = heap[i]; // save top node
  398.             int j = i << 1; // find smaller child
  399.             int k = j + 1;
  400.             if (k <= size && LessThan(heap[k], heap[j]))
  401.             {
  402.                 j = k;
  403.             }
  404.             while (j <= size && LessThan(heap[j], node))
  405.             {
  406.                 heap[i] = heap[j]; // shift up child
  407.                 i = j;
  408.                 j = i << 1;
  409.                 k = j + 1;
  410.                 if (k <= size && LessThan(heap[k], heap[j]))
  411.                 {
  412.                     j = k;
  413.                 }
  414.             }
  415.             heap[i] = node; // install saved node
  416.         }
  417.  
  418.         public int URShift(int number, int bits)
  419.         {
  420.             return (int)(((uint)number) >> bits);
  421.         }
  422.     }
  423. }
  424. #endregion
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement