SqlQuantumLeap

SQLCLR UDF to parse a string into NGrams

Jul 9th, 2016
92
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /**************************************************************************************
  2.  This script relates to the following SQL Server Central forum thread:
  3.  Simple CLR request ( http://www.sqlservercentral.com/Forums/Topic1795321-3412-1.aspx )
  4.  
  5.  Written by: Solomon Rutzky
  6.  On: 2016-06-26
  7.  
  8.  This script is located at:   http://pastebin.com/Rd4G7T3B
  9. ***************************************************************************************/
  10.  
  11. using System; // Array, Object
  12. using System.Collections; // IEnumerable
  13. using System.Data.SqlTypes; // SqlChars, SqlInt32
  14. using Microsoft.SqlServer.Server; // SqlFacet
  15.  
  /// <summary>
  /// SQLCLR table-valued function that splits a string into overlapping,
  /// fixed-size character N-grams.
  /// </summary>
  public class NGrams
  {
      /// <summary>
      /// Produces one row for every contiguous run of <paramref name="TokenSize"/>
      /// characters in <paramref name="InputString"/>, moving one character at a time.
      /// </summary>
      /// <param name="InputString">The text to tokenize.</param>
      /// <param name="TokenSize">The number of characters in each token.</param>
      /// <returns>
      /// A sequence of <see cref="ResultRow"/> objects whose <c>Position</c> is the
      /// 1-based starting offset of the token. The sequence is empty when either
      /// argument is NULL, when <paramref name="TokenSize"/> is less than 1, or when
      /// <paramref name="TokenSize"/> exceeds the length of the input.
      /// </returns>
      [Microsoft.SqlServer.Server.SqlFunction(FillRowMethodName = "NGramSolomon_FillRow",
                                              TableDefinition = "Position INT, Token NVARCHAR(4000)")]
      public static IEnumerable NGramSolomon([SqlFacet(MaxSize = 4000)] SqlChars InputString, SqlInt32 TokenSize)
      {
          // A CLR null reference is treated the same way as a SQL NULL.
          if (InputString == null || InputString.IsNull || TokenSize.IsNull)
          {
              yield break;
          }

          int _Take = TokenSize.Value;

          if (_Take < 1)
          {
              yield break;
          }

          // Highest 0-based index at which a full token still fits.
          int _Limit = ((int)InputString.Length - _Take);

          if (_Limit < 0)
          {
              yield break;
          }

          char[] _Chars = InputString.Value;

          for (int _Index = 0; _Index <= _Limit; _Index++)
          {
              // Every row gets its own buffer and its own ResultRow. Yielding a single
              // shared instance means a consumer that holds on to rows past the next
              // MoveNext() would see all of them change to the most recent token.
              char[] _Token = new char[_Take];
              Array.Copy(_Chars, _Index, _Token, 0, _Take);

              ResultRow _Row = new ResultRow();
              _Row.Position = (_Index + 1);
              _Row.Token = new SqlChars(_Token);

              yield return _Row;
          }
      }

      /// <summary>
      /// One output row: the 1-based starting position of a token and the token text.
      /// </summary>
      public class ResultRow
      {
          public int Position;
          public SqlChars Token;
      }

      /// <summary>
      /// Fill-row method named by the <c>SqlFunction</c> attribute on
      /// <see cref="NGramSolomon"/>; copies one yielded <see cref="ResultRow"/> into
      /// the output columns.
      /// </summary>
      /// <param name="obj">An object yielded by <see cref="NGramSolomon"/>.</param>
      /// <param name="position">Receives the row's <c>Position</c> value.</param>
      /// <param name="token">Receives the row's <c>Token</c> value.</param>
      public static void NGramSolomon_FillRow(Object obj, out SqlInt32 position, out SqlChars token)
      {
          ResultRow _Item = (ResultRow)obj;
          position = new SqlInt32(_Item.Position);
          token = _Item.Token;
      }
  }
RAW Paste Data