Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- This code relates to the following DBA.StackExchange answer:
- http://dba.stackexchange.com/questions/131759/is-there-a-sql-server-implementation-of-the-longest-common-substring-problem/131766#131766
- A T-SQL installation script (no external DLL) containing only this User-Defined Aggregate (UDA) is located at:
- http://pastebin.com/wnLwT1GM
- Date: 2016-03-16
- Version: 1.0.3
- Copyright (c) 2016 Sql Quantum Leap. All rights reserved.
- http://www.SqlQuantumLeap.com
- */
- using System;
- using System.Collections.Generic;
- using System.Data.SqlTypes;
- using System.IO;
- using System.Xml;
- using Microsoft.SqlServer.Server;
/// <summary>
/// SQLCLR user-defined aggregate that collects the substrings common to every
/// non-NULL string it is given (compared with InvariantCultureIgnoreCase) and
/// returns them as an XML document rooted at &lt;Items&gt;.
/// </summary>
/// <remarks>
/// Lifecycle of the state:
///   * the first non-NULL value is parked in _TempFirstValue;
///   * the second value is compared against it and every common substring is
///     stored in _Matches (this is the "first comparison");
///   * every later value only prunes _Matches;
///   * a blank (whitespace-only) value, or running out of matches, sets
///     _IsEmpty, after which all further input is ignored.
/// </remarks>
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(Format.UserDefined,
    IsInvariantToDuplicates = true, IsInvariantToNulls = true, IsInvariantToOrder = true,
    IsNullIfEmpty = false, MaxByteSize = -1)]
public struct LongestCommonSubstring : IBinarySerialize
{
    // Comparison used for every substring search; the equality checks use the
    // matching StringComparer.InvariantCultureIgnoreCase.
    private const StringComparison MatchComparison = StringComparison.InvariantCultureIgnoreCase;

    private bool _IsEmpty;               // true once no common substring can exist
    private string _TempFirstValue;      // first value seen, held until a second one arrives
    private bool _IsFirstComparison;     // true until two values have been compared
    private List<string> _Matches;       // common substrings found so far (no duplicates)
    private bool _HasMerged;             // reported in the "Merged" attribute of the result
    private bool? _ReturnAllSubstrings;  // option captured from the first Accumulate call; null until then

    /// <summary>Resets the aggregate to its initial, nothing-seen-yet state.</summary>
    public void Init()
    {
        _IsEmpty = false;
        _TempFirstValue = String.Empty;
        _IsFirstComparison = true;
        _Matches = new List<string>();
        _HasMerged = false;
        _ReturnAllSubstrings = null;
    }

    /// <summary>Feeds one value into the aggregate.</summary>
    /// <param name="SomeString">Value to compare; NULL values are skipped.</param>
    /// <param name="ReturnAllSubstrings">
    /// Output option. Only the value passed on the first call is honored.
    /// </param>
    public void Accumulate([SqlFacet(MaxSize = 4000)] SqlString SomeString, SqlBoolean ReturnAllSubstrings)
    {
        // grab option on first call only to avoid potential "odd" behavior
        if (!_ReturnAllSubstrings.HasValue)
        {
            _ReturnAllSubstrings = ReturnAllSubstrings.IsTrue;
        }

        if (SomeString.IsNull || _IsEmpty)
        {
            return;
        }

        string value = SomeString.Value;

        // A blank value cannot share a substring with anything: the result is empty.
        if (value.Trim() == String.Empty)
        {
            _IsEmpty = true;
            _Matches.Clear();
            _TempFirstValue = String.Empty;
            return;
        }

        if (!_IsFirstComparison)
        {
            // Candidates already exist; keep only those also present in this value.
            RemoveMissingMatches(value);
            if (_Matches.Count == 0)
            {
                _IsEmpty = true;
            }
            return;
        }

        if (_TempFirstValue == String.Empty)
        {
            // Very first value: nothing to compare against yet.
            _TempFirstValue = value;
            return;
        }

        // Second value: build the initial candidate list.
        if (ExtractSubstrings(value, _TempFirstValue) == 0)
        {
            _IsEmpty = true;
        }
        _TempFirstValue = String.Empty;
        _IsFirstComparison = false;
    }

    /// <summary>Drops every stored match that does not occur in <paramref name="SearchIn"/>.</summary>
    private void RemoveMissingMatches(string SearchIn)
    {
        // The lambda captures only the parameter; struct members may not be captured.
        _Matches.RemoveAll(match =>
            SearchIn.Length < match.Length ||
            SearchIn.IndexOf(match, MatchComparison) < 0);
    }

    /// <summary>
    /// Adds to _Matches every substring of the shorter argument that occurs in
    /// the longer one, longest first, skipping duplicates.
    /// </summary>
    /// <returns>Length of the longest common substring, or 0 when there is none.</returns>
    private int ExtractSubstrings(string SearchIn, string SearchFor)
    {
        // Always enumerate substrings of the shorter string.
        if (SearchIn.Length < SearchFor.Length)
        {
            string swap = SearchFor;
            SearchFor = SearchIn;
            SearchIn = swap;
        }

        // Set-based duplicate detection; _Matches itself keeps discovery order.
        HashSet<string> seen = new HashSet<string>(_Matches, StringComparer.InvariantCultureIgnoreCase);
        int longestLength = 0;

        for (int length = SearchFor.Length; length > 0; length--)
        {
            for (int start = 0; start + length <= SearchFor.Length; start++)
            {
                string candidate = SearchFor.Substring(start, length);
                if (SearchIn.IndexOf(candidate, MatchComparison) < 0)
                {
                    continue;
                }

                // Lengths are visited in descending order, so the first hit is the longest.
                if (longestLength == 0)
                {
                    longestLength = length;
                }

                if (seen.Add(candidate)) // no duplicates
                {
                    _Matches.Add(candidate);
                }
            }
        }

        return longestLength;
    }

    /// <summary>Returns the indexes into _Matches of all entries sharing the maximum length.</summary>
    private List<int> GetMaxLengthItems()
    {
        int maxLength = 0;
        List<int> indexes = new List<int>();

        for (int index = 0; index < _Matches.Count; index++)
        {
            int length = _Matches[index].Length;
            if (length > maxLength)
            {
                // New maximum: everything collected so far is shorter.
                indexes.Clear();
                maxLength = length;
                indexes.Add(index);
            }
            else if (length == maxLength)
            {
                indexes.Add(index);
            }
        }

        return indexes;
    }

    /// <summary>Combines the partial state of <paramref name="Incoming"/> into this instance.</summary>
    public void Merge(LongestCommonSubstring Incoming)
    {
        _HasMerged = true;

        // This instance may never have accumulated a row itself; without adopting
        // the option here, Terminate/Write would fail on a null value.
        if (!_ReturnAllSubstrings.HasValue)
        {
            _ReturnAllSubstrings = Incoming._ReturnAllSubstrings;
        }

        if (_IsEmpty || Incoming._IsEmpty)
        {
            _IsEmpty = true;
            if (_Matches == null)
            {
                _Matches = new List<string>();
            }
            else
            {
                _Matches.Clear();
            }
            _TempFirstValue = String.Empty;
            return;
        }

        if (Incoming._IsFirstComparison)
        {
            // Incoming holds at most one raw value. Feeding it through Accumulate
            // parks it, starts the first comparison, or prunes existing matches,
            // whichever this instance's state calls for.
            if (!String.IsNullOrEmpty(Incoming._TempFirstValue))
            {
                Accumulate(Incoming._TempFirstValue, _ReturnAllSubstrings.GetValueOrDefault());
            }
            return;
        }

        if (_IsFirstComparison)
        {
            // Incoming already has candidates while this side has at most one raw
            // value: take the candidates, then prune them with that value.
            string pending = _TempFirstValue;
            _TempFirstValue = String.Empty;
            _IsFirstComparison = false;
            _Matches = Incoming._Matches;
            if (!String.IsNullOrEmpty(pending))
            {
                Accumulate(pending, _ReturnAllSubstrings.GetValueOrDefault());
            }
            return;
        }

        // Both sides have candidate lists: keep only what appears in both.
        HashSet<string> incomingMatches =
            new HashSet<string>(Incoming._Matches, StringComparer.InvariantCultureIgnoreCase);
        _Matches.RemoveAll(match => !incomingMatches.Contains(match));

        if (_Matches.Count == 0)
        {
            _IsEmpty = true;
        }
    }

    /// <summary>Produces the aggregate result.</summary>
    /// <returns>
    /// An empty &lt;Items&gt; element when no common substring exists; SQL NULL when
    /// fewer than two values were compared; otherwise an &lt;Items&gt; element with
    /// one &lt;Item&gt; per longest match, or per match (flagged with IsLongest)
    /// when the ReturnAllSubstrings option was set.
    /// </returns>
    public SqlXml Terminate()
    {
        if (_IsEmpty)
        {
            return ToSqlXml("<Items Merged=\"" + _HasMerged.ToString() + "\"></Items>");
        }

        if (_IsFirstComparison)
        {
            return SqlXml.Null;
        }

        List<int> longestIndexes = GetMaxLengthItems();
        XmlDocument document = new XmlDocument();
        XmlElement items = document.CreateElement("Items");
        items.SetAttribute("Merged", _HasMerged.ToString());

        if (_ReturnAllSubstrings.GetValueOrDefault())
        {
            // Every index in longestIndexes has the same length, so a length
            // comparison is equivalent to looking the index up in that list.
            int longestLength = longestIndexes.Count > 0 ? _Matches[longestIndexes[0]].Length : 0;
            for (int index = 0; index < _Matches.Count; index++)
            {
                XmlElement item = document.CreateElement("Item");
                item.InnerText = _Matches[index];
                item.SetAttribute("IsLongest", (_Matches[index].Length == longestLength).ToString());
                items.AppendChild(item);
            }
        }
        else
        {
            foreach (int index in longestIndexes)
            {
                XmlElement item = document.CreateElement("Item");
                item.InnerText = _Matches[index];
                items.AppendChild(item);
            }
        }

        document.AppendChild(items);
        return ToSqlXml(document.OuterXml);
    }

    /// <summary>Wraps an XML string in a SqlXml value.</summary>
    private static SqlXml ToSqlXml(string Xml)
    {
        return new SqlXml(XmlReader.Create(new StringReader(Xml)));
    }

    /// <summary>Restores the state written by <see cref="Write"/>.</summary>
    public void Read(BinaryReader Reader)
    {
        // The option is stored as (has value, value) so "not captured yet" survives.
        bool hasOption = Reader.ReadBoolean();
        bool option = Reader.ReadBoolean();
        _ReturnAllSubstrings = hasOption ? (bool?)option : null;

        _HasMerged = Reader.ReadBoolean();
        _IsEmpty = Reader.ReadBoolean();
        _Matches = new List<string>();

        // no sense in doing extra work if there can't be any matches
        if (_IsEmpty)
        {
            // Still leave every field usable: Merge clears _Matches on this path.
            _TempFirstValue = String.Empty;
            _IsFirstComparison = false; // not consulted once _IsEmpty is set
            return;
        }

        _TempFirstValue = Reader.ReadString();
        _IsFirstComparison = Reader.ReadBoolean();
        int matchCount = Reader.ReadInt32();
        for (int index = 0; index < matchCount; index++)
        {
            _Matches.Add(Reader.ReadString());
        }
    }

    /// <summary>Serializes the state in the layout expected by <see cref="Read"/>.</summary>
    public void Write(BinaryWriter Writer)
    {
        Writer.Write(_ReturnAllSubstrings.HasValue);            // bool
        Writer.Write(_ReturnAllSubstrings.GetValueOrDefault()); // bool
        Writer.Write(_HasMerged);                               // bool
        Writer.Write(_IsEmpty);                                 // bool

        // no sense in doing extra work if there can't be any matches
        if (_IsEmpty)
        {
            return;
        }

        Writer.Write(_TempFirstValue ?? String.Empty);          // string
        Writer.Write(_IsFirstComparison);                       // bool

        int matchCount = _Matches == null ? 0 : _Matches.Count;
        Writer.Write(matchCount);                               // int
        for (int index = 0; index < matchCount; index++)
        {
            Writer.Write(_Matches[index]);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement