// Untitled

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace tp
{
    /// <summary>
    /// Ranks a list of candidate words by how similar each one looks to a reference word.
    /// </summary>
    /// <remarks>
    /// All work happens in the constructor and the results are returned through its
    /// <c>out</c> parameters; the constructed instance itself holds no state.
    /// </remarks>
    public class FindSimilaryLookingWords
    {
        /// <summary>
        /// Scores every entry of <paramref name="words"/> against <paramref name="word"/> with
        /// <see cref="Compute2WordsSimilarity"/> and returns the entries ordered from the highest
        /// score to the lowest.
        /// </summary>
        /// <param name="word">The reference word every candidate is compared with.</param>
        /// <param name="words">Candidate words. A null list is treated as an empty one.</param>
        /// <param name="wordsSortedBySimilarity">
        /// Receives the candidates, best match first. Candidates with equal scores keep the
        /// relative order they had in <paramref name="words"/>.
        /// </param>
        /// <param name="similarities">
        /// Receives the scores, index-aligned with <paramref name="wordsSortedBySimilarity"/>.
        /// </param>
        public FindSimilaryLookingWords(string word, List<string> words,
            out List<string> wordsSortedBySimilarity, out List<double> similarities)
        {
            wordsSortedBySimilarity = new List<string>();
            similarities = new List<double>();

            // Nothing to rank: hand back the two empty lists instead of crashing on a null list.
            if (words == null)
            {
                return;
            }

            // Score each candidate once, keeping word and score together so they cannot drift apart.
            var scored = new List<KeyValuePair<string, double>>(words.Count);
            foreach (string comparedWord in words)
            {
                double similarity;
                new Compute2WordsSimilarity(word, comparedWord, out similarity);
                scored.Add(new KeyValuePair<string, double>(comparedWord, similarity));
            }

            // OrderByDescending is a stable sort, so ties stay in input order. The default
            // double comparer treats NaN as the smallest value, so NaN scores end up last.
            foreach (KeyValuePair<string, double> entry in scored.OrderByDescending(e => e.Value))
            {
                wordsSortedBySimilarity.Add(entry.Key);
                similarities.Add(entry.Value);
            }
        }
    }

    /// <summary>
    /// Scores how similar two words look, on a scale where 100 means "equal".
    /// </summary>
    /// <remarks>
    /// The score starts from the sum of all feature weights (205) and subtracts one penalty per
    /// feature: first/last letter mismatches, length difference, and how many characters had to
    /// be moved, removed or added to turn <c>word1</c> into <c>word2</c>. Most penalties are
    /// ratios against <c>word1.Length</c> and are not capped, so very different words can score
    /// below 0. All work happens in the constructor; the instance holds no state.
    /// </remarks>
    public class Compute2WordsSimilarity
    {
        /// <summary>
        /// Computes the similarity of <paramref name="word1"/> and <paramref name="word2"/>.
        /// </summary>
        /// <param name="word1">Reference word; its length normalises most penalties.</param>
        /// <param name="word2">Word compared against <paramref name="word1"/>.</param>
        /// <param name="similarity">
        /// Receives the score. 0 when either word is null or exactly one of them is empty,
        /// 100 when both are empty or no penalty applies.
        /// </param>
        public Compute2WordsSimilarity(string word1, string word2,
            // 0 = no similarity, 100 = equal
            out double similarity
            )
        {
            if (word1 == null || word2 == null)
            {
                similarity = 0;
                return;
            }

            // The ratios below divide by word1.Length, which would yield NaN for an empty word.
            // Two empty words are equal; an empty word shares nothing with a non-empty one.
            if (word1.Length == 0 || word2.Length == 0)
            {
                similarity = (word1.Length == word2.Length) ? 100 : 0;
                return;
            }

            double maxSimilarity = 0; // running sum of the weights of all features
            double penalty = 0;       // running sum of the points removed
            double importance;        // weight of the feature currently being scored

            // NOTE(review): these checks are case-sensitive while block matching below ignores
            // case -- confirm that treating "Apple"/"apple" as a first-letter mismatch is intended.
            bool sameFirst = getFirstChar(word1) == getFirstChar(word2);
            bool sameLast = getLastChar(word1) == getLastChar(word2);

            // neither the first nor the last letter matches: remove 30 points
            importance = 30;
            maxSimilarity += importance;
            if (!sameFirst && !sameLast)
            {
                penalty += importance;
            }

            // first letter differs but the last letter matches: remove 15 points
            importance = 15;
            maxSimilarity += importance;
            if (!sameFirst && sameLast)
            {
                penalty += importance;
            }

            // first letter matches but the last letter differs: remove 10 points
            importance = 10;
            maxSimilarity += importance;
            if (sameFirst && !sameLast)
            {
                penalty += importance;
            }

            // words length difference: remove max 5 points
            importance = 5;
            maxSimilarity += importance;
            penalty += importance * (1 - Math.Min(word1.Length, word2.Length) / (double)Math.Max(word1.Length, word2.Length));

            // Pair up the character blocks the two words share and collect what is left over.
            List<int> setsMatchedIndexesWord1; // start of each shared block in word1
            List<int> setsMatchedIndexesWord2; // start of the same block in word2
            List<int> setsLengths;             // length of each shared block
            List<int> setsMissingIndexes;      // starts of word1 runs that have no partner in word2
            List<int> setsMissingLengths;
            List<int> setsAddedIndexes;        // starts of word2 runs that have no partner in word1
            List<int> setsAddedLengths;
            pairShuffledSets(word1.ToLower(), word2.ToLower(),
                out setsMatchedIndexesWord1, out setsMatchedIndexesWord2,
                out setsLengths, out setsMissingIndexes,
                out setsMissingLengths, out setsAddedIndexes, out setsAddedLengths);

            // A shared block counts as shuffled when it starts at a different index in each word.
            double countShuffled = 0;       // number of shuffled blocks
            double sumShuffled = 0;         // number of characters in shuffled blocks
            double sumDistanceShuffle = 0;  // total index distance the shuffled blocks moved
            for (int i = 0; i < setsMatchedIndexesWord1.Count; i++)
            {
                int distance = Math.Abs(setsMatchedIndexesWord1[i] - setsMatchedIndexesWord2[i]);
                if (distance != 0)
                {
                    sumDistanceShuffle += distance;
                    sumShuffled += setsLengths[i];
                    countShuffled++;
                }
            }

            // share of word1's letters that were shuffled: weight 10
            importance = 10;
            maxSimilarity += importance;
            penalty += importance * sumShuffled / (double)word1.Length;

            // summed distance of movement when shuffling: weight 10 (not capped)
            importance = 10;
            maxSimilarity += importance;
            penalty += importance * sumDistanceShuffle / (double)word1.Length;

            // share of word1's letters missing from word2: weight 100
            importance = 100;
            maxSimilarity += importance;
            int sumRemoved = setsMissingLengths.Sum();
            penalty += importance * sumRemoved / (double)word1.Length;

            // letters of word2 that do not come from word1: weight 10 (not capped)
            importance = 10;
            maxSimilarity += importance;
            int sumAdded = setsAddedLengths.Sum();
            penalty += importance * sumAdded / (double)word1.Length;

            // number of breaks when shuffling (count of shuffled blocks): weight 5
            importance = 5;
            maxSimilarity += importance;
            penalty += importance * countShuffled / (double)word1.Length;

            // number of breaks when removing (count of missing runs): weight 5
            importance = 5;
            maxSimilarity += importance;
            penalty += importance * setsMissingIndexes.Count / (double)word1.Length;

            // number of breaks when adding (count of added runs): weight 5
            importance = 5;
            maxSimilarity += importance;
            penalty += importance * setsAddedIndexes.Count / (double)word1.Length;

            // compute similarity as the percentage of the maximum that survived the penalties
            similarity = 100 * (maxSimilarity - penalty) / maxSimilarity;
        }

        /// <summary>
        /// Greedily pairs equal character blocks of the two words, longest blocks first, and
        /// reports the paired blocks together with the characters of either word left unpaired.
        /// Every character of either word belongs to at most one pair.
        /// </summary>
        /// <param name="word1">First word.</param>
        /// <param name="word2">Second word.</param>
        /// <param name="setsMatchedIndexesWord1">Start index in word1 of each paired block.</param>
        /// <param name="setsMatchedIndexesWord2">Start index in word2 of each paired block.</param>
        /// <param name="setsLengths">Length of each paired block.</param>
        /// <param name="setsMissingIndexes">Start index in word1 of each run of unpaired characters.</param>
        /// <param name="setsMissingLengths">Length of each unpaired run of word1.</param>
        /// <param name="setsAddedIndexes">Start index in word2 of each run of unpaired characters.</param>
        /// <param name="setsAddedLengths">Length of each unpaired run of word2.</param>
        private void pairShuffledSets(string word1, string word2,
            out List<int> setsMatchedIndexesWord1, // indexes in word1
            out List<int> setsMatchedIndexesWord2, // indexes in word2
            out List<int> setsLengths,
            out List<int> setsMissingIndexes, // indexes in word1
            out List<int> setsMissingLengths,
            out List<int> setsAddedIndexes, // indexes in word2
            out List<int> setsAddedLengths
            )
        {
            setsMatchedIndexesWord1 = new List<int>();
            setsMatchedIndexesWord2 = new List<int>();
            setsLengths = new List<int>();
            setsMissingIndexes = new List<int>();
            setsMissingLengths = new List<int>();
            setsAddedIndexes = new List<int>();
            setsAddedLengths = new List<int>();

            // One flag per character: true once that character is part of a paired block.
            var takenBits1 = new bool[word1.Length];
            var takenBits2 = new bool[word2.Length];
            bool areAnyBitsFree = true;

            // get setsMatchedIndexesWord1, setsMatchedIndexesWord2, setsLengths
            for (int blockSize = word1.Length; blockSize >= 1 && areAnyBitsFree; blockSize--) // for each possible block length
            {
                for (int x1 = 0; x1 <= word1.Length - blockSize && areAnyBitsFree; x1++) // take a block from every possible position in word1
                {
                    // A block that overlaps characters of word1 which are already paired would
                    // pair those characters a second time, so it is not a candidate.
                    if (isRangeTaken(takenBits1, x1, blockSize))
                    {
                        continue;
                    }

                    string block = word1.Substring(x1, blockSize);
                    int startIndex = 0;
                    while (true)
                    {
                        // Ordinal matching guarantees the match spans exactly block.Length
                        // characters, which the per-character bookkeeping below relies on.
                        int x2 = word2.IndexOf(block, startIndex, StringComparison.OrdinalIgnoreCase);
                        if (x2 == -1)
                        {
                            break;
                        }

                        // This occurrence is (partly) used by an earlier pair: try the next one in word2.
                        if (isRangeTaken(takenBits2, x2, blockSize))
                        {
                            startIndex = x2 + 1;
                            continue;
                        }

                        // we found a match of two parts. update bookkeeping and out variables
                        markRange(takenBits1, x1, blockSize);
                        markRange(takenBits2, x2, blockSize);
                        setsMatchedIndexesWord1.Add(x1);
                        setsMatchedIndexesWord2.Add(x2);
                        setsLengths.Add(blockSize);

                        // Once either word is fully paired no further match is possible.
                        areAnyBitsFree = Array.IndexOf(takenBits1, false) != -1
                            && Array.IndexOf(takenBits2, false) != -1;
                        break;
                    }
                }
            }

            // get setsMissingIndexes and setsMissingLengths from takenBits1
            collectFreeRuns(takenBits1, setsMissingIndexes, setsMissingLengths);

            // get setsAddedIndexes and setsAddedLengths from takenBits2
            collectFreeRuns(takenBits2, setsAddedIndexes, setsAddedLengths);
        }

        // Returns true when any flag in [start, start + length) is set.
        private static bool isRangeTaken(bool[] takenBits, int start, int length)
        {
            for (int i = start; i < start + length; i++)
            {
                if (takenBits[i])
                {
                    return true;
                }
            }
            return false;
        }

        // Sets every flag in [start, start + length).
        private static void markRange(bool[] takenBits, int start, int length)
        {
            for (int i = start; i < start + length; i++)
            {
                takenBits[i] = true;
            }
        }

        // Appends the start index and length of every maximal run of unset flags.
        private static void collectFreeRuns(bool[] takenBits, List<int> runIndexes, List<int> runLengths)
        {
            int i = 0;
            while (i < takenBits.Length)
            {
                if (takenBits[i])
                {
                    i++;
                    continue;
                }

                int runStart = i;
                while (i < takenBits.Length && !takenBits[i])
                {
                    i++;
                }
                runIndexes.Add(runStart);
                runLengths.Add(i - runStart);
            }
        }

        // First character of s, or '\0' when s is null or empty.
        private char getFirstChar(string s)
        {
            if (s == null || s.Length == 0)
                return '\0';
            return s[0];
        }

        // Last character of s, or '\0' when s is null or empty.
        private char getLastChar(string s)
        {
            if (s == null || s.Length == 0)
                return '\0';
            return s[s.Length - 1];
        }
    }
}