Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Computes the Levenshtein edit distance between two strings: the minimum
/// number of single-character insertions, deletions and substitutions needed
/// to turn <paramref name="source"/> into <paramref name="target"/>.
/// Characters are compared as UTF-16 code units (C# <c>char</c>), ordinally.
/// </summary>
/// <param name="source">The starting string. <c>null</c> is treated as empty.</param>
/// <param name="target">The string to reach. <c>null</c> is treated as empty.</param>
/// <returns>
/// The edit distance: 0 for identical strings, and the length of the other
/// string when one side is empty.
/// </returns>
int ComputeLevenshteinDistance(string source, string target)
{
    // Treat null like the empty string so the result is still a true distance.
    string from = source ?? string.Empty;
    string to = target ?? string.Empty;

    // Turning an empty string into another one costs one insertion per character
    // (and symmetrically one deletion per character in the other direction).
    if (from.Length == 0)
    {
        return to.Length;
    }
    if (to.Length == 0)
    {
        return from.Length;
    }

    // Identical strings need no edits; skip the quadratic work.
    if (from == to)
    {
        return 0;
    }

    // Each cell of the classic matrix depends only on the current and previous
    // row, so two rolling rows are enough; memory is O(target.Length) instead
    // of O(source.Length * target.Length).
    int[] previousRow = new int[to.Length + 1];
    int[] currentRow = new int[to.Length + 1];

    // Row 0: distance from the empty prefix of source to each prefix of target.
    for (int j = 0; j <= to.Length; j++)
    {
        previousRow[j] = j;
    }

    for (int i = 1; i <= from.Length; i++)
    {
        // Column 0: distance from a source prefix of length i to the empty string.
        currentRow[0] = i;

        for (int j = 1; j <= to.Length; j++)
        {
            int cost = (from[i - 1] == to[j - 1]) ? 0 : 1;
            int deletion = previousRow[j] + 1;
            int insertion = currentRow[j - 1] + 1;
            int substitution = previousRow[j - 1] + cost;
            currentRow[j] = Math.Min(Math.Min(deletion, insertion), substitution);
        }

        // The row just filled becomes the "previous" row for the next iteration.
        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    // After the final swap the last computed row lives in previousRow.
    return previousRow[to.Length];
}
/// <summary>
/// Scores how alike two strings are on a scale from 0.0 to 1.0, based on the
/// edit distance reported by <c>ComputeLevenshteinDistance</c> relative to the
/// length of the longer string.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// 0.0 when either argument is null or empty, 1.0 when the strings are equal,
/// otherwise <c>1 - distance / max(source.Length, target.Length)</c>.
/// </returns>
double CalculateSimilarity(string source, string target)
{
    // A missing or empty side is scored as "nothing in common".
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0.0;
    }

    // Equal strings are a perfect match; no distance computation needed.
    if (source == target)
    {
        return 1.0;
    }

    int editCount = ComputeLevenshteinDistance(source, target);
    int longerLength = Math.Max(source.Length, target.Length);

    // Normalise the edit count by the longer length, then invert it so that
    // fewer edits yield a higher score.
    double editRatio = (double)editCount / (double)longerLength;
    return 1.0 - editRatio;
}
/// <summary>
/// Reads the whole file at <paramref name="path"/> and decodes its bytes as UTF-8.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>
/// The decoded file content. The raw bytes are decoded as-is, so a leading
/// UTF-8 byte-order mark (if present) is not stripped.
/// </returns>
string FileToString(string path)
{
    // File.ReadAllBytes opens the file for reading only and always closes the
    // handle, even when reading fails. That removes the handle leak on error,
    // lets read-only files be read, and avoids silently truncating the length
    // with an int cast.
    byte[] data = File.ReadAllBytes(path);
    return System.Text.Encoding.UTF8.GetString(data, 0, data.Length);
}
/// <summary>
/// Loads two files as text via <c>FileToString</c> and returns the score that
/// <c>CalculateSimilarity</c> gives for their contents.
/// </summary>
/// <param name="path">Path of the first file.</param>
/// <param name="path2">Path of the second file.</param>
/// <returns>The value returned by <c>CalculateSimilarity</c> for the two contents.</returns>
double CompareFiles(string path, string path2)
{
    // The first file is read before the second, as arguments evaluate left to right.
    return CalculateSimilarity(FileToString(path), FileToString(path2));
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement