Advertisement
andruhovski

HTML Comparison

May 27th, 2019
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.69 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Threading.Tasks;
  6. using Aspose.App.Models;
  7. using Aspose.Html;
  8. using Tools.Foundation.Models;
  9.  
  10. namespace Aspose.App.Controllers
  11. {
  /// <summary>
  /// Compares the text content of two HTML files character by character and
  /// writes the differences to a new HTML document using
  /// <c>span</c> (unchanged), <c>ins</c> (inserted), <c>del</c> (deleted) and
  /// <c>mark</c> (replaced) elements.
  /// </summary>
  internal class AsposeHtmlComparisonController : ApiControllerBase
  {
      /// <summary>
      /// Builds the comparison document for two files located in
      /// <c>AppSettings.WorkingDirectory + folderName</c>.
      /// </summary>
      /// <param name="fileName1">Name of the original HTML file.</param>
      /// <param name="fileName2">Name of the modified HTML file.</param>
      /// <param name="folderName">Folder (relative to the working directory) holding both files.</param>
      /// <returns>The response produced by the base class <c>Process</c> call.</returns>
      public async Task<Response> Compare(string fileName1, string fileName2, string folderName)
      {
          Aspose.App.Models.License.SetAsposeHtmlLicense();
          var comparedDocument = string.Format("{0}_compare_to_{1}.html",
              Path.GetFileNameWithoutExtension(fileName1), Path.GetFileNameWithoutExtension(fileName2));

          return await Process(this.GetType().Name, comparedDocument, folderName, ".html", false, false,
              AsposeHTML + ComparisonApp, ProductFamilyNameKeysEnum.html, "Compare",
              (inFilePath, outPath, zipOutFolder) =>
              {
                  var tc1 = GetTextContent(AppSettings.WorkingDirectory + folderName + "/" + fileName1);
                  var tc2 = GetTextContent(AppSettings.WorkingDirectory + folderName + "/" + fileName2);

                  // Only fragments present in both documents are compared.
                  var count = Math.Min(tc1.Count, tc2.Count);

                  // Collect all markup first so the document body is parsed once
                  // instead of being re-read and re-parsed for every fragment.
                  var markup = new System.Text.StringBuilder();
                  for (int i = 0; i < count; i++)
                  {
                      AppendDiffMarkup(markup, tc1[i].Trim(), tc2[i].Trim());
                  }

                  using (var result = new Aspose.Html.HTMLDocument())
                  {
                      if (markup.Length > 0)
                      {
                          result.Body.InnerHTML += markup.ToString();
                      }
                      result.Save(outPath);
                  }
              });
      }

      /// <summary>
      /// Appends the diff of two strings to <paramref name="target"/> as HTML,
      /// one element per run of identical edit operations.
      /// </summary>
      private static void AppendDiffMarkup(System.Text.StringBuilder target, string originalStr, string modifiedStr)
      {
          var changes = Levenshtein3(originalStr, modifiedStr);
          foreach (var item in GroupModifications(changes.route, originalStr, modifiedStr))
          {
              // The value is plain text taken from TextContent; encode it so that
              // characters such as '<' or '&' are not re-interpreted as markup.
              var text = System.Net.WebUtility.HtmlEncode(item.Value);
              switch (item.Status)
              {
                  case '=':
                      target.AppendFormat("<span>{0}</span>", text);
                      break;
                  case 'I':
                      target.AppendFormat("<ins>{0}</ins>", text);
                      break;
                  case 'R':
                      target.AppendFormat("<mark>{0}</mark>", text);
                      break;
                  case 'D':
                      target.AppendFormat("<del>{0}</del>", text);
                      break;
                  default:
                      break;
              }
          }
      }

      /// <summary>
      /// Walks an edit route and groups consecutive operations of the same kind
      /// into <see cref="Modification"/> runs. Returns an empty list for an empty route.
      /// </summary>
      private static List<Modification> GroupModifications(string route, string originalStr, string modifiedStr)
      {
          var runs = new List<Modification>();
          var runText = new System.Text.StringBuilder();
          char runStatus = '\0';
          int pos1 = 0, pos2 = 0;

          foreach (char status in route)
          {
              char symbol;
              switch (status)
              {
                  case '=':
                      // Same character in both strings: advance both positions.
                      symbol = originalStr[pos1++];
                      pos2++;
                      break;
                  case 'I':
                      // Insertion consumes a character of the modified string only.
                      symbol = modifiedStr[pos2++];
                      break;
                  case 'D':
                      // Deletion consumes a character of the original string only.
                      symbol = originalStr[pos1++];
                      break;
                  case 'R':
                      // Replacement shows the new character and advances both positions.
                      symbol = modifiedStr[pos2++];
                      pos1++;
                      break;
                  default:
                      symbol = ' ';
                      break;
              }

              if (runText.Length > 0 && status != runStatus)
              {
                  runs.Add(new Modification { Status = runStatus, Value = runText.ToString() });
                  runText.Clear();
              }
              runStatus = status;
              runText.Append(symbol);
          }

          if (runText.Length > 0)
          {
              runs.Add(new Modification { Status = runStatus, Value = runText.ToString() });
          }
          return runs;
      }

      /// <summary>
      /// Loads an HTML file, blanks the text of all <c>script</c> and <c>style</c>
      /// elements and returns the text content of each child element of the document.
      /// </summary>
      /// <param name="fileName">Path of the HTML file to read.</param>
      /// <returns>One text fragment per element in <c>document.Children</c>.</returns>
      private static List<string> GetTextContent(string fileName)
      {
          using (var document = new HTMLDocument(System.IO.File.ReadAllText(fileName), string.Empty))
          {
              // Script and style bodies are not visible text; drop them before extraction.
              foreach (var tagName in new[] { "script", "style" })
              {
                  foreach (var element in document.GetElementsByTagName(tagName))
                  {
                      element.TextContent = string.Empty;
                  }
              }

              // NOTE(review): this iterates the document's direct children, which is
              // presumably just the root element - confirm that granularity is intended.
              var textFragments = new List<string>();
              foreach (var element in document.Children)
              {
                  textFragments.Add(element.TextContent);
              }
              return textFragments;
          }
      }

      /// <summary>
      /// Computes the Levenshtein distance between two strings together with the
      /// edit route, keeping only about sqrt(m) rows of the matrix in memory and
      /// recomputing earlier bands while the route is traced back.
      /// </summary>
      /// <param name="S1">Original string.</param>
      /// <param name="S2">Modified string.</param>
      /// <returns>
      /// The distance and the route from the start of the strings to their end:
      /// '=' keep, 'R' replace, 'I' insert from <paramref name="S2"/>, 'D' delete from <paramref name="S1"/>.
      /// </returns>
      private static Prescription Levenshtein3(string S1, string S2)
      {
          int m = S1.Length, n = S2.Length;
          int h = (int)Math.Sqrt(m + 1);
          int[,] D = new int[h + 1, n + 1];
          char[,] P = new char[h + 1, n + 1];

          int d = 0;
          // The route is collected backwards (from the end of both strings) and reversed at the end.
          var route = new System.Text.StringBuilder();
          int iPos = m, jPos = n;
          do
          {
              for (int i = 0; i <= jPos; i++)
              {
                  D[0, i] = i;
                  P[0, i] = 'I';
              }
              int index = 1;
              for (int i = 1; i <= iPos; i++)
              {
                  for (int j = 0; j <= jPos; j++)
                  {
                      if (j == 0)
                      {
                          D[index, j] = i;
                      }
                      else
                      {
                          int cost = (S1[i - 1] != S2[j - 1]) ? 1 : 0;
                          if (D[index, j - 1] < D[index - 1, j] && D[index, j - 1] < D[index - 1, j - 1] + cost)
                          {
                              // Insert
                              D[index, j] = D[index, j - 1] + 1;
                              P[index, j] = 'I';
                          }
                          else if (D[index - 1, j] < D[index - 1, j - 1] + cost)
                          {
                              // Remove
                              D[index, j] = D[index - 1, j] + 1;
                              P[index, j] = 'D';
                          }
                          else
                          {
                              // Replace or no-op
                              D[index, j] = D[index - 1, j - 1] + cost;
                              P[index, j] = (cost == 1) ? 'R' : '=';
                          }
                      }
                  }
                  if (i % h == 0)
                  {
                      // Band is full: start a fresh band whose first row is the last row of this one.
                      var nextD = new int[h + 1, n + 1];
                      var nextP = new char[h + 1, n + 1];
                      for (int j = 0; j <= n; j++)
                      {
                          nextD[0, j] = D[index, j];
                          nextP[0, j] = P[index, j];
                      }
                      D = nextD;
                      P = nextP;
                      index = 0;
                  }
                  index++;
              }

              // The full distance is only available on the first pass, before any trace-back.
              if (iPos == m && jPos == n)
              {
                  d = D[index - 1, n];
              }

              // Trace back through the rows of the current band.
              while (index > 0 && iPos != 0 && jPos != 0)
              {
                  char c = P[index - 1, jPos];
                  route.Append(c);
                  if (c == 'R' || c == '=')
                  {
                      iPos--;
                      jPos--;
                      index--;
                  }
                  else if (c == 'D')
                  {
                      iPos--;
                      index--;
                  }
                  else
                  {
                      jPos--;
                  }
              }
          } while ((iPos != 0) && (jPos != 0));

          // One string is exhausted; the rest of the other one is pure deletions
          // (original left over) or pure insertions (modified left over). Without
          // these the route would miss its leading operations.
          route.Append('D', iPos);
          route.Append('I', jPos);

          char[] ordered = route.ToString().ToCharArray();
          Array.Reverse(ordered);
          return new Prescription(d, new string(ordered));
      }

  }
  220.  
  /// <summary>
  /// Result of an edit-distance computation: the distance itself and the
  /// sequence of edit operations that was found.
  /// </summary>
  internal class Prescription
  {
      /// <summary>Creates a result holding the given distance and route.</summary>
      public Prescription(int distance, string route)
      {
          this.route = route;
          this.distance = distance;
      }

      /// <summary>Edit distance value.</summary>
      public int distance;

      /// <summary>Edit operations, one character per operation.</summary>
      public string route;
  }
  /// <summary>
  /// A run of text together with the status character of the edit operation
  /// that applies to it.
  /// </summary>
  internal class Modification
  {
      /// <summary>Text of the run.</summary>
      public string Value;

      /// <summary>Status character of the run.</summary>
      public char Status;
  }
  237.  
  238. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement