Advertisement
Guest User

Untitled

a guest
Jul 20th, 2019
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.01 KB | None | 0 0
  /**
   * Strip the markup Microsoft Word adds to HTML (for example when pasting
   * from Word into a rich-text editor), keeping the visible content.
   *
   * The clean-up is regex based and runs in five steps:
   *   1. line breaks become a single space and `class=Mso...` attributes go;
   *   2. HTML comments and `<!...>` declarations/conditionals are removed;
   *   3. wrapper and Office-namespace tags are removed, their content is kept;
   *   4. `style`, `script`, `applet`, `embed`, `noframes` and `noscript`
   *      elements are removed together with their content;
   *   5. `style`, `start` and `align` attributes are removed.
   *
   * This is a formatting clean-up only; it is not an HTML sanitizer and must
   * not be relied on to make untrusted markup safe.
   *
   * @param {string} input - HTML fragment to clean. `null`/`undefined` yield ''.
   * @returns {string} The cleaned HTML.
   */
  function stripMSWord(input) {
    if (input == null) {
      return '';
    }

    // `[\s\S]*?` is used wherever a match may span several characters: it
    // matches any character including newlines and, unlike `(\s|.)*?`, has no
    // ambiguous alternation that backtracks exponentially on unterminated input.
    let output = String(input)
      // 1. Line breaks become a space (deleting them would fuse the words on
      //    either side of a wrapped line); Mso classes are dropped.
      .replace(/\r\n?|\n/g, ' ')
      .replace(/ class="?Mso[a-zA-Z]+"?/g, '')
      // 2. Strip Word-generated HTML comments, then any remaining `<!...>`
      //    construct (doctype, `<![endif]>`, ...).
      .replace(/<!--[\s\S]*?-->/g, '')
      .replace(/<![\s\S]*?>/g, '')
      // 3. Remove opening/closing tags but leave their content. Names are
      //    matched as prefixes, so `o:` covers `<o:p>`, `st1:` covers
      //    `<st1:City>`, and `\?xml:` covers `<?xml:namespace ...>`.
      .replace(
        /<\/*(?:meta|link|html|head|body|span|\?xml:|xml|st1:|o:|w:|m:|v:|font)[\s\S]*?>/gi,
        ''
      );

    // 4. Remove these elements entirely: everything from the opening tag
    //    through the end of the tag that next mentions the same name.
    const badTags = ['style', 'script', 'applet', 'embed', 'noframes', 'noscript'];
    for (const tag of badTags) {
      // Backslashes are doubled because the pattern is built from a string.
      const tagStripper = new RegExp(`<${tag}[\\s\\S]*?${tag}[^>]*>`, 'gi');
      output = output.replace(tagStripper, '');
    }

    // 5. Remove style/start/align attributes whether the value is
    //    double-quoted, single-quoted or unquoted. The match stops at the end
    //    of the value, so it can never run past the closing `>` of the tag.
    return output.replace(
      /\s(?:style|start|align)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi,
      ''
    );
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement