SHARE
TWEET

Delete all Spam WordPress Comments via the XML-RPC API with Python!

TankorSmash Jul 21st, 2013 123 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from wordpress_xmlrpc import Client
  2. from wordpress_xmlrpc.methods.posts import *
  3. from wordpress_xmlrpc.methods.users import *
  4. from wordpress_xmlrpc.methods.comments import *
  5. from wordpress_xmlrpc.methods.pages import *
  6.  
  7. from collections import Counter
  8.  
  9. DANGER_WORDS = ["glasses", "longchamp", "oakleys", "oakley", "-online", "4u", "adipex", "advicer", "baccarrat", "blackjack", "bllogspot", "booker", "byob", "car-rental-e-site", "car-rentals-e-site", "carisoprodol", "casino", "casinos", "chatroom", "cialis", "coolcoolhu", "coolhu", "credit-card-debt", "credit-report-4u", "cwas", "cyclen", "cyclobenzaprine", "dating-e-site", "day-trading", "debt-consolidation", "debt-consolidation-consultant", "discreetordering", "duty-free", "dutyfree", "equityloans", "fioricet", "flowers-leading-site", "freenet-shopping", "freenet", "gambling-", "hair-loss", "health-insurancedeals-4u", "homeequityloans", "homefinance", "holdem", "holdempoker", "holdemsoftware", "holdemtexasturbowilson", "hotel-dealse-site", "hotele-site", "hotelse-site", "incest", "insurance-quotesdeals-4u", "insurancedeals-4u", "jrcreations", "levitra", "macinstruct", "mortgage-4-u", "mortgagequotes", "online-gambling", "onlinegambling-4u", "ottawavalleyag", "ownsthis", "palm-texas-holdem-game", "paxil", "penis", "pharmacy", "phentermine", "poker-chip", "poze", "pussy", "rental-car-e-site", "ringtones", "roulette", "shemale", "shoes", "slot-machine", "texas-holdem", "thorcarlson", "top-site", "top-e-site", "tramadol", "trim-spa", "ultram", "valeofglamorganconservatives", "viagra", "vioxx", "xanax", "zolus", ]
  10. DRUG_WORDS = ["ambien</a>", "vuitton", "ambien", "viagra", "cialis", "drug", "hydrocodone", "klonopin", "pill", "withdrawal", "ativan", 'valium', 'clomid', 'rel="nofollow">buy', 'valium</a>', 'xanax', 'marcjacobs', 'watches', 'discount', 'tadalafil', 'premature', 'ejaculation']
  11. RISKY_WORDS = ["sex", "free", "online"]
  12.  
  13.  
  14. def get_all_comments_per_post(client, post_id=""):
  15.     """
  16.    returns a single key dict with the comments inside
  17.    """
  18.     data = {'filter' : post_id,
  19.             'number': 2000,
  20.             'status': 'hold'}
  21.  
  22.     resp = client.call(GetComments(data))
  23.  
  24.     return resp
  25.  
  26.  
  27. def delete_comment(client, comment_id):
  28.     data = {'comment_id':int(comment_id)}
  29.     resp = client.call(DeleteComment(comment_id))
  30.  
  31.  
  32. def delete_comments_containing(client, list_of_words):
  33.     print 'searching for comments'
  34.     comments = get_all_comments_per_post(wp)
  35.  
  36.     print 'search complete'
  37.     print 'count:', len(comments)
  38.     for comment in comments:
  39.         words = []
  40.  
  41.         words += comment.content.split()
  42.         words += comment.author.split()
  43.         words += [comment.author_email]
  44.         words += [comment.author_url]
  45.         for word in words:
  46.             if word.lower() in list_of_words or \
  47.             any([l_word for l_word in list_of_words if word in l_word]):
  48.                 print "DANGER FROM", comment.author.encode(errors="replace"), "DELETING with ID", comment.id
  49.                 delete_comment(wp, comment.id)
  50.                 break
  51.  
  52.  
  53. def top_20_common_words(comments):
  54.     """
  55.    bonus method for getting the most common words in the comments you pass
  56.    into it, which is useful when you're trying to build a list of all the spammy
  57.    words to auto parse out of your blog
  58.  
  59.    returns all words longer than 3 characters
  60.    """
  61.  
  62.     all_words = []
  63.     for comment in comments:
  64.         for word in comment.content.split():
  65.             all_words.append(word)
  66.  
  67.     word_counter = Counter(all_words)
  68.  
  69.     return [word[0] for word in word_counter.most_common() if len(word[0]) >= 4][:20]
  70.  
  71.  
  72. wp = Client(r'http://www.YOURBLOGURL.com/xmlrpc.php', 'ADMIN_USERNAME', 'ADMIN_PASSWORD')
  73. resp = wp.call(GetCommentStatusList())
  74. print "All Possible Comment Statuses:", resp
  75.  
  76. wordlist = DANGER_WORDS + DRUG_WORDS
  77. wordlist = [word.lower() for word in wordlist]
  78.  
  79. print 'Searching for Comments...'
  80. delete_comments_containing(wp, wordlist)
  81. print 'Search Complete!'
RAW Paste Data
Pastebin PRO Summer Special!
Get 60% OFF on Pastebin PRO accounts!
Top