daily pastebin goal
49%
SHARE
TWEET

Delete All Spam WordPress Comments via the XML-RPC API with Python!

TankorSmash Jul 21st, 2013 147 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from wordpress_xmlrpc import Client
  2. from wordpress_xmlrpc.methods.posts import *
  3. from wordpress_xmlrpc.methods.users import *
  4. from wordpress_xmlrpc.methods.comments import *
  5. from wordpress_xmlrpc.methods.pages import *
  6.  
  7. from collections import Counter
  8.  
  9. DANGER_WORDS = ["glasses", "longchamp", "oakleys", "oakley", "-online", "4u", "adipex", "advicer", "baccarrat", "blackjack", "bllogspot", "booker", "byob", "car-rental-e-site", "car-rentals-e-site", "carisoprodol", "casino", "casinos", "chatroom", "cialis", "coolcoolhu", "coolhu", "credit-card-debt", "credit-report-4u", "cwas", "cyclen", "cyclobenzaprine", "dating-e-site", "day-trading", "debt-consolidation", "debt-consolidation-consultant", "discreetordering", "duty-free", "dutyfree", "equityloans", "fioricet", "flowers-leading-site", "freenet-shopping", "freenet", "gambling-", "hair-loss", "health-insurancedeals-4u", "homeequityloans", "homefinance", "holdem", "holdempoker", "holdemsoftware", "holdemtexasturbowilson", "hotel-dealse-site", "hotele-site", "hotelse-site", "incest", "insurance-quotesdeals-4u", "insurancedeals-4u", "jrcreations", "levitra", "macinstruct", "mortgage-4-u", "mortgagequotes", "online-gambling", "onlinegambling-4u", "ottawavalleyag", "ownsthis", "palm-texas-holdem-game", "paxil", "penis", "pharmacy", "phentermine", "poker-chip", "poze", "pussy", "rental-car-e-site", "ringtones", "roulette", "shemale", "shoes", "slot-machine", "texas-holdem", "thorcarlson", "top-site", "top-e-site", "tramadol", "trim-spa", "ultram", "valeofglamorganconservatives", "viagra", "vioxx", "xanax", "zolus", ]
  10. DRUG_WORDS = ["ambien</a>", "vuitton", "ambien", "viagra", "cialis", "drug", "hydrocodone", "klonopin", "pill", "withdrawal", "ativan", 'valium', 'clomid', 'rel="nofollow">buy', 'valium</a>', 'xanax', 'marcjacobs', 'watches', 'discount', 'tadalafil', 'premature', 'ejaculation']
  11. RISKY_WORDS = ["sex", "free", "online"]
  12.  
  13.  
  14. def get_all_comments_per_post(client, post_id=""):
  15.     """
  16.    returns a single key dict with the comments inside
  17.    """
  18.     data = {'filter' : post_id,
  19.             'number': 2000,
  20.             'status': 'hold'}
  21.  
  22.     resp = client.call(GetComments(data))
  23.  
  24.     return resp
  25.  
  26.  
  27. def delete_comment(client, comment_id):
  28.     data = {'comment_id':int(comment_id)}
  29.     resp = client.call(DeleteComment(comment_id))
  30.  
  31.  
  32. def delete_comments_containing(client, list_of_words):
  33.     print 'searching for comments'
  34.     comments = get_all_comments_per_post(wp)
  35.  
  36.     print 'search complete'
  37.     print 'count:', len(comments)
  38.     for comment in comments:
  39.         words = []
  40.  
  41.         words += comment.content.split()
  42.         words += comment.author.split()
  43.         words += [comment.author_email]
  44.         words += [comment.author_url]
  45.         for word in words:
  46.             if word.lower() in list_of_words or \
  47.             any([l_word for l_word in list_of_words if word in l_word]):
  48.                 print "DANGER FROM", comment.author.encode(errors="replace"), "DELETING with ID", comment.id
  49.                 delete_comment(wp, comment.id)
  50.                 break
  51.  
  52.  
  53. def top_20_common_words(comments):
  54.     """
  55.    bonus method for getting the most common words in the comments you pass
  56.    into it, which is useful when you're trying to build a list of all the spammy
  57.    words to auto parse out of your blog
  58.  
  59.    returns all words longer than 3 characters
  60.    """
  61.  
  62.     all_words = []
  63.     for comment in comments:
  64.         for word in comment.content.split():
  65.             all_words.append(word)
  66.  
  67.     word_counter = Counter(all_words)
  68.  
  69.     return [word[0] for word in word_counter.most_common() if len(word[0]) >= 4][:20]
  70.  
  71.  
  72. wp = Client(r'http://www.YOURBLOGURL.com/xmlrpc.php', 'ADMIN_USERNAME', 'ADMIN_PASSWORD')
  73. resp = wp.call(GetCommentStatusList())
  74. print "All Possible Comment Statuses:", resp
  75.  
  76. wordlist = DANGER_WORDS + DRUG_WORDS
  77. wordlist = [word.lower() for word in wordlist]
  78.  
  79. print 'Searching for Comments...'
  80. delete_comments_containing(wp, wordlist)
  81. print 'Search Complete!'
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top