Advertisement
Guest User

Anonymous

a guest
Mar 28th, 2010
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.40 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4.  
  5. import urllib
  6. from twill.commands import * # gives us go()
  7.  
  8. areas = ['auburn', 'bham', 'columbusga', 'dothan', 'shoals', 'gadsden', 'huntsville', 'mobile', 'montgomery', 'tuscaloosa', 'anchorage', 'fairbanks', 'kenai', 'juneau', 'flagstaff', 'mohave', 'phoenix', 'prescott', 'showlow', 'sierravista', 'tucson', 'yuma', 'fayar', 'fortsmith', 'jonesboro', 'littlerock', 'memphis', 'texarkana', 'sfbay', 'bakersfield', 'chico', 'fresno', 'goldcountry', 'hanford', 'humboldt', 'imperial', 'inlandempire', 'losangeles', 'mendocino', 'merced', 'modesto', 'monterey', 'orangecounty', 'palmsprings', 'redding', 'reno', 'sacramento', 'sandiego', 'slo', 'santabarbara', 'santamaria', 'siskiyou', 'stockton', 'susanville', 'ventura', 'visalia', 'yubasutter', 'boulder', 'cosprings', 'denver', 'eastco', 'fortcollins', 'rockies', 'pueblo', 'westslope', 'newlondon', 'hartford', 'newhaven', 'nwct', 'newyork', 'daytona', 'keys', 'fortmyers', 'gainesville', 'cfl', 'jacksonville', 'lakeland', 'lakecity', 'ocala', 'okaloosa', 'orlando', 'panamacity', 'pensacola', 'sarasota', 'miami', 'spacecoast', 'staugustine', 'tallahassee', 'tampa', 'treasure', 'albanyga', 'athensga', 'atlanta', 'augusta', 'brunswick', 'columbusga', 'macon', 'nwga', 'savannah', 'statesboro', 'valdosta', 'boise', 'eastidaho', 'lewiston', 'pullman', 'spokane', 'twinfalls', 'bn', 'chambana', 'chicago', 'decatur', 'lasalle', 'mattoon', 'peoria', 'quadcities', 'rockford', 'carbondale', 'springfieldil', 'stlouis', 'quincy', 'bloomington', 'evansville', 'fortwayne', 'indianapolis', 'kokomo', 'tippecanoe', 'muncie', 'richmondin', 'southbend', 'terrahaute', 'chicago', 'ames', 'cedarrapids', 'desmoines', 'dubuque', 'fortdodge', 'iowacity', 'masoncity', 'omaha', 'quadcities', 'siouxcity', 'ottumwa', 'waterloo', 'kansascity', 'lawrence', 'ksu', 'nwks', 'salina', 'seks', 'swks', 'topeka', 'wichita', 'bgky', 'cincinnati', 'eastky', 'huntington', 'lexington', 'louisville', 'owensboro', 'westky', 'batonrouge', 'cenla', 'houma', 'lafayette', 'lakecharles', 'monroe', 'neworleans', 'shreveport', 'annapolis', 'baltimore', 'chambersburg', 'easternshore', 'frederick', 'smd', 'westmd', 'washingtondc', 'boston', 'capecod', 'southcoast', 'westernmass', 'worcester', 'annarbor', 'battlecreek', 'centralmich', 'detroit', 'flint', 'grandrapids', 'holland', 'jxn', 'kalamazoo', 'lansing', 'monroemi', 'muskegon', 'nmi', 'porthuron', 'saginaw', 'southbend', 'swmi', 'thumb', 'up', 'bemidji', 'brainerd', 'duluth', 'fargo', 'mankato', 'minneapolis', 'rmn', 'marshall', 'stcloud', 'gulfport', 'hattiesburg', 'jackson', 'memphis', 'meridian', 'northmiss', 'natchez', 'columbiamo', 'joplin', 'kansascity', 'kirksville', 'loz', 'semo', 'springfield', 'stjoseph', 'stlouis', 'billings', 'bozeman', 'butte', 'greatfalls', 'helena', 'kalispell', 'missoula', 'montana', 'grandisland', 'lincoln', 'northplatte', 'omaha', 'scottsbluff', 'siouxcity', 'elko', 'lasvegas', 'reno', 'cnj', 'jerseyshore', 'newjersey', 'southjersey', 'newyork', 'albuquerque', 'clovis', 'farmington', 'lascruces', 'roswell', 'santafe', 'albany', 'binghamton', 'buffalo', 'catskills', 'chautauqua', 'elmira', 'fingerlakes', 'glensfalls', 'hudsonvalley', 'ithaca', 'longisland', 'newyork', 'oneonta', 'plattsburgh', 'potsdam', 'rochester', 'syracuse', 'twintiers', 'utica', 'watertown', 'asheville', 'boone', 'charlotte', 'eastnc', 'fayetteville', 'greensboro', 'hickory', 'onslow', 'outerbanks', 'raleigh', 'wilmington', 'winstonsalem', 'bismarck', 'fargo', 'grandforks', 'nd', 'akroncanton', 'ashtabula', 'athensohio', 'chillicothe', 'cincinnati', 'cleveland', 'columbus', 'dayton', 'huntington', 'limaohio', 'mansfield', 'wheeling', 'parkersburg', 'sandusky', 'toledo', 'tuscarawas', 'youngstown', 'zanesville', 'fortsmith', 'lawton', 'enid', 'oklahomacity', 'stillwater', 'texoma', 'tulsa', 'bend', 'corvallis', 'eastoregon', 'eugene', 'klamath', 'medford', 'oregoncoast', 'portland', 'roseburg', 'salem', 'altoona', 'chambersburg', 'erie', 'harrisburg', 'lancaster', 'allentown', 'meadville', 'philadelphia', 'pittsburgh', 'poconos', 'reading', 'scranton', 'pennstate', 'twintiers', 'williamsport', 'york', 'charleston', 'columbia', 'florencesc', 'greenville', 'hiltonhead', 'myrtlebeach', 'nesd', 'csd', 'rapidcity', 'siouxfalls', 'sd', 'chattanooga', 'clarksville', 'cookeville', 'jacksontn', 'knoxville', 'memphis', 'nashville', 'tricities', 'abilene', 'amarillo', 'austin', 'beaumont', 'brownsville', 'collegestation', 'corpuschristi', 'dallas', 'nacogdoches', 'delrio', 'elpaso', 'galveston', 'houston', 'killeen', 'laredo', 'lubbock', 'mcallen', 'odessa', 'sanangelo', 'sanantonio', 'sanmarcos', 'bigbend', 'texarkana', 'texoma', 'easttexas', 'victoriatx', 'waco', 'wichitafalls', 'logan', 'ogden', 'provo', 'saltlakecity', 'stgeorge', 'charlottesville', 'danville', 'easternshore', 'fredericksburg', 'norfolk', 'harrisonburg', 'lynchburg', 'blacksburg', 'richmond', 'roanoke', 'swva', 'winchester', 'washingtondc', 'bellingham', 'kpr', 'lewiston', 'moseslake', 'olympic', 'pullman', 'seattle', 'skagit', 'spokane', 'wenatchee', 'yakima', 'portland', 'charlestonwv', 'martinsburg', 'huntington', 'morgantown', 'wheeling', 'parkersburg', 'swv', 'wv', 'appleton', 'duluth', 'eauclaire', 'greenbay', 'janesville', 'racine', 'lacrosse', 'madison', 'milwaukee', 'northernwi', 'sheboygan', 'wausau']
  9.  
  10. def expunge(url, area):
  11.     page = urllib.urlopen(url).read() # <-- and v and vv gets you urls of ind. postings
  12.     page = page[page.index('<p>'):].split('\n')[0]
  13.     page = [i[:i.index('">')] for i in page.split('href="')[1:-1] if '<font size="-1">' in i]
  14.    
  15.     for u in page:
  16.         num = u[u.rfind('/')+1:u.index('.html')] # the number of the posting (like 34235235252)
  17.         spam = area + 'flag/?flagCode=15&amp;postingID='+num # url for flagging as spam
  18.         go(spam) # do it flaggot
  19.    
  20.  
  21. print 'Checking ' + str(len(areas)) + ' areas...'
  22.  
  23. for area in ['http://' + a + '.craigslist.org/' for a in areas]:
  24.     usci = area + 'search/ccc?query=scientology&catAbbreviation=ccc'
  25.     udia = area + 'search/ccc?query=dianetics&catAbbreviation=ccc'
  26.     try:
  27.         sci = urllib.urlopen(usci).read()
  28.         dia = urllib.urlopen(udia).read()
  29.     except:
  30.         print 'tl;dr error for ' + area
  31.        
  32.     if 'Found: ' in sci:
  33.         print 'Found results for "scientology" in ' + area
  34.         expunge(usci, area)
  35.         print 'All scientology listings marked as spam for area'
  36.    
  37.     if 'Found: ' in dia:
  38.         print 'Found results for "dianetics" in ' + area
  39.         expunge(udia, area)
  40.         print 'All dianetics listings marked as spam for area'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement