Advertisement
Guest User

Untitled

a guest
Sep 16th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.88 KB | None | 0 0
  1. #Usage: python3 network.py input.csv output.csv
  2.  
  3. import csv
  4. import re
  5. import sys
  6.  
  7.  
  # Both paths are required positional arguments; stop with a usage message
  # instead of an IndexError traceback when either one is missing.
  if len(sys.argv) < 3:
      sys.exit("Usage: python3 network.py input.csv output.csv")
  input_file = sys.argv[1]
  output_file = sys.argv[2]
  10.  
  def detect_mention(text):
      """Return every @mention found in *text*, in order of appearance.

      A mention is an ``@`` located at the very start of the text or right
      after a character other than a letter, digit, ``-``, ``_`` or ``.``
      (so the ``@`` inside an e-mail address is ignored), followed by a
      letter and at least one more letter, digit or underscore.

      Args:
          text: The string to scan.

      Returns:
          A list of the matched mentions, each keeping its leading ``@``.
          The list is empty when nothing matches.
      """
      # The look-behind is zero-width on both branches: either we are at the
      # start of the string, or the previous character is not one that may
      # appear inside a handle or e-mail local part.
      pattern = r"(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9_]+)"
      return [match.group() for match in re.finditer(pattern, text)]
  20.  
  21.  
  # Read the input CSV and append one "username;mention" row to the output
  # for every mention found. Both files are opened with newline='' as the
  # csv module recommends, so it controls line endings itself.
  with open(input_file, newline='') as csvinput, \
          open(output_file, 'a', newline='') as csvoutput:
      reader = csv.reader(csvinput, delimiter=',')
      writer = csv.writer(csvoutput, delimiter=';',
                          quotechar='"', quoting=csv.QUOTE_MINIMAL)

      # The output is opened in append mode; emit the header only while the
      # file is still empty so repeated runs do not scatter header rows
      # through the data.
      if csvoutput.tell() == 0:
          writer.writerow(['username', 'mention'])

      for row in reader:
          # Column 11 is used as the username and column 2 as the text to
          # scan. Blank or truncated rows lack those columns; skip them
          # instead of crashing with an IndexError.
          if len(row) < 12:
              continue
          username = row[11]
          for mention in detect_mention(row[2]):
              writer.writerow([username, mention])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement