Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Usage: python3 network.py input.csv output.csv
import csv
import re
import sys
# Both paths are mandatory positional arguments. Fail early with the usage
# line instead of an IndexError traceback when either one is missing.
if len(sys.argv) < 3:
    sys.exit("Usage: python3 network.py input.csv output.csv")

input_file = sys.argv[1]   # CSV to read rows from
output_file = sys.argv[2]  # CSV that receives the username/mention pairs
# An "@" counts as a mention only at the very start of the text or when the
# preceding character is not a letter, digit, "-", "_" or "." (so the "@" in
# an e-mail address such as user@example.com is ignored). The handle must
# start with a letter and be at least two characters long.
# Compiled once at import time instead of on every call.
_MENTION_RE = re.compile(
    r"(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9_]+)"
)


def detect_mention(text):
    """Return every @mention found in *text*, in order of appearance.

    Args:
        text: The string to scan.

    Returns:
        A list of the matched substrings, each including its leading "@"
        (for example ``["@alice", "@bob_1"]``). The list is empty when
        nothing matches.
    """
    # group() with no argument is the whole match, i.e. it keeps the "@".
    return [match.group() for match in _MENTION_RE.finditer(text)]
# Zero-based positions of the input columns this script reads.
USERNAME_COLUMN = 11  # value copied into the output "username" column
TEXT_COLUMN = 2       # text that is scanned for @mentions

# newline='' is what the csv module expects for both reading and writing:
# it lets the module handle line endings itself, which avoids a doubled
# carriage return in the written line endings on Windows.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used for both files - confirm that matches the input data.
with open(input_file, newline='') as csvinput, \
        open(output_file, 'a', newline='') as csvoutput:
    reader = csv.reader(csvinput, delimiter=',')
    writer = csv.writer(csvoutput, delimiter=';',
                        quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # The output is opened in append mode, so only emit the header when the
    # file is still empty; otherwise every run would add another header row.
    if csvoutput.tell() == 0:
        writer.writerow(['username', 'mention'])

    for row in reader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than crash on the column lookup below.
        if not row:
            continue
        username = row[USERNAME_COLUMN]
        # One output row per mention found in the text column.
        for mention in detect_mention(row[TEXT_COLUMN]):
            writer.writerow([username, mention])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement