Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- discussion_indexer.py 2010-08-26 09:37:19.000000000 -0300
- +++ discussion_indexer2.py 2010-08-26 13:04:50.000000000 -0300
- @@ -1,28 +1,59 @@
- import re, time
- -MONTHS = ('January', 'February', 'March', 'April', 'May',
- +MONTHS = {
- +'en' : ('January', 'February', 'March', 'April', 'May',
- 'June', 'July', 'August', 'September', 'October',
- - 'November', 'December')
- + 'November', 'December'),
- +'pt' : ('Janeiro', 'Fevereiro', u'Março', 'Abril', 'Maio',
- + 'Junho', 'Julho', 'Agosto', 'Setembro', 'Outubro',
- + 'Novembro', 'Dezembro')
- +}
- +
- +#TODO: How to use $dateFormats from /languages/messages/MessagesXX.php ?
- +# en: 'dmy both' => 'H:i, j F Y'
- +# pt: 'dmy both' => 'H\hi\m\i\n \d\e j \d\e F \d\e Y'
- +
- +TIMESTAMPS = {
- +'en' : ur'^(.*?)([0-9]{2}\:[0-9]{2}\,' + \
- + ' [0-9]{1,2} (?:%s) [0-9]{4})' % \
- + '|'.join(MONTHS['en']) + ' \(UTC\)\s*$',
- +'pt' : ur'^(.*?)([0-9]{2}h[0-9]{2}min' + \
- + ' de [0-9]{1,2} de (?:%s) de [0-9]{4})' % \
- + '|'.join(MONTHS['pt']) + ' \(UTC\)\s*$'
- +}
- +
- +TIMESTAMPS2 = {
- +'en' : '%H:%M, %d %B %Y',
- +'pt' : '%Hh%Mmin de %d de %B de %Y'
- +}
- +
- +USERS = {
- +'en' : ur'.*\[\[[Uu]ser(?:[ _]talk)?\:([^]|]*)',
- +'pt' : ur'.*\[\[[Uu]suário(?:[ _]Discussão)?\:([^]|]*)'
- +}
- import mwclient
- import cgitb; cgitb.enable(format = 'text')
- class DiscussionIndexer(object):
- + LANG = 'en' #TODO: How to get $wgLanguageCode for the current project?
- def __init__(self, site):
- self.site = site
- self.cache = {}
- - r_timestamp = re.compile(ur'^(.*?)([0-9]{2}\:[0-9]{2}\,' + \
- - ' [0-9]{1,2} (?:%s) [0-9]{4})' % \
- - '|'.join(MONTHS) + ' \(UTC\)\s*$', re.MULTILINE)
- - r_user = re.compile(ur'.*\[\[[Uu]ser(?:[ _]talk)?\:([^]|]*)')
- + r_timestamp = re.compile(TIMESTAMPS[LANG], re.MULTILINE)
- + r_user = re.compile(USERS[LANG])
- def get_last_comment(self, section):
- """ Returns a tuple (user, timestamp) of the
- last comment made to the section."""
- comments = self.r_timestamp.findall(section)
- + for c in comments:
- + for m in range(12):
- + c[1] = re.sub(MONTHS['pt'][m], MONTHS['en'][m], c[1])
- +
- comments = [(self.r_user.search(line), time.strptime(timestamp,
- - '%H:%M, %d %B %Y')) for line, timestamp in comments]
- + TIMESTAMPS2[LANG])) for line, timestamp in comments]
- comments.sort(cmp = lambda x, y: -cmp(x[1], y[1]))
- @@ -73,7 +104,7 @@
- items = []
- for page, title, user, timestamp in sections:
- if timestamp:
- - timestamp = time.strftime('%H:%M, %d %B %Y', timestamp)
- + timestamp = time.strftime(TIMESTAMPS2[LANG], timestamp)
- else:
- timestamp = u''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement