Guest User

Untitled

a guest
Nov 17th, 2016
34
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.61 KB | None | 0 0
  1. # coding: utf8
  2. import re
  3. import uuid
  4. import datetime
  5. import tempfile
  6.  
  7. import requests
  8. session = requests.Session()
  9.  
  10. from flask import Flask, jsonify, request
  11. from mongoengine import *
  12.  
  13. import pycrfsuite
  14. from utils import *
  15.  
  16.  
  17. class Account(Document):
  18.     username = StringField(required=True, primary_key=True)
  19.     password = StringField(required=True)
  20.     date = DateTimeField(default=datetime.datetime.now)
  21.     token = StringField()
  22.  
  23.  
  24. class Token(EmbeddedDocument):
  25.     word = StringField(required=True)
  26.     label = StringField(required=True)
  27.  
  28.  
  29. class Page(Document):
  30.     title = StringField(required=True)
  31.     tokens = ListField(EmbeddedDocumentField(Token), required=True)
  32.     date = DateTimeField(default=datetime.datetime.now)
  33.     account = ReferenceField(Account, required=True)
  34.     validated = BooleanField(default=False)
  35.  
  36.  
  37. class Model(Document):
  38.     file = FileField(required=True)
  39.     pages = ListField(ReferenceField(Page), required=True)
  40.     date = DateTimeField(default=datetime.datetime.now)
  41.  
  42. app = Flask(__name__)
  43.  
  44.  
  45. @app.before_first_request
  46. def check_connection():
  47.     connect('neat')
  48.  
  49.  
  50. @app.route('/token', methods=['PUT'])
  51. def api_token():
  52.     print "Token"
  53.  
  54.     if 'token' not in request.json:
  55.         return jsonify(success=False, error="No token provided")
  56.     token = request.json['token']
  57.  
  58.     account = Account.objects(token=token)
  59.  
  60.     return jsonify(success=True, valid=bool(account))
  61.  
  62.  
  63. @app.route('/user', methods=['POST']) # use post to send login information
  64. def api_login():
  65.     print "Login"
  66.  
  67.     if 'username' not in request.json:
  68.         return jsonify(success=False, error="No username provided")
  69.     username = request.json['username']
  70.  
  71.     account = Account.objects(username=username)
  72.     if not account:
  73.         return jsonify(success=False, error="Username doesn't exist")
  74.     else:
  75.         account = account.first()
  76.  
  77.     if 'password' not in request.json:
  78.         return jsonify(success=False, error="No password provided")
  79.     password = request.json['password']
  80.  
  81.     if password == account.password:
  82.         account.token = str(uuid.uuid4())
  83.         account.save()
  84.         return jsonify(success=True, token=account.token)
  85.     else:
  86.         return jsonify(success=True, error="Invalid password")
  87.  
  88. @app.route('/user', methods=['GET']) # use get to list all members
  89. def api_list_members():
  90.     print "List members"
  91.  
  92.     for member in Account.objects:
  93.         print member.username
  94.  
  95.  
  96. @app.route('/user', methods=['PUT']) # use put to add a new user
  97. def api_register():
  98.     print "Register"
  99.  
  100.     if 'username' not in request.json:
  101.         return jsonify(success=False, error="No username provided")
  102.     username = request.json['username']
  103.  
  104.     account = Account.objects(username=username)
  105.     if account:
  106.         return jsonify(success=False, error="Username already exist")
  107.  
  108.     if 'password' not in request.json:
  109.         return jsonify(success=False, error="No password provided")
  110.     password = request.json['password']
  111.  
  112.     account = Account(username=username, password=password)
  113.     account.token = str(uuid.uuid4())
  114.     account.save()
  115.  
  116.     return jsonify(success=True, token=account.token)
  117.  
  118. labelsList = [
  119.     ('PERSON', '#f44336'),
  120.     ('ORGANIZATION', '#3f51b5'),
  121.     ('LOCATION', '#4caf50'),
  122.     ('PRODUCT', '#9c27b0'),
  123.     ('ART', '#ff9800'),
  124.     ('EVENT', '#ffeb3b'),
  125.     ('OTHER', '#795548')
  126. ]
  127. labels = dict(labelsList)
  128. labels['NONE'] = '#000000'
  129.  
  130.  
  131. @app.route('/labels', methods=['GET'])
  132. def api_get_labels():
  133.     print "Getting labels"
  134.  
  135.     return jsonify(success=True, labels=labelsList)
  136.  
  137. caps_exp = re.compile(
  138.     u'(?<!\w)\w*[A-Z\u00C0-\u00DC]+\w*([\s-]\w*[A-Z\u00C0-\u00DC]+\w*)*(?!\w)', re.UNICODE)
  139.  
  140. crf_tagger = None
  141. last_update = datetime.datetime.fromtimestamp(0)
  142.  
  143.  
  144. @app.route('/text', methods=['PUT'])
  145. def api_put_text():
  146.     print "Putting text"
  147.  
  148.     if 'text' not in request.json:
  149.         return jsonify(success=False, error="No text provided")
  150.     text = request.json['text']
  151.  
  152.     words, features = get_sequence(text)
  153.     labels, probs = [], []
  154.  
  155.     model = Model.objects()
  156.     if model:
  157.         model = model.first()
  158.  
  159.         global crf_tagger
  160.         global last_update
  161.  
  162.         if last_update < model.date:
  163.             print "Updating model..."
  164.  
  165.             f = tempfile.NamedTemporaryFile(delete=False)
  166.             f.write(model.file.read())
  167.             f.close()
  168.  
  169.             crf_tagger = pycrfsuite.Tagger()
  170.             crf_tagger.open(f.name)
  171.  
  172.             last_update = model.date
  173.  
  174.         labels = crf_tagger.tag(features)
  175.  
  176.         probs = []
  177.         for pos, label in enumerate(labels):
  178.             probs.append(crf_tagger.marginal(label, pos))
  179.  
  180.     else:
  181.         label = None
  182.  
  183.         for word in words:
  184.             if caps_exp.match(word):
  185.                 if label is None:
  186.                     label = 'NONE'
  187.                     labels.append('B-' + label)
  188.                 else:
  189.                     labels.append('I-' + label)
  190.             else:
  191.                 label = None
  192.                 labels.append(label)
  193.  
  194.         probs = [0 for i in xrange(len(words))]
  195.  
  196.     text, indices = "", {}
  197.     for index, word in enumerate(words):
  198.         indices[len(text)] = index
  199.         text += word + " "
  200.     text = text[:-1].encode('utf-8')
  201.  
  202.     r = session.post('http://localhost:8983/solr/wikidata_items/tag?overlaps=LONGEST_DOMINANT_RIGHT&tagsLimit=10000&wt=json',
  203.                      headers={'Content-Type': 'text/plain'}, data=text)
  204.     j = r.json()
  205.  
  206.     docs = {doc['id']: doc for doc in j['response']['docs']}
  207.  
  208.     for tag in j['tags']:
  209.         start, end, ids = tag[1], tag[3], tag[5]
  210.         ids = [id for id in ids if id[0] == 'Q']
  211.         id = 'Q' + str(min([int(id[1:]) for id in ids]))
  212.  
  213.         if start not in indices:
  214.             continue
  215.         start = indices[start]
  216.  
  217.         if end + 1 not in indices:
  218.             continue
  219.         end = indices[end + 1]
  220.  
  221.         doc = docs[id]
  222.         if 'category' not in doc:
  223.             continue
  224.         category = doc['category']
  225.  
  226.         if category == 'UNKNOWN':
  227.             continue
  228.  
  229.         prob = sum(probs[start:end]) / (end - start)
  230.         if prob >= 0.90:
  231.             continue
  232.  
  233.         for index in xrange(start, end):
  234.             if index == start:
  235.                 labels[index] = 'B-' + category
  236.             else:
  237.                 labels[index] = 'I-' + category
  238.  
  239.     labels = [None if label is 'O' else label for label in labels]
  240.  
  241.     return jsonify(success=True, words=words, labels=labels)
  242.  
  243.  
  244. @app.route('/pages', methods=['PUT'])
  245. def api_put_page():
  246.     print "Putting page"
  247.  
  248.     if 'token' not in request.json:
  249.         return jsonify(success=False, error="No token provided")
  250.     token = request.json['token']
  251.  
  252.     account = Account.objects(token=token)
  253.     if not account:
  254.         return jsonify(success=False, error="Invalid token")
  255.     else:
  256.         account = account.first()
  257.  
  258.     if 'title' not in request.json:
  259.         return jsonify(success=False, error="No title provided")
  260.     title = request.json['title']
  261.  
  262.     if 'words' not in request.json:
  263.         return jsonify(success=False, error="No words provided")
  264.     words = request.json['words']
  265.  
  266.     if 'labels' not in request.json:
  267.         return jsonify(success=False, error="No labels provided")
  268.     labels = request.json['labels']
  269.  
  270.     tokens = []
  271.     for word, label in zip(words, labels):
  272.         tokens.append(Token(word=word, label=label))
  273.  
  274.     page = Page(title=title, tokens=tokens, account=account)
  275.     page.save()
  276.  
  277.     return jsonify(success=True, code=str(page.id))
  278.  
  279. if __name__ == '__main__':
  280.     app.run(debug=True)
Add Comment
Please, Sign In to add comment