Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf8
- import re
- import uuid
- import datetime
- import tempfile
- import requests
- session = requests.Session()
- from flask import Flask, jsonify, request
- from mongoengine import *
- import pycrfsuite
- from utils import *
class Account(Document):
    """A registered user of the annotation service."""

    # Login name; doubles as the document's primary key.
    username = StringField(required=True, primary_key=True)

    # NOTE(review): the login and registration handlers in this file store
    # and compare this value verbatim, i.e. as plaintext. Consider hashing.
    password = StringField(required=True)

    # Defaults to the local time at which the document is instantiated.
    date = DateTimeField(default=datetime.datetime.now)

    # Session token (a UUID4 string) issued on registration and on login.
    token = StringField()
class Token(EmbeddedDocument):
    """One word of a page together with the label assigned to it."""

    # Surface form of the word.
    word = StringField(required=True)

    # Label string for the word; both fields are mandatory.
    label = StringField(required=True)
class Page(Document):
    """A titled, labelled sequence of tokens submitted by an account."""

    title = StringField(required=True)

    # Ordered word/label pairs making up the page.
    tokens = ListField(EmbeddedDocumentField(Token), required=True)

    # Defaults to the local time at which the document is instantiated.
    date = DateTimeField(default=datetime.datetime.now)

    # The account that submitted the page.
    account = ReferenceField(Account, required=True)

    # Starts out False; nothing in the visible code sets it to True.
    # NOTE(review): presumably flipped by a review step elsewhere -- confirm.
    validated = BooleanField(default=False)
class Model(Document):
    """A stored tagger model file plus the pages associated with it."""

    # Binary model file; the /text handler reads it and opens it with
    # pycrfsuite.
    file = FileField(required=True)

    # NOTE(review): presumably the pages the model was trained on -- confirm.
    pages = ListField(ReferenceField(Page), required=True)

    # Compared against the in-process `last_update` timestamp to decide
    # whether the cached tagger has to be reloaded.
    date = DateTimeField(default=datetime.datetime.now)
# The Flask application object that every route below is registered on.
app = Flask(__name__)


@app.before_first_request
def check_connection():
    """Connect MongoEngine to the 'neat' database.

    Registered to run once, before the first request is handled, so the
    document classes have a connection by the time a view queries them.
    """
    connect('neat')
@app.route('/token', methods=['PUT'])
def api_token():
    """Report whether a session token belongs to an existing account.

    Expects a JSON object body with a ``token`` key.

    Returns:
        ``{"success": true, "valid": <bool>}`` when a token was supplied, or
        ``{"success": false, "error": "No token provided"}`` when the body
        is missing, is not a JSON object, or has no ``token`` key.
    """
    print("Token")

    # The body may be absent or not JSON; parse it leniently so the
    # membership test below cannot fail on a missing payload.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'token' not in payload:
        return jsonify(success=False, error="No token provided")

    token = payload['token']
    # Tokens are UUID strings. Anything else (null, number, nested object)
    # cannot be valid and must not be handed to the database query.
    if not isinstance(token, (str, type(u''))):
        return jsonify(success=True, valid=False)

    return jsonify(success=True, valid=bool(Account.objects(token=token)))
@app.route('/user', methods=['POST'])  # use post to send login information
def api_login():
    """Log a user in and issue a fresh session token.

    Expects a JSON object body with ``username`` and ``password`` keys.

    Returns:
        ``{"success": true, "token": <uuid4 string>}`` on success; otherwise
        ``{"success": false, "error": <message>}`` for a missing username,
        an unknown username, a missing password or a wrong password.
    """
    print("Login")

    # The body may be absent or not JSON; parse it leniently so the
    # membership tests below cannot fail on a missing payload.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'username' not in payload:
        return jsonify(success=False, error="No username provided")

    username = payload['username']
    # Only a string can name an account; keep other JSON values (null,
    # numbers, nested objects) away from the database query.
    if not isinstance(username, (str, type(u''))):
        return jsonify(success=False, error="Username doesn't exist")

    account = Account.objects(username=username).first()
    if account is None:
        return jsonify(success=False, error="Username doesn't exist")

    if 'password' not in payload:
        return jsonify(success=False, error="No password provided")

    # NOTE(review): passwords are stored and compared as plaintext here.
    if payload['password'] != account.password:
        # A failed login must not report success (the original did).
        return jsonify(success=False, error="Invalid password")

    # Every successful login replaces the previous token.
    account.token = str(uuid.uuid4())
    account.save()
    return jsonify(success=True, token=account.token)
@app.route('/user', methods=['GET'])  # use get to list all members
def api_list_members():
    """List the usernames of all registered accounts.

    Each username is still printed to the server log, as before. The
    original view returned nothing, which Flask rejects as an invalid
    response; the usernames are now sent back to the caller.

    Returns:
        ``{"success": true, "members": [<username>, ...]}``
    """
    print("List members")
    usernames = [member.username for member in Account.objects]
    for username in usernames:
        print(username)
    return jsonify(success=True, members=usernames)
@app.route('/user', methods=['PUT'])  # use put to add a new user
def api_register():
    """Create a new account and issue its first session token.

    Expects a JSON object body with ``username`` and ``password`` keys,
    both strings.

    Returns:
        ``{"success": true, "token": <uuid4 string>}`` on success; otherwise
        ``{"success": false, "error": <message>}`` when a field is missing
        or not a string, or when the username is already taken.
    """
    print("Register")
    text_types = (str, type(u''))

    # The body may be absent or not JSON; parse it leniently so the
    # membership tests below cannot fail on a missing payload.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'username' not in payload:
        return jsonify(success=False, error="No username provided")

    username = payload['username']
    # Reject non-string values up front instead of letting them reach the
    # database query or fail later when the document is saved.
    if not isinstance(username, text_types):
        return jsonify(success=False, error="Username must be a string")
    if Account.objects(username=username):
        return jsonify(success=False, error="Username already exist")

    if 'password' not in payload:
        return jsonify(success=False, error="No password provided")
    password = payload['password']
    if not isinstance(password, text_types):
        return jsonify(success=False, error="Password must be a string")

    # NOTE(review): the password is persisted as plaintext.
    account = Account(username=username, password=password)
    account.token = str(uuid.uuid4())
    account.save()
    return jsonify(success=True, token=account.token)
# (label name, hex colour) pairs, in the order served by the /labels route.
labelsList = [
    ('PERSON', '#f44336'),
    ('ORGANIZATION', '#3f51b5'),
    ('LOCATION', '#4caf50'),
    ('PRODUCT', '#9c27b0'),
    ('ART', '#ff9800'),
    ('EVENT', '#ffeb3b'),
    ('OTHER', '#795548'),
]

# Name -> colour lookup holding the same pairs plus an extra black 'NONE'
# entry that is deliberately absent from labelsList.
labels = dict(labelsList, NONE='#000000')
@app.route('/labels', methods=['GET'])
def api_get_labels():
    """Serve the ordered (label, colour) pairs held in ``labelsList``.

    Returns:
        ``{"success": true, "labels": <labelsList>}``
    """
    print("Getting labels")
    response = jsonify(success=True, labels=labelsList)
    return response
# A "capitalised word": word characters containing at least one upper-case
# letter (A-Z or U+00C0..U+00DC).
_CAPS_WORD = u'\\w*[A-Z\u00C0-\u00DC]+\\w*'

# One capitalised word, optionally followed by more of them joined by
# whitespace or a hyphen, and not glued to other word characters.
caps_exp = re.compile(
    u'(?<!\\w)' + _CAPS_WORD + u'([\\s-]' + _CAPS_WORD + u')*(?!\\w)',
    re.UNICODE)

# Tagger cached for the lifetime of the process; loaded lazily by the
# /text handler.
crf_tagger = None

# Date of the model the cached tagger was loaded from. Starts at the epoch
# so that any stored model counts as newer.
last_update = datetime.datetime.fromtimestamp(0)
def _capitalization_labels(words):
    """Fallback labelling: tag each run of capitalised words as B-/I-NONE."""
    result = []
    inside_run = False
    for word in words:
        if caps_exp.match(word):
            result.append('I-NONE' if inside_run else 'B-NONE')
            inside_run = True
        else:
            result.append(None)
            inside_run = False
    return result


def _join_with_offsets(words):
    """Join words with single spaces and map character offsets to word indices.

    The returned dict maps the start offset of every word to its index and
    also maps the offset one past the end of the text (where a further word
    would start) to ``len(words)``, so a span ending on the last word can be
    resolved just like any other.
    """
    offsets = {}
    position = 0
    for index, word in enumerate(words):
        offsets[position] = index
        position += len(word) + 1
    offsets[position] = len(words)
    return " ".join(words), offsets


@app.route('/text', methods=['PUT'])
def api_put_text():
    """Tokenise a text and propose a label for every word.

    Expects a JSON object body with a ``text`` key. Words are labelled by
    the stored CRF model when one exists, otherwise by a capitalisation
    heuristic. Spans found by the Solr tagger at localhost:8983 then replace
    the labels wherever the average label probability is below 0.90.

    Returns:
        ``{"success": true, "words": [...], "labels": [...]}`` where 'O'
        labels are reported as null, or
        ``{"success": false, "error": "No text provided"}``.
    """
    global crf_tagger, last_update
    print("Putting text")

    # The body may be absent or not JSON; parse it leniently so the
    # membership test below cannot fail on a missing payload.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'text' not in payload:
        return jsonify(success=False, error="No text provided")

    # NOTE(review): get_sequence comes from utils; assumed to return the
    # word list and the matching per-word CRF features -- confirm.
    words, features = get_sequence(payload['text'])

    model = Model.objects().first()
    if model is not None:
        # (Re)load the tagger only when the stored model is newer than the
        # one cached in this process.
        if crf_tagger is None or last_update < model.date:
            print("Updating model...")
            # pycrfsuite opens models by file name, so spill the stored file
            # to disk first.
            # NOTE(review): the temporary file is never removed; confirm
            # whether it can be deleted once the tagger has opened it.
            with tempfile.NamedTemporaryFile(delete=False) as model_file:
                model_file.write(model.file.read())
            tagger = pycrfsuite.Tagger()
            tagger.open(model_file.name)
            crf_tagger = tagger
            last_update = model.date
        labels = list(crf_tagger.tag(features))
        probs = [crf_tagger.marginal(label, pos)
                 for pos, label in enumerate(labels)]
    else:
        labels = _capitalization_labels(words)
        # Zero confidence, so any Solr match below wins over the heuristic.
        probs = [0] * len(words)

    text, offsets = _join_with_offsets(words)
    r = session.post('http://localhost:8983/solr/wikidata_items/tag?overlaps=LONGEST_DOMINANT_RIGHT&tagsLimit=10000&wt=json',
                     headers={'Content-Type': 'text/plain'},
                     data=text.encode('utf-8'))
    j = r.json()
    docs = {doc['id']: doc for doc in j['response']['docs']}

    for tag in j['tags']:
        # NOTE(review): each tag is presumably a flat name/value list with
        # the start offset, end offset and ids at positions 1, 3 and 5.
        start_offset, end_offset, ids = tag[1], tag[3], tag[5]

        # Among the 'Q<number>' ids keep the lowest-numbered one; skip the
        # tag when there is none (the original raised on min([])).
        numbers = [int(item[1:]) for item in ids if item.startswith('Q')]
        if not numbers:
            continue
        entity_id = 'Q' + str(min(numbers))

        # Only spans aligned with word boundaries can be relabelled.
        if start_offset not in offsets or end_offset + 1 not in offsets:
            continue
        start, end = offsets[start_offset], offsets[end_offset + 1]
        if end <= start:
            continue

        doc = docs.get(entity_id)
        if doc is None:
            continue
        category = doc.get('category')
        if category is None or category == 'UNKNOWN':
            continue

        # Keep the existing labels where they are already confident; float()
        # avoids integer division on Python 2.
        if sum(probs[start:end]) / float(end - start) >= 0.90:
            continue

        labels[start] = 'B-' + category
        for index in range(start + 1, end):
            labels[index] = 'I-' + category

    # Equality, not identity: tagger output is not guaranteed to be the
    # same object as the 'O' literal.
    labels = [None if label == 'O' else label for label in labels]
    return jsonify(success=True, words=words, labels=labels)
@app.route('/pages', methods=['PUT'])
def api_put_page():
    """Store a labelled page on behalf of the account owning the token.

    Expects a JSON object body with ``token``, ``title``, ``words`` and
    ``labels`` keys; ``words`` and ``labels`` are parallel lists.

    Returns:
        ``{"success": true, "code": <page id>}`` on success; otherwise
        ``{"success": false, "error": <message>}`` for a missing field, an
        invalid token, or word/label lists of different lengths.
    """
    print("Putting page")

    # The body may be absent or not JSON; parse it leniently so the
    # membership tests below cannot fail on a missing payload.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'token' not in payload:
        return jsonify(success=False, error="No token provided")

    token = payload['token']
    # Tokens are UUID strings. Anything else (null, number, nested object)
    # cannot be valid and must not be handed to the database query.
    if not isinstance(token, (str, type(u''))):
        return jsonify(success=False, error="Invalid token")
    account = Account.objects(token=token).first()
    if account is None:
        return jsonify(success=False, error="Invalid token")

    if 'title' not in payload:
        return jsonify(success=False, error="No title provided")
    if 'words' not in payload:
        return jsonify(success=False, error="No words provided")
    if 'labels' not in payload:
        return jsonify(success=False, error="No labels provided")
    title, words, labels = payload['title'], payload['words'], payload['labels']

    # zip() would silently drop the surplus of the longer list and store a
    # truncated page, so refuse mismatched input instead.
    if len(words) != len(labels):
        return jsonify(success=False,
                       error="Words and labels must have the same length")

    tokens = [Token(word=word, label=label)
              for word, label in zip(words, labels)]
    page = Page(title=title, tokens=tokens, account=account)
    page.save()
    return jsonify(success=True, code=str(page.id))
if __name__ == '__main__':
    # Executed as a script: start Flask's built-in server with debug mode
    # switched on.
    app.run(debug=True)
Add Comment
Please, Sign In to add comment