Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Parse a Penn-Treebank-style bracketed string into a flat node/edge graph.
 *
 * Grammar as implemented here:
 *   - `(` opens an interior node; its label is the run of token characters
 *     that IMMEDIATELY follows the paren (no whitespace is skipped, so
 *     `( S a)` yields an interior node with an empty label and the leaves
 *     `S` and `a`).
 *   - `)` closes the innermost open interior node.
 *   - Any other run of non-whitespace, non-parenthesis characters is a leaf.
 *   - Several top-level trees may appear one after another; their roots simply
 *     have no incoming edge.
 *   - Unclosed `(` at end of input are tolerated (the nodes are kept).
 *   - A `)` with no matching `(` at top level ends parsing; anything after it
 *     is ignored.
 *
 * A token is any maximal run of characters that are neither whitespace nor
 * parentheses. This guarantees the scanner always makes progress, including
 * on characters such as `"`, `+`, `@` or `<`.
 *
 * The scan is iterative (explicit parent stack, single index into the input),
 * so nesting depth is not limited by the JavaScript call stack and the input
 * is never re-sliced character by character.
 *
 * @param {string} penn_string Bracketed tree text. `null`/`undefined` are
 *     treated as the empty string; other values are converted with String().
 * @returns {{nodes: Array<{id: number, label: string, reflexive: boolean}>,
 *            edges: Array<{source: Object, target: Object, left: boolean, right: boolean}>}}
 *     `nodes[i].id === i`, numbered in order of appearance. Each edge points
 *     from a parent node object to a child node object (the same object
 *     instances that are stored in `nodes`).
 */
function pennTreeParse(penn_string) {
    var text = penn_string == null ? '' : String(penn_string);
    var length = text.length;
    var whitespace = /\s/;

    var nodes = [];
    var edges = [];
    var openParents = []; // ids of interior nodes whose ')' has not been seen yet
    var pos = 0;

    // True for characters that can never be part of a token.
    function isDelimiter(ch) {
        return ch === '(' || ch === ')' || whitespace.test(ch);
    }

    // Consume and return the (possibly empty) token starting at `pos`.
    function readToken() {
        var start = pos;
        while (pos < length && !isDelimiter(text[pos])) {
            pos += 1;
        }
        return text.slice(start, pos);
    }

    // Append a node and link it to the innermost open interior node, if any.
    function addNode(label) {
        // To show the node number in the title, use label + '-' + nodes.length here.
        var node = {id: nodes.length, label: label, reflexive: false};
        nodes.push(node);
        if (openParents.length > 0) {
            var parent = nodes[openParents[openParents.length - 1]];
            edges.push({source: parent, target: node, left: false, right: true});
        }
        return node.id;
    }

    while (pos < length) {
        var ch = text[pos];

        if (whitespace.test(ch)) {
            pos += 1;
        } else if (ch === ')') {
            if (openParents.length === 0) {
                // Unmatched ')' at top level: stop and ignore the rest.
                break;
            }
            openParents.pop();
            pos += 1;
        } else if (ch === '(') {
            pos += 1;
            // The label is read before the node becomes the current parent,
            // so the edge (if any) comes from the enclosing node.
            openParents.push(addNode(readToken()));
        } else {
            // `ch` is a token character, so the leaf token is never empty.
            addNode(readToken());
        }
    }

    return {nodes: nodes, edges: edges};
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement