Advertisement
Guest User

Untitled

a guest
Mar 31st, 2015
214
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.13 KB | None | 0 0
  /**
   * Parse a bracketed tree string (Penn-treebank style, going by the function
   * name), e.g. "(S (NP The dog) (VP barks))", into flat node and edge lists.
   *
   * Grammar as implemented:
   *   - "(" opens an interior node; the token that follows it IMMEDIATELY
   *     (no whitespace is skipped) is its label, which may therefore be empty.
   *   - Any other token becomes a leaf of the innermost open node.
   *   - ")" closes the innermost open node. A stray ")" at the top level
   *     stops parsing; anything after it is ignored.
   *   - An unclosed "(" is tolerated: parsing simply ends with the input.
   *
   * A token is any maximal run of characters that are neither whitespace nor
   * parentheses. The earlier implementation recognised only a fixed whitelist
   * of letters and punctuation and never advanced past any other character
   * (for example '"', '+', '@', '|'), which made it loop forever on such input.
   *
   * @param {string} penn_string - The bracketed tree text to parse.
   * @returns {{nodes: Array<Object>, edges: Array<Object>}}
   *   nodes: {id, label, reflexive: false} in order of appearance; a node's id
   *          equals its index in the array.
   *   edges: {source, target, left: false, right: true}, one per non-root
   *          node, where source/target are references to the parent and child
   *          node objects.
   */
  function pennTreeParse(penn_string) {
    var WHITESPACE = /\s/;
    var nodes = [];
    var edges = [];
    var length = penn_string.length;
    // Cursor into penn_string. Scanning by index avoids re-slicing the
    // remaining input once per consumed character.
    var pos = 0;

    // Advance the cursor past any run of whitespace.
    function skipWhiteSpace() {
      while (pos < length && WHITESPACE.test(penn_string[pos])) {
        pos += 1;
      }
    }

    // Consume and return the token at the cursor. Parentheses and whitespace
    // are the only delimiters; the result is '' when the cursor already sits
    // on a delimiter or at the end of the input.
    function readToken() {
      var start = pos;
      while (pos < length) {
        var c = penn_string[pos];
        if (c === '(' || c === ')' || WHITESPACE.test(c)) {
          break;
        }
        pos += 1;
      }
      return penn_string.slice(start, pos);
    }

    // Parse consecutive sibling nodes whose parent is nodes[parent]
    // (parent === -1 means top level) until the matching ")" has been
    // consumed or the input is exhausted.
    function parseSiblings(parent) {
      for (;;) {
        skipWhiteSpace();
        if (pos >= length) {
          return;
        }

        var c = penn_string[pos];
        if (c === ')') {
          pos += 1;
          return;
        }

        var isInterior = c === '(';
        if (isInterior) {
          pos += 1;
        }

        // For a leaf the cursor is on a non-delimiter character here, so the
        // token is never empty and every iteration makes progress.
        var id = nodes.length;
        // To show the node number in the label, append the id to the token.
        var node = {id: id, label: readToken(), reflexive: false};
        nodes.push(node);
        if (parent !== -1) {
          edges.push({source: nodes[parent], target: node, left: false, right: true});
        }

        if (isInterior) {
          parseSiblings(id);
        }
      }
    }

    parseSiblings(-1);
    return {nodes: nodes, edges: edges};
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement