Guest User

Untitled

a guest
Nov 25th, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.22 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """
  3. usage: {} < corpus.json > plot.pdf
  4.  
  5. The corpus file must contain one JSON document per line,
  6. features must be stored in a field names "{}",
  7. classes in a field names "{}".
  8. """
  9.  
  10. import sys
  11. import json
  12. from collections import defaultdict
  13. import numpy as np
  14. import matplotlib.pyplot as plt
  15. from matplotlib.ticker import EngFormatter
  16.  
  17. FEATURES_FIELD = "features"
  18. CLASS_FIELD = "category"
  19. RATIO = (2, 3)
  20.  
  21.  
  22. # display help if no data on stdin
  23. if sys.stdin.isatty():
  24. print(__doc__.format(sys.argv[0], FEATURES_FIELD, CLASS_FIELD))
  25. sys.exit(1)
  26.  
  27.  
  28. # load corpus from the corpus file
  29. features = []
  30. class_features = defaultdict(list)
  31. for line in sys.stdin:
  32. data = json.loads(line)
  33. features.append(data[FEATURES_FIELD])
  34. class_features[data[CLASS_FIELD]].append(data[FEATURES_FIELD])
  35. features = np.transpose(np.array(features))
  36. class_features = {
  37. cl: np.transpose(np.array(fts))
  38. for cl, fts in class_features.items()}
  39.  
  40. # setup global figure parameters
  41. plotsize = (len(features), len(class_features) + 1)
  42. fig = plt.figure(figsize=tuple(x * plotsize[i] for i, x in enumerate(RATIO)))
  43. plt.rcParams["axes.grid"] = True
  44. plt.rcParams["grid.linestyle"] = "dotted"
  45.  
  46. # display information about the corpus as suptitle
  47. desc = "samples:{} (".format(len(features[0]))
  48. desc += ", ".join([
  49. "{}:{}".format(cl, len(fts[0]))
  50. for cl, fts in class_features.items()])
  51. fig.suptitle(desc + ")")
  52.  
  53. # display features subplots
  54. ylims = []
  55. for i, vals in enumerate(features):
  56. axis = plt.subplot2grid(plotsize, (i, 0))
  57. axis.yaxis.set_major_formatter(EngFormatter())
  58. if i == 0:
  59. plt.title("all")
  60. axis.set_ylabel("feature #{}".format(i))
  61. axis.get_yaxis().set_label_coords(-0.15, 0.5)
  62. plt.hist(vals)
  63. ylims.append(axis.get_ylim())
  64.  
  65. # display per-class features subplots
  66. for i, c in enumerate(class_features):
  67. for j, vals in enumerate(class_features[c]):
  68. axis = plt.subplot2grid(plotsize, (j, i+1))
  69. axis.yaxis.set_major_formatter(EngFormatter())
  70. if j == 0:
  71. plt.title("{}".format(c))
  72. plt.hist(vals)
  73. axis.set_ylim(ylims[j])
  74.  
  75. # fix layout
  76. plt.tight_layout()
  77. fig.subplots_adjust(top=0.90)
  78.  
  79. # output the resulting plot
  80. plt.savefig(sys.stdout.buffer, format="pdf")
  81. plt.close()
Add Comment
Please, Sign In to add comment