Advertisement
konchin_shih

07/28 20:15

Jul 28th, 2021
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.86 KB | None | 0 0
  1. import numpy
  2. import cv2
  3. import pandas
  4. import seaborn
  5. # import os
  6. from matplotlib import pyplot
  7.  
  8. # path = "./data/"
  9. maxShrinkRate = 10
  10.  
  11. # pictureList = []
  12. # for filename in os.listdir(path):
  13. #     if filename.endswith(".jpg"):
  14. #         pictureList.append(cv2.imread(path + filename, cv2.IMREAD_GRAYSCALE))
  15.  
  16.  
  17. picture = cv2.imread("211.jpg", cv2.IMREAD_GRAYSCALE)
  18.  
  19.  
  20. # Data processing
  21.  
  22. def shrinkGraph(originalGraph, xShrinkRate: int = 1, yShrinkRate: int = 1):
  23.  
  24.     xSize = len(originalGraph)
  25.     ySize = len(originalGraph[0])
  26.  
  27.     fixed_xSize = xSize // xShrinkRate
  28.     fixed_ySize = ySize // yShrinkRate
  29.  
  30.     returnGraph = numpy.zeros((fixed_xSize, fixed_ySize))
  31.  
  32.     for xi in range(fixed_xSize):
  33.         for yi in range(fixed_ySize):
  34.             for xj in range(xShrinkRate):
  35.                 for yj in range(yShrinkRate):
  36.                     returnGraph[xi][yi] += originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  37.             # returnGraph[xi][yi] //= xShrinkRate * yShrinkRate
  38.  
  39.     return returnGraph
  40.  
  41.  
  42. def heapslaw(originalGraph, xShrinkRate, yShrinkRate):
  43.  
  44.     xSize = len(originalGraph)
  45.     ySize = len(originalGraph[0])
  46.  
  47.     fixed_xSize = xSize // xShrinkRate
  48.     fixed_ySize = ySize // yShrinkRate
  49.  
  50.     letterDict = {}
  51.     wordDict = {}
  52.  
  53.     for xi in range(fixed_xSize):
  54.         for yi in range(fixed_ySize):
  55.             word = 0
  56.             for xj in range(xShrinkRate):
  57.                 for yj in range(yShrinkRate):
  58.                     letter = originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  59.                     if letter not in letterDict:
  60.                         letterDict[letter] = 0
  61.                     letterDict[letter] += 1
  62.                     word += letter
  63.             if word not in wordDict:
  64.                 wordDict[word] = 0
  65.             wordDict[word] += 1
  66.  
  67.     sorted_letterList = []
  68.     sorted_wordList = []
  69.  
  70.     for k, v in letterDict.items():
  71.         sorted_letterList.append((k, v))
  72.     for k, v in wordDict.items():
  73.         sorted_wordList.append((k, v))
  74.  
  75.     def cmp(val):
  76.         return val[1]
  77.  
  78.     sorted_letterList.sort(reverse=True, key=cmp)
  79.     sorted_wordList.sort(reverse=True, key=cmp)
  80.  
  81.     modified_letterDict = {}
  82.     modified_wordDict = {}
  83.  
  84.     for i in range(len(sorted_letterList)):
  85.         modified_letterDict[sorted_letterList[i][0]] = i
  86.     for i in range(len(sorted_wordList)):
  87.         modified_wordDict[sorted_wordList[i][0]] = i
  88.  
  89.     final_letterList = []
  90.     final_wordList = []
  91.  
  92.     for xi in range(fixed_xSize):
  93.         for yi in range(fixed_ySize):
  94.             word = 0
  95.             for xj in range(xShrinkRate):
  96.                 for yj in range(yShrinkRate):
  97.                     letter = originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  98.                     word += letter
  99.                     final_letterList.append(modified_letterDict[letter])
  100.             for i in range(xShrinkRate * yShrinkRate):
  101.                 final_wordList.append(modified_wordDict[word])
  102.  
  103.     pyplot.figure(figsize=(15, 5))
  104.     pyplot.subplot(1, 2, 1)
  105.     pyplot.scatter(final_wordList, final_letterList, s=0.5)
  106.     pyplot.title(f"Heaps\' law ({xShrinkRate} * {yShrinkRate})")
  107.     pyplot.ylabel('rank')
  108.     pyplot.xlabel('rank')
  109.  
  110.  
  111. def toGraph(originalGraph, xShrinkRate, yShrinkRate):
  112.  
  113.     xSize = len(originalGraph)
  114.     ySize = len(originalGraph[0])
  115.  
  116.     fixed_xSize = xSize // xShrinkRate
  117.     fixed_ySize = ySize // yShrinkRate
  118.     fixed_graph = numpy.zeros((fixed_xSize, fixed_ySize))
  119.  
  120.     for xi in range(fixed_xSize):
  121.         for yi in range(fixed_ySize):
  122.             for xj in range(xShrinkRate):
  123.                 for yj in range(yShrinkRate):
  124.                     fixed_graph[xi][yi] += originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  125.             # fixed_graph[xi][yi] //= xShrinkRate * yShrinkRate
  126.  
  127.     fixed_result = numpy.array(pandas.value_counts(fixed_graph.flatten()))
  128.     returnVar = numpy.arange(1, len(fixed_result) + 1)
  129.  
  130.     pyplot.figure(figsize=(15, 5))
  131.     pyplot.subplot(1, 2, 1)
  132.     pyplot.plot(returnVar, fixed_result)
  133.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  134.     pyplot.ylabel('appear time')
  135.     pyplot.xlabel('rank')
  136.  
  137.     pyplot.subplot(1, 2, 2)
  138.     pyplot.plot(returnVar, fixed_result)
  139.     pyplot.xscale('log')
  140.     pyplot.yscale('log')
  141.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  142.     pyplot.ylabel('appear time(log_10)')
  143.     pyplot.xlabel('rank(log_10)')
  144.  
  145.     return [returnVar, fixed_result]
  146.  
  147.  
  148. trans_picture = picture.tolist()
  149.  
  150. fixed_resultList = []
  151. for i in range(1, maxShrinkRate + 1):
  152.     fixed_resultList.append(toGraph(trans_picture, i, i))  # here
  153. #   fixed_resultList.append(toGraph(trans_pictureList, 1, i))
  154. #   fixed_resultList.append(toGraph(trans_pictureList, i, 1))
  155.  
  156.  
  157. for i in range(1, maxShrinkRate + 1):
  158.     heapslaw(shrinkGraph(trans_picture, 2, 2), i, i)
  159.  
  160.  
  161. # 畫總圖
  162.  
  163. pyplot.figure(figsize=(15, 5))
  164. pyplot.subplot(1, 2, 1)
  165. for i in fixed_resultList:
  166.     pyplot.plot(i[0], i[1])
  167. pyplot.title('total')
  168. pyplot.ylabel('appear time')
  169. pyplot.xlabel('rank')
  170.  
  171.  
  172. pyplot.subplot(1, 2, 2)
  173. for i in fixed_resultList:
  174.     pyplot.plot(i[0], i[1])
  175. pyplot.xscale('log')
  176. pyplot.yscale('log')
  177. pyplot.title('total')
  178. pyplot.ylabel('appear time(log_10)')
  179. pyplot.xlabel('rank(log_10)')
  180.  
  181.  
  182. # 分析圖
  183.  
  184. logarithmic_resultList = []  # [x, y]
  185. for i in fixed_resultList:
  186.     logarithmic_resultList.append([numpy.log10(i[0]), numpy.log10(i[1])])
  187.  
  188.  
  189. pyplot.figure(figsize=(15, 5))
  190. pyplot.subplot(1, 2, 1)
  191.  
  192. for i in range(maxShrinkRate):
  193.     u = seaborn.regplot(logarithmic_resultList[i][0],
  194.                         logarithmic_resultList[i][1],
  195.                         label=f"{i + 1} * {i + 1}")
  196. u.set_title('analyze')
  197. u.set_ylabel('appear time(log_10)')
  198. u.set_xlabel('rank(log_10)')
  199. u.legend(loc='upper right')
  200.  
  201. # 線條
  202. pyplot.subplot(1, 2, 2)
  203.  
  204. for i in range(maxShrinkRate):
  205.     v = seaborn.regplot(logarithmic_resultList[i][0],
  206.                         logarithmic_resultList[i][1],
  207.                         label=f"{i + 1} * {i + 1}",
  208.                         scatter=False)
  209. v.set_title('analyze')
  210. v.set_ylabel('appear time(log_10)')
  211. v.set_xlabel('rank(log_10)')
  212. v.legend(loc='upper right')
  213.  
  214. # 取數值
  215. pyplot.figure(figsize=(0.001, 0.001))
  216.  
  217.  
  218. axList = []
  219.  
  220. for i in range(maxShrinkRate):
  221.     pyplot.subplot(1, maxShrinkRate, i + 1)
  222.     axList.append(seaborn.regplot(logarithmic_resultList[i][0],
  223.                                   logarithmic_resultList[i][1],
  224.                                   scatter=False))
  225.  
  226. slopeList = []
  227.  
  228. for i in range(maxShrinkRate):
  229.     slope, intercept = numpy.polyfit(axList[i].get_lines()[0].get_xdata(),
  230.                                      axList[i].get_lines()[0].get_ydata(),
  231.                                      1)
  232.     print(f"slope {i + 1} = {slope}")
  233.     slopeList.append(slope)
  234.  
  235. print('the average of slopes =', sum(slopeList) / maxShrinkRate)
  236.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement