konchin_shih

07/27 17:10

Jul 27th, 2021 (edited)
141
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.25 KB | None | 0 0
  1. import numpy
  2. import cv2
  3. import pandas
  4. import seaborn
  5. # import os
  6. from matplotlib import pyplot
  7.  
  8. # path = "./data/"
  9.  
  10. # pictureList = []
  11. # for filename in os.listdir(path):
  12. #     if filename.endswith(".jpg"):
  13. #         pictureList.append(cv2.imread(path + filename, cv2.IMREAD_GRAYSCALE))
  14.  
  15.  
  16. picture = cv2.imread("1.jpg", cv2.IMREAD_GRAYSCALE)
  17.  
  18.  
  19. # 資料處理
  20.  
  21. def heapslaw(originalGraph, xShrinkRate, yShrinkRate, xWordShrinkRate: int = 1, yWordShrinkRate: int = 1):
  22.  
  23.     xSize = len(originalGraph)
  24.     ySize = len(originalGraph[0])
  25.  
  26.     fixed_xWordSize = xSize // xWordShrinkRate
  27.     fixed_yWordSize = ySize // yWordShrinkRate
  28.  
  29.     letterDict = {}
  30.     wordDict = {}
  31.  
  32.     for xi in range(fixed_xWordSize):
  33.         for yi in range(fixed_yWordSize):
  34.             word = 0
  35.             for xj in range(xWordShrinkRate):
  36.                 for yj in range(yWordShrinkRate):
  37.                     letter = originalGraph[xi * xWordShrinkRate + xj][yi * yWordShrinkRate + yj]
  38.                     if letter not in letterDict:
  39.                         letterDict[letter] = 0
  40.                     letterDict[letter] += 1
  41.                     word += letter
  42.             if word not in wordDict:
  43.                 wordDict[word] = 0
  44.             wordDict[word] += 1
  45.  
  46.     sorted_letterList = []
  47.     sorted_wordList = []
  48.  
  49.     for k, v in letterDict.items():
  50.         sorted_letterList.append((k, v))
  51.     for k, v in wordDict.items():
  52.         sorted_wordList.append((k, v))
  53.  
  54.     def cmp(val):
  55.         return val[1]
  56.  
  57.     sorted_letterList.sort(reverse=True, key=cmp)
  58.     sorted_wordList.sort(reverse=True, key=cmp)
  59.  
  60.     modified_letterDict = {}
  61.     modified_wordDict = {}
  62.  
  63.     for i in range(len(sorted_letterList)):
  64.         modified_letterDict[sorted_letterList[i][0]] = i
  65.     for i in range(len(sorted_wordList)):
  66.         modified_wordDict[sorted_wordList[i][0]] = i
  67.  
  68.     fixed_xSize = xSize // xShrinkRate
  69.     fixed_ySize = ySize // yShrinkRate
  70.  
  71.     final_letterList = []
  72.     final_wordList = []
  73.  
  74.     for xi in range(fixed_xSize):
  75.         for yi in range(fixed_ySize):
  76.             word = 0
  77.             for xj in range(xShrinkRate):
  78.                 for yj in range(yShrinkRate):
  79.                     letter = originalGraph[xi * xWordShrinkRate + xj][yi * yWordShrinkRate + yj]
  80.                     word += letter
  81.                     final_letterList.append(modified_letterDict[letter])
  82.             word //= xShrinkRate * yShrinkRate
  83.             final_wordList.append(modified_wordDict[word])
  84.  
  85.     pyplot.figure(figsize=(15, 5))
  86.     pyplot.subplot(1, 2, 1)
  87.     pyplot.scatter(final_wordList, final_letterList, s=0.)
  88.     pyplot.title(f"Heaps\' law ({xShrinkRate} * {yShrinkRate})")
  89.     pyplot.ylabel('rank')
  90.     pyplot.xlabel('rank')
  91.  
  92.  
  93. def toGraph(originalGraph, xShrinkRate, yShrinkRate):
  94.  
  95.     xSize = len(originalGraph)
  96.     ySize = len(originalGraph[0])
  97.  
  98.     fixed_xSize = xSize // xShrinkRate
  99.     fixed_ySize = ySize // yShrinkRate
  100.     fixed_graph = numpy.zeros((fixed_xSize, fixed_ySize))
  101.  
  102.     for xi in range(fixed_xSize):
  103.         for yi in range(fixed_ySize):
  104.             for xj in range(xShrinkRate):
  105.                 for yj in range(yShrinkRate):
  106.                     fixed_graph[xi][yi] += originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  107.             fixed_graph[xi][yi] //= xShrinkRate * yShrinkRate
  108.  
  109.     fixed_result = numpy.array(pandas.value_counts(fixed_graph.flatten()))
  110.     returnVar = numpy.arange(1, len(fixed_result) + 1)
  111.  
  112.     pyplot.figure(figsize=(15, 5))
  113.     pyplot.subplot(1, 2, 1)
  114.     pyplot.plot(returnVar, fixed_result)
  115.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  116.     pyplot.ylabel('appear time')
  117.     pyplot.xlabel('rank')
  118.  
  119.     pyplot.subplot(1, 2, 2)
  120.     pyplot.plot(returnVar, fixed_result)
  121.     pyplot.xscale('log')
  122.     pyplot.yscale('log')
  123.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  124.     pyplot.ylabel('appear time(log_10)')
  125.     pyplot.xlabel('rank(log_10)')
  126.  
  127.     return [returnVar, fixed_result]
  128.  
  129.  
  130. trans_picture = picture.tolist()
  131.  
  132. fixed_resultList = []
  133. for i in range(1, 10 + 1):
  134.     fixed_resultList.append(toGraph(trans_picture, i, i))  # here
  135. #   fixed_resultList.append(toGraph(trans_pictureList, 1, i))
  136. #   fixed_resultList.append(toGraph(trans_pictureList, i, 1))
  137.  
  138.  
  139. for i in range(1, 10 + 1):
  140.     heapslaw(trans_picture, i, i)
  141.  
  142.  
  143. # 畫總圖
  144.  
  145. pyplot.figure(figsize=(15, 5))
  146. pyplot.subplot(1, 2, 1)
  147. for i in fixed_resultList:
  148.     pyplot.plot(i[0], i[1])
  149. pyplot.title('total')
  150. pyplot.ylabel('appear time')
  151. pyplot.xlabel('rank')
  152.  
  153.  
  154. pyplot.subplot(1, 2, 2)
  155. for i in fixed_resultList:
  156.     pyplot.plot(i[0], i[1])
  157. pyplot.xscale('log')
  158. pyplot.yscale('log')
  159. pyplot.title('total')
  160. pyplot.ylabel('appear time(log_10)')
  161. pyplot.xlabel('rank(log_10)')
  162.  
  163.  
  164. # 分析圖
  165.  
  166. logarithmic_resultList = []  # [x, y]
  167. for i in fixed_resultList:
  168.     logarithmic_resultList.append([numpy.log10(i[0]), numpy.log10(i[1])])
  169.  
  170.  
  171. pyplot.figure(figsize=(15, 5))
  172. pyplot.subplot(1, 2, 1)
  173.  
  174. for i in range(10):
  175.     u = seaborn.regplot(logarithmic_resultList[i][0],
  176.                         logarithmic_resultList[i][1],
  177.                         label=f"{i + 1} * {i + 1}")
  178. u.set_title('analyze')
  179. u.set_ylabel('appear time(log_10)')
  180. u.set_xlabel('rank(log_10)')
  181. u.legend(loc='upper right')
  182.  
  183. # 線條
  184. pyplot.subplot(1, 2, 2)
  185.  
  186. for i in range(10):
  187.     v = seaborn.regplot(logarithmic_resultList[i][0],
  188.                         logarithmic_resultList[i][1],
  189.                         label=f"{i + 1} * {i + 1}",
  190.                         scatter=False)
  191. v.set_title('analyze')
  192. v.set_ylabel('appear time(log_10)')
  193. v.set_xlabel('rank(log_10)')
  194. v.legend(loc='upper right')
  195.  
  196. # 取數值
  197. pyplot.figure(figsize=(0.001, 0.001))
  198.  
  199.  
  200. axList = []
  201.  
  202. for i in range(10):
  203.     pyplot.subplot(1, 10, i + 1)
  204.     axList.append(seaborn.regplot(logarithmic_resultList[i][0],
  205.                                   logarithmic_resultList[i][1],
  206.                                   scatter=False))
  207.  
  208. slopeList = []
  209.  
  210. for i in range(10):
  211.     slope, intercept = numpy.polyfit(axList[i].get_lines()[0].get_xdata(),
  212.                                      axList[i].get_lines()[0].get_ydata(),
  213.                                      1)
  214.     print(f"slope {i + 1} = {slope}")
  215.     slopeList.append(slope)
  216.  
  217. print('the average of slopes =', sum(slopeList) / 10)
  218.  
Add Comment
Please, Sign In to add comment