Advertisement
konchin_shih

07/27 18:00

Jul 27th, 2021
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.79 KB | None | 0 0
  1. import numpy
  2. import cv2
  3. import pandas
  4. import seaborn
  5. # import os
  6. from matplotlib import pyplot
  7.  
  8. # path = "./data/"
  9.  
  10. # pictureList = []
  11. # for filename in os.listdir(path):
  12. #     if filename.endswith(".jpg"):
  13. #         pictureList.append(cv2.imread(path + filename, cv2.IMREAD_GRAYSCALE))
  14.  
  15.  
  16. picture = cv2.imread("1.jpg", cv2.IMREAD_GRAYSCALE)
  17.  
  18.  
  19. # Data processing
  20.  
  21. def shrinkGraph(originalGraph, xShrinkRate: int = 1, yShrinkRate: int = 1):
  22.  
  23.     xSize = len(originalGraph)
  24.     ySize = len(originalGraph[0])
  25.  
  26.     fixed_xSize = xSize // xShrinkRate
  27.     fixed_ySize = ySize // yShrinkRate
  28.  
  29.     returnGraph = numpy.zeros((fixed_xSize, fixed_ySize))
  30.  
  31.     for xi in range(fixed_xSize):
  32.         for yi in range(fixed_ySize):
  33.             for xj in range(xShrinkRate):
  34.                 for yj in range(yShrinkRate):
  35.                     returnGraph[xi][yi] += originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  36.             returnGraph[xi][yi] //= xShrinkRate * yShrinkRate
  37.  
  38.     return returnGraph
  39.  
  40.  
  41. def heapslaw(originalGraph, xShrinkRate, yShrinkRate):
  42.  
  43.     xSize = len(originalGraph)
  44.     ySize = len(originalGraph[0])
  45.  
  46.     fixed_xSize = xSize // xShrinkRate
  47.     fixed_ySize = ySize // yShrinkRate
  48.  
  49.     letterDict = {}
  50.     wordDict = {}
  51.  
  52.     for xi in range(fixed_xSize):
  53.         for yi in range(fixed_ySize):
  54.             word = 0
  55.             for xj in range(xShrinkRate):
  56.                 for yj in range(yShrinkRate):
  57.                     letter = originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  58.                     if letter not in letterDict:
  59.                         letterDict[letter] = 0
  60.                     letterDict[letter] += 1
  61.                     word += letter
  62.             if word not in wordDict:
  63.                 wordDict[word] = 0
  64.             wordDict[word] += 1
  65.  
  66.     sorted_letterList = []
  67.     sorted_wordList = []
  68.  
  69.     for k, v in letterDict.items():
  70.         sorted_letterList.append((k, v))
  71.     for k, v in wordDict.items():
  72.         sorted_wordList.append((k, v))
  73.  
  74.     def cmp(val):
  75.         return val[1]
  76.  
  77.     sorted_letterList.sort(reverse=True, key=cmp)
  78.     sorted_wordList.sort(reverse=True, key=cmp)
  79.  
  80.     modified_letterDict = {}
  81.     modified_wordDict = {}
  82.  
  83.     for i in range(len(sorted_letterList)):
  84.         modified_letterDict[sorted_letterList[i][0]] = i
  85.     for i in range(len(sorted_wordList)):
  86.         modified_wordDict[sorted_wordList[i][0]] = i
  87.  
  88.     final_letterList = []
  89.     final_wordList = []
  90.  
  91.     for xi in range(fixed_xSize):
  92.         for yi in range(fixed_ySize):
  93.             word = 0
  94.             for xj in range(xShrinkRate):
  95.                 for yj in range(yShrinkRate):
  96.                     letter = originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  97.                     word += letter
  98.                     final_letterList.append(modified_letterDict[letter])
  99.             word //= xShrinkRate * yShrinkRate
  100.             for i in range(xShrinkRate * yShrinkRate):
  101.                 final_wordList.append(modified_wordDict[word])
  102.  
  103.     pyplot.figure(figsize=(15, 5))
  104.     pyplot.subplot(1, 2, 1)
  105.     pyplot.scatter(final_wordList, final_letterList, s=0.5)
  106.     pyplot.title(f"Heaps\' law ({xShrinkRate} * {yShrinkRate})")
  107.     pyplot.ylabel('rank')
  108.     pyplot.xlabel('rank')
  109.  
  110.  
  111. def toGraph(originalGraph, xShrinkRate, yShrinkRate):
  112.  
  113.     xSize = len(originalGraph)
  114.     ySize = len(originalGraph[0])
  115.  
  116.     fixed_xSize = xSize // xShrinkRate
  117.     fixed_ySize = ySize // yShrinkRate
  118.     fixed_graph = numpy.zeros((fixed_xSize, fixed_ySize))
  119.  
  120.     for xi in range(fixed_xSize):
  121.         for yi in range(fixed_ySize):
  122.             for xj in range(xShrinkRate):
  123.                 for yj in range(yShrinkRate):
  124.                     fixed_graph[xi][yi] += originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
  125.             fixed_graph[xi][yi] //= xShrinkRate * yShrinkRate
  126.  
  127.     fixed_result = numpy.array(pandas.value_counts(fixed_graph.flatten()))
  128.     returnVar = numpy.arange(1, len(fixed_result) + 1)
  129.  
  130.     pyplot.figure(figsize=(15, 5))
  131.     pyplot.subplot(1, 2, 1)
  132.     pyplot.plot(returnVar, fixed_result)
  133.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  134.     pyplot.ylabel('appear time')
  135.     pyplot.xlabel('rank')
  136.  
  137.     pyplot.subplot(1, 2, 2)
  138.     pyplot.plot(returnVar, fixed_result)
  139.     pyplot.xscale('log')
  140.     pyplot.yscale('log')
  141.     pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
  142.     pyplot.ylabel('appear time(log_10)')
  143.     pyplot.xlabel('rank(log_10)')
  144.  
  145.     return [returnVar, fixed_result]
  146.  
  147.  
  148. trans_picture = picture.tolist()
  149.  
  150. fixed_resultList = []
  151. for i in range(1, 10 + 1):
  152.     fixed_resultList.append(toGraph(trans_picture, i, i))  # here
  153. #   fixed_resultList.append(toGraph(trans_pictureList, 1, i))
  154. #   fixed_resultList.append(toGraph(trans_pictureList, i, 1))
  155.  
  156.  
  157. for i in range(1, 10 + 1):
  158.     heapslaw(shrinkGraph(trans_picture, 2, 2), i, i)
  159.  
  160.  
  161. # 畫總圖
  162.  
  163. pyplot.figure(figsize=(15, 5))
  164. pyplot.subplot(1, 2, 1)
  165. for i in fixed_resultList:
  166.     pyplot.plot(i[0], i[1])
  167. pyplot.title('total')
  168. pyplot.ylabel('appear time')
  169. pyplot.xlabel('rank')
  170.  
  171.  
  172. pyplot.subplot(1, 2, 2)
  173. for i in fixed_resultList:
  174.     pyplot.plot(i[0], i[1])
  175. pyplot.xscale('log')
  176. pyplot.yscale('log')
  177. pyplot.title('total')
  178. pyplot.ylabel('appear time(log_10)')
  179. pyplot.xlabel('rank(log_10)')
  180.  
  181.  
  182. # 分析圖
  183.  
  184. logarithmic_resultList = []  # [x, y]
  185. for i in fixed_resultList:
  186.     logarithmic_resultList.append([numpy.log10(i[0]), numpy.log10(i[1])])
  187.  
  188.  
  189. pyplot.figure(figsize=(15, 5))
  190. pyplot.subplot(1, 2, 1)
  191.  
  192. for i in range(10):
  193.     u = seaborn.regplot(logarithmic_resultList[i][0],
  194.                         logarithmic_resultList[i][1],
  195.                         label=f"{i + 1} * {i + 1}")
  196. u.set_title('analyze')
  197. u.set_ylabel('appear time(log_10)')
  198. u.set_xlabel('rank(log_10)')
  199. u.legend(loc='upper right')
  200.  
  201. # 線條
  202. pyplot.subplot(1, 2, 2)
  203.  
  204. for i in range(10):
  205.     v = seaborn.regplot(logarithmic_resultList[i][0],
  206.                         logarithmic_resultList[i][1],
  207.                         label=f"{i + 1} * {i + 1}",
  208.                         scatter=False)
  209. v.set_title('analyze')
  210. v.set_ylabel('appear time(log_10)')
  211. v.set_xlabel('rank(log_10)')
  212. v.legend(loc='upper right')
  213.  
  214. # 取數值
  215. pyplot.figure(figsize=(0.001, 0.001))
  216.  
  217.  
  218. axList = []
  219.  
  220. for i in range(10):
  221.     pyplot.subplot(1, 10, i + 1)
  222.     axList.append(seaborn.regplot(logarithmic_resultList[i][0],
  223.                                   logarithmic_resultList[i][1],
  224.                                   scatter=False))
  225.  
  226. slopeList = []
  227.  
  228. for i in range(10):
  229.     slope, intercept = numpy.polyfit(axList[i].get_lines()[0].get_xdata(),
  230.                                      axList[i].get_lines()[0].get_ydata(),
  231.                                      1)
  232.     print(f"slope {i + 1} = {slope}")
  233.     slopeList.append(slope)
  234.  
  235. print('the average of slopes =', sum(slopeList) / 10)
  236.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement