Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy
- import cv2
- import pandas
- import seaborn
- # import os
- from matplotlib import pyplot
# Alternative kept from the original (disabled): load every JPEG in ./data/.
# Requires `import os`.
# path = "./data/"
# pictureList = []
# for filename in os.listdir(path):
#     if filename.endswith(".jpg"):
#         pictureList.append(cv2.imread(path + filename, cv2.IMREAD_GRAYSCALE))

# Load the single input image as a 2-D greyscale array.
picture = cv2.imread("1.jpg", cv2.IMREAD_GRAYSCALE)
if picture is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than later
    # with an AttributeError on picture.tolist().
    raise FileNotFoundError('cannot read image "1.jpg"')

# Data processing
def _rank_by_frequency(counts):
    """Map every key of ``counts`` to its 0-based frequency rank (0 = most common)."""
    # sorted() is stable, so keys with equal counts keep their first-seen order.
    ordered = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return {key: rank for rank, (key, _) in enumerate(ordered)}


def heapslaw(originalGraph, xShrinkRate, yShrinkRate, xWordShrinkRate: int = 1, yWordShrinkRate: int = 1):
    """Scatter-plot word rank against letter rank for a 2-D grid of values.

    Pass 1 walks the grid in tiles of ``xWordShrinkRate`` x ``yWordShrinkRate``
    cells. Each cell value is a "letter" and the sum of a tile is a "word";
    both are counted and then ranked by frequency (rank 0 = most frequent).

    Pass 2 walks the grid in tiles of ``xShrinkRate`` x ``yShrinkRate`` cells
    and records, per tile, the rank of the tile's last cell value (as a
    letter) and the rank of the tile's floor-mean (as a word). The pairs are
    drawn with ``pyplot.scatter`` on a new figure.

    Args:
        originalGraph: indexable 2-D grid (``originalGraph[row][col]``) of
            hashable numbers; rows are assumed to be equally long.
        xShrinkRate: tile height used in pass 2.
        yShrinkRate: tile width used in pass 2.
        xWordShrinkRate: tile height used when counting words in pass 1.
        yWordShrinkRate: tile width used when counting words in pass 1.

    Returns:
        None. The function only draws on a new matplotlib figure.

    Raises:
        IndexError: if ``originalGraph`` has no rows.
        ZeroDivisionError: if any shrink rate is 0.
        KeyError: if a pass-2 tile mean was never seen as a word in pass 1,
            or a pass-2 letter was never counted in pass 1.
    """
    xSize = len(originalGraph)
    ySize = len(originalGraph[0])

    # Pass 1: frequency of every letter (cell) and every word (tile sum).
    letterCounts = {}
    wordCounts = {}
    for xi in range(xSize // xWordShrinkRate):
        rowBase = xi * xWordShrinkRate
        for yi in range(ySize // yWordShrinkRate):
            colBase = yi * yWordShrinkRate
            word = 0
            for xj in range(xWordShrinkRate):
                row = originalGraph[rowBase + xj]
                for yj in range(yWordShrinkRate):
                    letter = row[colBase + yj]
                    letterCounts[letter] = letterCounts.get(letter, 0) + 1
                    word += letter
            wordCounts[word] = wordCounts.get(word, 0) + 1

    letterRank = _rank_by_frequency(letterCounts)
    wordRank = _rank_by_frequency(wordCounts)

    # Pass 2: one (word rank, letter rank) point per shrink tile.
    # The tile origin is scaled by the *shrink* rates. The original scaled it
    # by the word rates, which made tiles overlap, sampled only a corner of
    # the grid, and could index out of range when a word rate exceeded the
    # corresponding shrink rate.
    blockArea = xShrinkRate * yShrinkRate
    final_letterList = []
    final_wordList = []
    for xi in range(xSize // xShrinkRate):
        rowBase = xi * xShrinkRate
        for yi in range(ySize // yShrinkRate):
            colBase = yi * yShrinkRate
            word = 0
            for xj in range(xShrinkRate):
                row = originalGraph[rowBase + xj]
                for yj in range(yShrinkRate):
                    letter = row[colBase + yj]
                    word += letter
            # NOTE(review): the source's indentation was lost. Both appends
            # are placed at tile level because pyplot.scatter needs x and y
            # of equal length, so `letter` is the last cell of the tile.
            final_letterList.append(letterRank[letter])
            final_wordList.append(wordRank[word // blockArea])

    pyplot.figure(figsize=(15, 5))
    pyplot.subplot(1, 2, 1)
    # NOTE(review): s=0. gives zero-area markers; a small positive size was
    # probably intended. Left unchanged pending confirmation.
    pyplot.scatter(final_wordList, final_letterList, s=0.)
    pyplot.title(f"Heaps\' law ({xShrinkRate} * {yShrinkRate})")
    pyplot.ylabel('rank')
    pyplot.xlabel('rank')
def toGraph(originalGraph, xShrinkRate, yShrinkRate):
    """Plot and return the rank/frequency curve of a block-averaged grid.

    The grid is cut into non-overlapping tiles of ``xShrinkRate`` x
    ``yShrinkRate`` cells (leftover rows/columns at the far edges are
    dropped). Each tile is replaced by the floor of its mean, the occurrences
    of every resulting value are counted, and the counts are sorted from most
    to least frequent. The curve is drawn twice on a new figure: linear axes
    on the left, log-log axes on the right.

    Args:
        originalGraph: 2-D grid of numbers (``originalGraph[row][col]``) with
            equally long rows, e.g. a greyscale image converted with
            ``tolist()``.
        xShrinkRate: tile height in cells.
        yShrinkRate: tile width in cells.

    Returns:
        ``[ranks, counts]`` where ``ranks`` is ``numpy.arange(1, k + 1)`` and
        ``counts`` is a numpy array with the ``k`` occurrence counts in
        descending order.

    Raises:
        IndexError: if ``originalGraph`` has no rows.
        ZeroDivisionError: if a shrink rate is 0.
    """
    xSize = len(originalGraph)
    ySize = len(originalGraph[0])
    fixed_xSize = xSize // xShrinkRate
    fixed_ySize = ySize // yShrinkRate

    # Vectorised tile average: crop to a whole number of tiles, view the grid
    # as (tile row, row in tile, tile col, col in tile) and sum inside tiles.
    # float dtype and floor division mirror the original accumulate-then-`//=`
    # on a numpy.zeros array.
    cells = numpy.asarray(originalGraph, dtype=float)
    cropped = cells[:fixed_xSize * xShrinkRate, :fixed_ySize * yShrinkRate]
    tiles = cropped.reshape(fixed_xSize, xShrinkRate, fixed_ySize, yShrinkRate)
    fixed_graph = tiles.sum(axis=(1, 3)) // (xShrinkRate * yShrinkRate)

    # Series.value_counts() sorts by count, most frequent first. The
    # top-level pandas.value_counts() used previously is deprecated.
    fixed_result = pandas.Series(fixed_graph.ravel()).value_counts().to_numpy()
    returnVar = numpy.arange(1, len(fixed_result) + 1)

    pyplot.figure(figsize=(15, 5))

    # Left panel: linear axes.
    pyplot.subplot(1, 2, 1)
    pyplot.plot(returnVar, fixed_result)
    pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
    pyplot.ylabel('appear time')
    pyplot.xlabel('rank')

    # Right panel: same curve on log-log axes.
    pyplot.subplot(1, 2, 2)
    pyplot.plot(returnVar, fixed_result)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
    pyplot.ylabel('appear time(log_10)')
    pyplot.xlabel('rank(log_10)')

    return [returnVar, fixed_result]
# Convert the image to nested Python lists once; both analyses below index it
# cell by cell.
trans_picture = picture.tolist()

# Rank/frequency curve for every square shrink factor from 1 to 10.
# Alternatives kept from the original (disabled): shrink along one axis only,
# i.e. toGraph(trans_picture, 1, rate) or toGraph(trans_picture, rate, 1).
fixed_resultList = [toGraph(trans_picture, rate, rate) for rate in range(1, 11)]

# Letter-rank / word-rank scatter for the same shrink factors.
for rate in range(1, 11):
    heapslaw(trans_picture, rate, rate)
# Draw the overall plot: every rank/frequency curve on shared axes, once with
# linear scales (left panel) and once with log-log scales (right panel).
pyplot.figure(figsize=(15, 5))
for panel, use_log in ((1, False), (2, True)):
    pyplot.subplot(1, 2, panel)
    for ranks, counts in fixed_resultList:
        pyplot.plot(ranks, counts)
    if use_log:
        pyplot.xscale('log')
        pyplot.yscale('log')
    pyplot.title('total')
    pyplot.ylabel('appear time(log_10)' if use_log else 'appear time')
    pyplot.xlabel('rank(log_10)' if use_log else 'rank')
# Analysis plots: linear regression of log10(count) on log10(rank).
# One [log10(rank), log10(count)] pair per curve in fixed_resultList.
logarithmic_resultList = [
    [numpy.log10(ranks), numpy.log10(counts)]
    for ranks, counts in fixed_resultList
]

pyplot.figure(figsize=(15, 5))

# Left panel: data points plus fitted line for every shrink factor.
# x/y are passed by keyword because current seaborn releases no longer accept
# them positionally.
u = pyplot.subplot(1, 2, 1)
for index, (logRank, logCount) in enumerate(logarithmic_resultList):
    seaborn.regplot(x=logRank,
                    y=logCount,
                    label=f"{index + 1} * {index + 1}",
                    ax=u)
u.set_title('analyze')
u.set_ylabel('appear time(log_10)')
u.set_xlabel('rank(log_10)')
u.legend(loc='upper right')

# Right panel: fitted lines only.
v = pyplot.subplot(1, 2, 2)
for index, (logRank, logCount) in enumerate(logarithmic_resultList):
    seaborn.regplot(x=logRank,
                    y=logCount,
                    label=f"{index + 1} * {index + 1}",
                    scatter=False,
                    ax=v)
v.set_title('analyze')
v.set_ylabel('appear time(log_10)')
v.set_xlabel('rank(log_10)')
v.legend(loc='upper right')
# Extract the values: slope of the least-squares line through each log-log
# curve, then the mean slope.
# The fit is done directly on the data. The original drew each regression
# into a 0.001-inch dummy figure and re-fitted the drawn line, which yields
# the same first-order fit (up to floating-point rounding) but depends on
# seaborn internals and on rendering a zero-pixel figure.
slopeList = []
for index, (logRank, logCount) in enumerate(logarithmic_resultList):
    slope, _ = numpy.polyfit(logRank, logCount, 1)
    print(f"slope {index + 1} = {slope}")
    slopeList.append(slope)
print('the average of slopes =', sum(slopeList) / len(slopeList))
Add Comment
Please, Sign In to add comment