# 07/27 17:10

import numpy
import cv2
import pandas
import seaborn
# import os
from matplotlib import pyplot

# path = "./data/"

# pictureList = []
# for filename in os.listdir(path):
#     if filename.endswith(".jpg"):
#         pictureList.append(cv2.imread(path + filename, cv2.IMREAD_GRAYSCALE))


# Load the input image as a single-channel (grayscale) array.
picture = cv2.imread("1.jpg", cv2.IMREAD_GRAYSCALE)
# cv2.imread reports failure by returning None instead of raising, so fail
# here with a clear message rather than later on an unrelated attribute error.
if picture is None:
    raise OSError('could not read image "1.jpg" (missing file or unsupported format)')


# Data processing

def heapslaw(originalGraph, xShrinkRate, yShrinkRate, xWordShrinkRate: int = 1, yWordShrinkRate: int = 1):
    """Scatter-plot word rank against letter rank for a 2-D grid of values.

    A "letter" is a single cell of ``originalGraph``.  The function works in
    two passes:

    1. The grid is tiled into non-overlapping ``xWordShrinkRate`` *
       ``yWordShrinkRate`` blocks.  Every letter is counted, and the sum of
       each block's letters is counted as a "word".  Letters and words are
       then ranked by frequency (rank 0 = most frequent; ties keep
       first-seen order).
    2. The grid is tiled into non-overlapping ``xShrinkRate`` *
       ``yShrinkRate`` blocks.  For each block the floor of the mean letter
       value is looked up in the word ranking, and every letter of the block
       is looked up in the letter ranking.

    One point per letter is drawn, with the word rank of the enclosing block
    on the x axis and the letter rank on the y axis, in the left half of a
    new 15 x 5 figure.

    Rows/columns that do not fill a whole block are ignored in both passes.

    Args:
        originalGraph: Indexable as ``originalGraph[row][col]``; all rows are
            read up to ``len(originalGraph[0])``.
        xShrinkRate: Block height (rows) used in the second pass.
        yShrinkRate: Block width (columns) used in the second pass.
        xWordShrinkRate: Block height used when building the rank tables.
        yWordShrinkRate: Block width used when building the rank tables.

    Returns:
        None.  The result is the matplotlib figure created as a side effect.

    Raises:
        KeyError: If a value looked up in the second pass was never counted
            in the first pass (e.g. a block mean that never occurred as a
            word).
    """
    xSize = len(originalGraph)
    ySize = len(originalGraph[0])

    # ---- Pass 1: frequency tables for letters and words -------------------
    letterDict = {}
    wordDict = {}

    for xi in range(xSize // xWordShrinkRate):
        for yi in range(ySize // yWordShrinkRate):
            word = 0
            for xj in range(xWordShrinkRate):
                row = originalGraph[xi * xWordShrinkRate + xj]
                for yj in range(yWordShrinkRate):
                    letter = row[yi * yWordShrinkRate + yj]
                    letterDict[letter] = letterDict.get(letter, 0) + 1
                    word += letter
            wordDict[word] = wordDict.get(word, 0) + 1

    # sorted() is stable even with reverse=True, so equally frequent keys
    # keep their first-seen order.
    modified_letterDict = {
        letter: rank
        for rank, letter in enumerate(sorted(letterDict, key=letterDict.get, reverse=True))
    }
    modified_wordDict = {
        word: rank
        for rank, word in enumerate(sorted(wordDict, key=wordDict.get, reverse=True))
    }

    # ---- Pass 2: look up ranks block by block -----------------------------
    blockArea = xShrinkRate * yShrinkRate

    final_letterList = []
    final_wordList = []

    for xi in range(xSize // xShrinkRate):
        for yi in range(ySize // yShrinkRate):
            # Index with the *shrink* rates: these loops tile the grid in
            # xShrinkRate * yShrinkRate blocks, so using the word rates here
            # would make blocks overlap and leave most of the grid unread.
            blockLetters = [
                originalGraph[xi * xShrinkRate + xj][yi * yShrinkRate + yj]
                for xj in range(xShrinkRate)
                for yj in range(yShrinkRate)
            ]
            # NOTE(review): pass 1 keys words by block *sum*, while this is a
            # floor *mean*; the two only agree in kind for 1 * 1 word blocks.
            # A mean that never occurred as a word raises KeyError - confirm
            # the intended definition of a word.
            word = sum(blockLetters) // blockArea
            blockWordRank = modified_wordDict[word]
            for letter in blockLetters:
                final_letterList.append(modified_letterDict[letter])
                # Repeat the block's word rank once per letter so x and y
                # have equal length, which pyplot.scatter requires.
                final_wordList.append(blockWordRank)

    pyplot.figure(figsize=(15, 5))
    pyplot.subplot(1, 2, 1)
    # NOTE(review): s=0. requests zero-area markers - confirm the intended size.
    pyplot.scatter(final_wordList, final_letterList, s=0.)
    pyplot.title(f"Heaps\' law ({xShrinkRate} * {yShrinkRate})")
    pyplot.ylabel('rank')
    pyplot.xlabel('rank')


def toGraph(originalGraph, xShrinkRate, yShrinkRate):
    """Plot and return the rank/frequency curve of a block-averaged grid.

    The grid is tiled into non-overlapping ``xShrinkRate`` * ``yShrinkRate``
    blocks (trailing rows/columns that do not fill a whole block are
    dropped) and each block is replaced by the floor of its mean.  The
    number of occurrences of every resulting value is counted and sorted
    from most to least frequent.  The counts are drawn against their 1-based
    rank in a new 15 x 5 figure: linear axes on the left, log-log axes on the
    right.

    Args:
        originalGraph: Rectangular 2-D grid of numbers, indexable as
            ``originalGraph[row][col]``.
        xShrinkRate: Block height (rows).
        yShrinkRate: Block width (columns).

    Returns:
        ``[ranks, counts]`` where ``ranks`` is ``numpy.arange(1, k + 1)`` and
        ``counts`` is a numpy array of the ``k`` occurrence counts in
        descending order.
    """
    xSize = len(originalGraph)
    ySize = len(originalGraph[0])

    fixed_xSize = xSize // xShrinkRate
    fixed_ySize = ySize // yShrinkRate

    # Crop to a whole number of blocks, then give each block its own pair of
    # axes (1 and 3) so a single sum collapses it.  float64 matches the
    # accumulator dtype of a numpy.zeros() array.
    pixels = numpy.asarray(originalGraph, dtype=numpy.float64)
    blocks = pixels[:fixed_xSize * xShrinkRate, :fixed_ySize * yShrinkRate].reshape(
        fixed_xSize, xShrinkRate, fixed_ySize, yShrinkRate)
    fixed_graph = blocks.sum(axis=(1, 3)) // (xShrinkRate * yShrinkRate)

    # Series.value_counts() sorts by descending count; the top-level
    # pandas.value_counts() function is deprecated.
    fixed_result = pandas.Series(fixed_graph.ravel()).value_counts().to_numpy()
    returnVar = numpy.arange(1, len(fixed_result) + 1)

    pyplot.figure(figsize=(15, 5))

    # Left panel: linear axes.
    pyplot.subplot(1, 2, 1)
    pyplot.plot(returnVar, fixed_result)
    pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
    pyplot.ylabel('appear time')
    pyplot.xlabel('rank')

    # Right panel: same curve on log-log axes.
    pyplot.subplot(1, 2, 2)
    pyplot.plot(returnVar, fixed_result)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.title(f"{xShrinkRate} * {yShrinkRate}")
    pyplot.ylabel('appear time(log_10)')
    pyplot.xlabel('rank(log_10)')

    return [returnVar, fixed_result]


# Nested-list copy of the image; the analysis functions index it as
# graph[row][col].
trans_picture = picture.tolist()

# Rank/frequency curves for square shrink windows 1 * 1 through 10 * 10.
# (Alternative sweeps along a single axis would be
# toGraph(trans_picture, 1, rate) or toGraph(trans_picture, rate, 1).)
fixed_resultList = [toGraph(trans_picture, rate, rate) for rate in range(1, 11)]


# Rank-vs-rank scatter figures for the same window sizes.
for rate in range(1, 11):
    heapslaw(trans_picture, rate, rate)


# Overall plot: overlay the curve of every shrink level in one figure.

pyplot.figure(figsize=(15, 5))

# Left panel: linear axes.
pyplot.subplot(1, 2, 1)
for ranks, counts in fixed_resultList:
    pyplot.plot(ranks, counts)
pyplot.xlabel('rank')
pyplot.ylabel('appear time')
pyplot.title('total')


# Right panel: the same curves on log-log axes.
pyplot.subplot(1, 2, 2)
for ranks, counts in fixed_resultList:
    pyplot.plot(ranks, counts)
pyplot.yscale('log')
pyplot.xscale('log')
pyplot.xlabel('rank(log_10)')
pyplot.ylabel('appear time(log_10)')
pyplot.title('total')


# Analysis plots

# One [log10(rank), log10(count)] pair per shrink level, in the same order
# as fixed_resultList.
logarithmic_resultList = [
    [numpy.log10(ranks), numpy.log10(counts)]
    for ranks, counts in fixed_resultList
]


pyplot.figure(figsize=(15, 5))

# Left panel: data points plus fitted regression line for every shrink level.
pyplot.subplot(1, 2, 1)

for i, (log_rank, log_count) in enumerate(logarithmic_resultList):
    # x and y are passed by keyword: seaborn 0.12+ made them keyword-only,
    # so positional data vectors raise TypeError there.
    u = seaborn.regplot(x=log_rank,
                        y=log_count,
                        label=f"{i + 1} * {i + 1}")
u.set_title('analyze')
u.set_ylabel('appear time(log_10)')
u.set_xlabel('rank(log_10)')
u.legend(loc='upper right')

# Right panel: regression lines only.
pyplot.subplot(1, 2, 2)

for i, (log_rank, log_count) in enumerate(logarithmic_resultList):
    v = seaborn.regplot(x=log_rank,
                        y=log_count,
                        label=f"{i + 1} * {i + 1}",
                        scatter=False)
v.set_title('analyze')
v.set_ylabel('appear time(log_10)')
v.set_xlabel('rank(log_10)')
v.legend(loc='upper right')

# Extract the numeric slopes.
# The figure only hosts the axes that regplot draws its fitted lines on,
# hence the tiny size.
pyplot.figure(figsize=(0.001, 0.001))


axList = []

for i, (log_rank, log_count) in enumerate(logarithmic_resultList):
    pyplot.subplot(1, len(logarithmic_resultList), i + 1)
    # x and y are passed by keyword: seaborn 0.12+ made them keyword-only.
    axList.append(seaborn.regplot(x=log_rank,
                                  y=log_count,
                                  scatter=False))

slopeList = []

for i, ax in enumerate(axList):
    # With scatter=False the first line on the axes is the one regplot drew;
    # a degree-1 fit through its points recovers slope and intercept.
    # NOTE(review): numpy.polyfit on the log data directly should give the
    # same slope without the throwaway figure - confirm before simplifying.
    fit_line = ax.get_lines()[0]
    slope, intercept = numpy.polyfit(fit_line.get_xdata(),
                                     fit_line.get_ydata(),
                                     1)
    print(f"slope {i + 1} = {slope}")
    slopeList.append(slope)

# Divide by the number of slopes actually collected instead of a fixed 10.
print('the average of slopes =', sum(slopeList) / len(slopeList))