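# Statistics lab script (labs 1-4): grouped-sample analysis of the
# Writing.Mean and Mathematics.Mean columns of results.csv.
# (Web-page boilerplate from the paste site was removed here.)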
# Lab 1: variation series, interval (grouped) series and empirical
# distribution plots for the Writing.Mean column of results.csv.
library("psych")
library("dplyr")
library("ggplot2")
library("GGally")

a <- read.csv(file = "results.csv")

print("Вариационный ряд")
sort(a$Writing.Mean)

# Sample size comes from the data rather than a hard-coded 65; the plots
# below already require length(a$Writing.Mean) == n.
n <- nrow(a)
# Number of grouping intervals (7 for n = 65).
k <- ceiling(log(n, base = 2) + 1) - 1
w_min <- min(a$Writing.Mean)
w_max <- max(a$Writing.Mean)
# Interval width.
h <- (w_max - w_min) / k

# Interval boundaries. They are obtained by multiplication instead of
# repeated addition, and the last one is pinned to the observed maximum so
# that rounding error cannot push the largest observation out of range.
w_breaks <- w_min + (0:k) * h
w_breaks[k + 1] <- w_max

# x: k-by-3 matrix of (lower bound, upper bound, midpoint) per interval.
x <- matrix(
  c(w_breaks[-(k + 1)], w_breaks[-1], w_breaks[-(k + 1)] + h / 2),
  nrow = k,
  ncol = 3
)

df <- data.frame(From = x[, 1], To = x[, 2], Volume = 0, MeanOfInterval = x[, 3])

# Count observations per interval. Intervals are [From, To); the last one
# also takes everything at or above its lower bound, which is what the
# original "+ 1" slack on the last upper bound achieved. Works for any k.
df$Volume <- as.numeric(
  tabulate(findInterval(a$Writing.Mean, df$From), nbins = k)
)

print("Интервальный ряд: ")
df

# Frequency polygon and bar chart of the interval series.
qplot(
  MeanOfInterval, Volume,
  data = df,
  geom = "path",
  xlab = "Средняя оценка за Writing, баллов",
  ylab = "Количество",
  main = "Данные выпускников города New York"
)
barplot(height = df$Volume, names.arg = df$MeanOfInterval)

# Step plots: sorted raw values against i/n, and sorted interval counts
# against i/k.
plot(sort(a$Writing.Mean), seq_len(n) / n, type = "S", col = "seagreen",
     main = "ЭФР абсолютных частот", xlab = "", ylab = "")
plot(sort(df$Volume), seq_len(k) / k, type = "S", col = "seagreen",
     main = "ЭФР относительных частот", xlab = "", ylab = "")
# Lab 2: numerical characteristics of the grouped Writing.Mean sample.
# Relies on df (From, To, Volume, MeanOfInterval), k and h from Lab 1.

# NOTE: from here on `n` holds the interval frequencies and `x` the interval
# midpoints (both names are re-used from Lab 1).
n <- df$Volume
x <- df$MeanOfInterval
# Total sample size, derived from the frequencies instead of hard-coding 65.
N <- sum(n)

# Grouped sample mean.
xsr <- sum(x * n) / N
print("Математическое ожидание")
xsr

# A (5th column): cumulative frequencies.
df <- data.frame(df, A = cumsum(df$Volume))

# False zero: midpoint of the middle interval (row 4 when k = 7).
C <- df$MeanOfInterval[ceiling(k / 2)]
# Coded midpoints.
z <- (x - C) / h

# M[p]: p-th raw moment of the coded values, p = 1..4.
M <- vapply(1:4, function(p) sum(n * z^p) / N, numeric(1))
# m[p]: p-th central moment of the midpoints, p = 1..4.
m <- vapply(1:4, function(p) sum(n * (x - xsr)^p) / N, numeric(1))

# Standard deviation (with the N / (N - 1) correction).
Dv <- m[2]
S <- sqrt((N / (N - 1)) * Dv)
print("Дисперсия: ")
S^2

# Skewness.
As <- m[3] / (S^3)
print("Ассиметрия: ")
As

# Excess kurtosis.
Ex <- (m[4] / (S^4)) - 3
print("Эксцес: ")
Ex

# Mode: located in the interval with the largest frequency (previously
# hard-coded to interval 2). Missing neighbours at either end count as 0.
# NOTE: yields NaN if the modal frequency equals both neighbours.
mo_i <- which.max(df$Volume)
f_mo <- df$Volume[mo_i]
f_prev <- if (mo_i > 1) df$Volume[mo_i - 1] else 0
f_next <- if (mo_i < k) df$Volume[mo_i + 1] else 0
Mo <- df$From[mo_i] + h * ((f_mo - f_prev) / ((f_mo - f_prev) + (f_mo - f_next)))
print("Мода: ")
Mo

# Median: Me = x0 + h * (N/2 - cumulative frequency before the median
# interval) / frequency of the median interval. The median interval is the
# first one whose cumulative frequency reaches N/2. The previous version
# used df$A[5] in place of N and divided by the cumulative count df$A[2].
me_i <- which(df$A >= N / 2)[1]
cum_prev <- if (me_i > 1) df$A[me_i - 1] else 0
Me <- df$From[me_i] + h * (N / 2 - cum_prev) / df$Volume[me_i]
print("Медиана: ")
Me
# Lab 3: confidence intervals and the chi-square statistic.
# Relies on N, S, xsr, x (midpoints), n (frequencies), h from earlier labs.

# --- Confidence intervals for the mean ---
# G: confidence levels; t: matching tabulated critical values.
G <- c(0.95, 0.99, 0.999)
t <- c(1.996, 2.649, 3.439)
# Half-widths and interval endpoints, one per confidence level.
f <- t * S / sqrt(N)
a1 <- xsr - f
a2 <- xsr + f
df2 <- data.frame(G = G, t = t, S = S, f = f, a1 = a1, a2 = a2)
df2

# --- Confidence intervals for the standard deviation ---
# G2: confidence levels; q: matching tabulated coefficients.
G2 <- c(0.95, 0.99)
q <- c(0.174, 0.245)
s1 <- S * (1 - q)
s2 <- S * (1 + q)
df3 <- data.frame(G = G2, q = q, s1 = s1, s2 = s2)
df3

# --- Hypothesis check ---
# https://www.matburo.ru/Examples/Files/ms_pg_3.pdf
x
# Standardised midpoints.
u <- (x - xsr) / S
# Standard normal density evaluated at u.
fi <- (1 / sqrt(2 * pi)) * exp(-u^2 / 2)
# Theoretical frequencies.
n0 <- N * h * fi / S
# Per-interval chi-square terms.
f2 <- (n - n0)^2 / n0
# Observed chi-square statistic: running sum of the terms.
hi2_nabl <- 0
for (term in f2) {
  hi2_nabl <- hi2_nabl + term
}
df4 <- data.frame(x = x, u = u, fi = fi, n0 = n0, n = n, f2)
df4 # working table for the chi-square statistic
hi2_nabl
# Critical value to compare hi2_nabl against.
hi2_krit <- 9.5
# Lab 4, part 1: interval series and grouped moments for Mathematics.Mean.
# Relies on a, k and N from the earlier labs.

y_min <- min(a$Mathematics.Mean)
y_max <- max(a$Mathematics.Mean)
# Interval width for y.
h2 <- (y_max - y_min) / k

# Interval boundaries, computed by multiplication (no accumulated rounding)
# with the last boundary pinned to the observed maximum.
y_breaks <- y_min + (0:k) * h2
y_breaks[k + 1] <- y_max

# x2: k-by-3 matrix of (lower bound, upper bound, midpoint) per interval.
x2 <- matrix(
  c(y_breaks[-(k + 1)], y_breaks[-1], y_breaks[-(k + 1)] + h2 / 2),
  nrow = k,
  ncol = 3
)
df5 <- data.frame(From = x2[, 1], To = x2[, 2], Volume = 0, MeanOfInterval = x2[, 3])

# Count the first N observations per interval. Intervals are [From, To);
# the last one takes everything at or above its lower bound. This replaces
# seven copy-pasted tests that only worked for k == 7.
df5$Volume <- as.numeric(
  tabulate(findInterval(a$Mathematics.Mean[seq_len(N)], df5$From), nbins = k)
)
df5 # intervals for y

# n2: interval frequencies; y: interval midpoints.
n2 <- df5$Volume
y <- df5$MeanOfInterval
# Grouped sample mean of y.
ysr <- sum(y * n2) / N

# A (5th column): cumulative frequencies.
df5 <- data.frame(df5, A = cumsum(df5$Volume))

# False zero: midpoint of the middle interval (row 4 when k = 7).
C2 <- df5$MeanOfInterval[ceiling(k / 2)]
# Coded midpoints.
z2 <- (y - C2) / h2

# M2[p]: p-th raw moment of the coded values, p = 1..4.
M2 <- vapply(1:4, function(p) sum(n2 * z2^p) / N, numeric(1))
# m2[p]: p-th central moment of the midpoints, p = 1..4.
m2 <- vapply(1:4, function(p) sum(n2 * (y - ysr)^p) / N, numeric(1))

# Standard deviation of y with the N / (N - 1) correction (this is the
# square root of the variance, not the variance itself).
S2 <- sqrt((N / (N - 1)) * m2[2])
S2 # standard deviation of y
# Lab 4, part 2: joint frequency table of (Writing.Mean, Mathematics.Mean).
# Relies on a, df, df5, k and N from the earlier sections.
# df6[p, j] = number of observations whose y falls in y-interval p and
# whose x falls in x-interval j.

# Column names: the original seven words, then R's default "V8", "V9", ...
# should k ever exceed 7.
col_words <- c("one", "two", "three", "four", "five", "six", "seven")
col_names <- c(col_words, if (k > 7) paste0("V", 8:k))[seq_len(k)]

xx <- a$Writing.Mean # raw x values
yy <- a$Mathematics.Mean # raw y values
df
kk <- k - 1

# Interval index of each of the first N observations. Intervals are
# [From, To); the last interval takes everything at or above its lower
# bound, so the maximum observation cannot be lost to rounding in To[k]
# (the previous `<= To[k]` test had no such protection, unlike the
# marginal counts).
ix <- findInterval(xx[seq_len(N)], df$From)
iy <- findInterval(yy[seq_len(N)], df5$From)

# Keep only observations that landed in a valid interval on both axes.
ok <- !is.na(ix) & !is.na(iy) & ix >= 1 & iy >= 1
# Column-major cell number of (row = iy, column = ix) in a k-by-k matrix.
cell <- (ix[ok] - 1) * k + iy[ok]
counts <- matrix(as.numeric(tabulate(cell, nbins = k * k)), nrow = k, ncol = k)

df6 <- as.data.frame(counts)
names(df6) <- col_names

# NN: number of observations placed in the table (sanity check against N).
NN <- sum(counts)
df6 # frequency table
# Lab 4, part 3: weighted sums over the joint frequency table, sample
# covariance and correlation coefficient.
# Relies on df6 (k-by-k table, rows = y intervals, columns = x intervals),
# x and y (interval midpoints), N, xsr, ysr, S and S2.
# The accumulators are sized from k instead of being hard-coded to 7.

freq <- as.matrix(df6)

############
# x_kryshka[i] = sum over j of x[j] * df6[i, j]  (row-wise weighted sums).
x_kryshka <- as.vector(freq %*% x)
x_kryshka
wxy <- sum(y * x_kryshka)
wxy
##########
# y_kryshka[j] = sum over i of y[i] * df6[i, j]  (column-wise weighted sums).
y_kryshka <- as.vector(crossprod(freq, y))
y_kryshka
wxy2 <- sum(x * y_kryshka)
wxy2
##########
# Direct double sum of df6[i, j] * x[j] * y[i]; wxy, wxy2 and wxy3 are
# three routes to the same quantity and serve as a cross-check.
wxy3 <- sum(freq * outer(y, x))
wxy3
##########
# Sample covariance of the grouped data.
mxy <- wxy / N - xsr * ysr
mxy
# Sample correlation coefficient.
rxy <- mxy / (S * S2)
rxy
# Lab 4, part 4: inference on the correlation coefficient rxy.
# Relies on rxy and the sample size N.
#
# Fixes relative to the previous version:
#   * the upper bound was written `z2 = z = T_krit*SE`, which overwrote z
#     and dropped the `z +` term;
#   * the standard error and the test statistic used the number of
#     intervals k where the sample size N is required;
#   * the critical value 2.57 is the 0.975 t-quantile for k - 2 = 5 degrees
#     of freedom, so it is now computed for the correct reference
#     distributions at the same 5% level.

# Significance level (confidence level is 1 - alpha).
alpha <- 0.05

# Confidence interval for the correlation coefficient (Fisher z-transform).
z <- 0.5 * log((1 + rxy) / (1 - rxy))
SE <- 1 / sqrt(N - 3)
SE
T_krit <- qnorm(1 - alpha / 2)
z1 <- z - T_krit * SE
z2 <- z + T_krit * SE
tanh(z1) # lower end of the interval
tanh(z2) # upper end of the interval

# Test of the hypothesis that the correlation coefficient equals zero:
# T_nabl is compared with the t critical value for N - 2 degrees of freedom.
T_nabl <- rxy * (sqrt(N - 2) / sqrt(1 - rxy^2))
T_nabl
T_krit <- qt(1 - alpha / 2, df = N - 2)
# End of script. (Web-page boilerplate from the paste site was removed here.)