' Basic script

Rem Number of random split-half partitions that the reliability estimates are averaged over.
numsplits=1000

Rem The data table as text: a$(row, column).
DIM a$(1000,1000)

Rem Column numbers assigned to the first half (randnum) and second half (randnum2) of a split.
DIM randnum(1000)
DIM randnum2(1000)

Rem Per-row scores for each half and for the whole test.
DIM score1(1000)
DIM score2(1000)
DIM scoretot(1000)

Rem Per-row averages of the non-missing items in each half.
Rem avtot is dimensioned here but is not referenced anywhere else in this script.
DIM av1(1000)
DIM av2(1000)
DIM avtot(1000)

Rem Count the rows and columns of the comma-delimited text file chosen by the user.
Rem The csv files read here are expected NOT to have a comma at the end of each line.

filedialog "Open","*.txt",file$
IF file$="" THEN END

Rem First pass: read the file one line at a time and count the lines.
Rem When the loop finishes, a$ still holds the last line that was read.
i=0
a$=""
OPEN file$ FOR INPUT AS #f
WHILE NOT(EOF(#f))
    LINE INPUT #f, a$
    i=i+1
WEND
CLOSE #f

nrows=i

Rem An empty file has nothing to analyze, and later steps divide by nrows.
IF nrows=0 THEN
    PRINT "The data file contains no lines; nothing to analyze."
    END
END IF

Rem Count the entries in the last line by writing it to a scratch file and
Rem reading it back one comma-delimited entry at a time.
Rem The scratch file is placed in DefaultDir$ instead of a fixed drive and folder,
Rem so the script does not depend on a particular directory existing on this machine.
scratch$=DefaultDir$+"\junk.txt"
OPEN scratch$ FOR OUTPUT AS #1
PRINT #1, a$
CLOSE #1

k=0
OPEN scratch$ FOR INPUT AS #1
WHILE NOT(EOF(#1))
    INPUT #1, b$
    k=k+1
WEND
CLOSE #1

Rem The scratch file is no longer needed once its entries have been counted.
KILL scratch$

Rem k is used as-is (nothing is subtracted): per the original author's note, with no
Rem comma at the end of the line the count read back is the number of columns.
ncolumns=k

PRINT "nrows= "; nrows
PRINT "ncolumns=";ncolumns

Rem The arrays in this script are dimensioned to 1000, so a larger table cannot be stored.
IF nrows>1000 OR ncolumns>1000 THEN
    PRINT "The data file is too large; at most 1000 rows and 1000 columns are supported."
    END
END IF

' Cross-check the shape of the table: reading the whole file one comma-delimited
' entry at a time is expected to yield exactly nrows*ncolumns entries.

' Count every entry in the data file.
OPEN file$ FOR INPUT AS #f
WHILE NOT(EOF(#f))
    INPUT #f, entry$
    tally=tally+1
WEND
CLOSE #f
nentries=tally

PRINT "nentries=";nentries

' Stop here when the count does not match; otherwise report success and carry on.
IF nentries<>nrows*ncolumns THEN
   PRINT "Columns, rows, and entries don't check; please look at your data file to make sure each line has equal no. of entries."
   END
END IF
PRINT "Columns, rows, and entries check; we're good to go."

Rem Knowing the number of rows and columns, read the data file into the array a$(i,j),
Rem where the order is row, column.

OPEN file$ FOR INPUT AS #f
FOR i=1 TO nrows
    Rem ncolumns (not ncolumns+1) entries are read per row, because the lines
    Rem have no comma at the end.
    FOR j=1 TO ncolumns
        INPUT #f, a$(i,j)
    NEXT j
NEXT i
CLOSE #f

Rem Debugging aid: uncomment the whole loop below to echo the table back and confirm
Rem that it was read correctly. It is left disabled so the script does not spend
Rem nrows*ncolumns iterations on a loop whose body does nothing.
'FOR i=1 TO nrows
'    FOR j=1 TO ncolumns
'        IF j<>ncolumns THEN
'            print a$(i,j);",";
'        ELSE
'            print a$(i,j);chr$(10)
'        END IF
'    NEXT j
'NEXT i

Rem Split-half reliability, averaged over numsplits random splits of the items (columns).
Rem The first half gets INT(ncolumns/2) items and the second half gets the rest, so with
Rem an odd number of items the second half is the larger one.

nfirsthalf=INT(ncolumns/2)
nsechalf=ncolumns-nfirsthalf

Rem A correlation needs at least two rows, and a split needs at least one item in each half.
IF nrows<2 OR ncolumns<2 THEN
    PRINT "Need at least 2 rows and 2 columns to compute split-half reliability."
    END
END IF

Rem Running totals of the stepped-up correlations, and the number of splits that gave one.
sumsteppedrspear=0
sumsteppedrrulon=0
validsplits=0

FOR split=1 TO numsplits

    Rem --- Pick nfirsthalf distinct column numbers at random for the first half. ---
    Rem randnum(1..nfirsthalf) holds the column numbers (the j of a$(i,j)) of the first half.
    FOR i=1 TO nfirsthalf
        Rem Keep drawing until the column is not one of the i-1 columns already kept.
        taken$="yes"
        WHILE taken$="yes"
            randnum(i)=INT(ncolumns*RND(1))+1
            taken$="no"
            FOR q=1 TO i-1
                IF randnum(i)=randnum(q) THEN taken$="yes"
            NEXT q
        WEND
        'print "Keeper is";randnum(i)
    NEXT i

    Rem --- Every column not picked above goes to the second half, in ascending order. ---
    Rem randnum2(1..nsechalf) holds the column numbers of the second half.
    j=0
    FOR i=1 TO ncolumns
        taken$="no"
        FOR q=1 TO nfirsthalf
            IF randnum(q)=i THEN taken$="yes"
        NEXT q
        IF taken$="no" THEN
            j=j+1
            randnum2(j)=i
        END IF
    NEXT i

    Rem --- Per-row scores for each half and for the whole test. ---
    Rem Entries equal to "n" are missing and are left out of the average. Each half's
    Rem score is the average of its non-missing items times the number of items in the half.
    FOR z=1 TO nrows

        sum1=0
        count1=0
        FOR i=1 TO nfirsthalf
            Rem t is the column number
            t=randnum(i)
            IF a$(z,t)<>"n" THEN sum1=sum1+VAL(a$(z,t)):count1=count1+1
        NEXT i
        Rem With every item missing, report it and use count 1 so the average is 0 rather than 0/0.
        IF count1=0 THEN PRINT "a split where all were missing! line number=";z:count1=1
        av1(z)=sum1/count1
        score1(z)=av1(z)*nfirsthalf

        sum2=0
        count2=0
        FOR i=1 TO nsechalf
            t=randnum2(i)
            IF a$(z,t)<>"n" THEN sum2=sum2+VAL(a$(z,t)):count2=count2+1
        NEXT i
        Rem Same protection as for the first half; without it this line divides by zero.
        IF count2=0 THEN PRINT "a split where all were missing! line number=";z:count2=1
        av2(z)=sum2/count2
        score2(z)=av2(z)*nsechalf

        scoretot(z)=score1(z)+score2(z)
        'print "score1=";score1(z);"score2=";score2(z);"scoretot=";scoretot(z)
    NEXT z

    Rem --- Sums needed for the Pearson correlation between the halves and for the
    Rem     standard deviations of each half and the variance of the total score. ---
    sumxy=0
    sumx=0
    sumy=0
    sumx2=0
    sumy2=0
    sumscoretot2=0
    sumscoretot=0

    FOR i=1 TO nrows
        sumxy=sumxy+score1(i)*score2(i)
        sumx=sumx+score1(i)
        sumy=sumy+score2(i)
        sumx2=sumx2+(score1(i))^2
        sumy2=sumy2+(score2(i))^2
        sumscoretot2=sumscoretot2+scoretot(i)^2
        sumscoretot=sumscoretot+scoretot(i)
    NEXT i

    Rem ssx and ssy are the sums of squared deviations of the two half scores.
    ssx=sumx2-sumx^2/nrows
    ssy=sumy2-sumy^2/nrows
    varfortot=((1/nrows)*(sumscoretot2-sumscoretot^2/nrows))

    Rem The correlation and both step-up formulas divide by these quantities, so a split
    Rem where a half (or the total) has no variance, or where r is exactly -1, is skipped.
    usable=0
    IF ssx>0 AND ssy>0 AND varfortot>0 THEN
        r=(sumxy-sumx*sumy/nrows)/(ssx*ssy)^.5
        IF 1+r<>0 THEN usable=1
    END IF

    IF usable=1 THEN
        sdforx=((1/nrows)*ssx)^.5
        sdfory=((1/nrows)*ssy)^.5

        Rem Spearman-Brown step-up of the half-test correlation.
        steppedrspear=(2*r)/(1+r)

        Rem Flanagan and Rulon step-up.
        'print "sdforx=";sdforx;"  sdfory=";sdfory;"  varfortot=";varfortot
        steppedrrulon=4*r*sdforx*sdfory/varfortot

        sumsteppedrspear=sumsteppedrspear+steppedrspear
        sumsteppedrrulon=sumsteppedrrulon+steppedrrulon
        validsplits=validsplits+1
    END IF

NEXT split


Rem Report the average stepped-up r over the splits that produced a defined value.
Rem When every split is usable this is the average over all numsplits splits.

IF validsplits=0 THEN
    PRINT "No split gave a defined correlation; the split-half averages cannot be computed."
ELSE
    IF validsplits<numsplits THEN PRINT "splits skipped because the correlation was undefined=";numsplits-validsplits

    avsteppedrspear=sumsteppedrspear/validsplits

    PRINT "average of stepped up r's, using Spearman method=";avsteppedrspear

    avsteppedrrulon=sumsteppedrrulon/validsplits

    PRINT "average of stepped up r's, using Rulon method="; avsteppedrrulon
END IF

Rem Compute alpha by a standard formula, assuming no missing values in the data set,
Rem so that the value can be compared with the average of the stepped-up split-half
Rem reliabilities.
Rem varfortot is the variance of the total test score as left by the last split;
Rem ncolumns is the number of items in the test.

Rem The formula divides by nrows, by ncolumns-1 and by varfortot.
IF nrows<1 OR ncolumns<2 OR varfortot=0 THEN
    PRINT "alphatrad cannot be computed: need at least 1 row, 2 columns, and a nonzero total-score variance."
    END
END IF

Rem Compute the variance of each item and sum the variances.
Rem Entries equal to "n" (missing) are counted so the user can be warned: VAL turns them into 0.
sumvariances=0
nmissing=0
FOR i=1 TO ncolumns
    sumfirsts=0
    sumsq=0
    FOR j=1 TO nrows
        IF a$(j,i)="n" THEN nmissing=nmissing+1
        itemval=VAL(a$(j,i))
        sumfirsts=sumfirsts+itemval
        sumsq=sumsq+itemval^2
    NEXT j
    variance=(1/nrows)*(sumsq-sumfirsts^2/nrows)
    'print "variance(";i;")=";variance
    sumvariances=sumvariances+variance
NEXT i

IF nmissing>0 THEN PRINT "warning: ";nmissing;" missing entries were counted as 0 in alphatrad."

Rem Now we compute alpha.

alphatrad=(ncolumns/(ncolumns-1))*(1-sumvariances/varfortot)

'print "sumvariances=";sumvariances; "varfortot=";varfortot

PRINT "alphatrad="; alphatrad


END