In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import statsmodels.api as sm
import statsmodels.formula.api as smf
#import wave
import scipy 
from patsy.contrasts import Treatment
from patsy import dmatrix
from patsy.contrasts import Poly
from patsy.contrasts import ContrastMatrix
%matplotlib inline
In [165]:
def write_read(df=dict, csvfilename=str):
    """
    Round-trip a pandas DataFrame through a CSV file.

    The frame is written to ``csvfilename`` without its index column (which
    keeps the file clean for SPSS and R users) and the file is then loaded
    again.

    Returns the DataFrame parsed back from ``csvfilename``.
    """
    target = csvfilename
    df.to_csv(target, index=False)
    reloaded = pd.read_csv(target)
    return reloaded
# Main analysis frame, read from pebl.csv in the working directory.
df = pd.read_csv("pebl.csv")

# The per-task PEBL DataFrames. Only corsi is loaded here; the toh and dspan
# reads are left disabled.
#toh = pd.read_csv("toh.csv")
#dspan=pd.read_csv("dspan.csv")
corsi =pd.read_csv("corsi.csv")

#stick the pebl trials and cumsum into the pebl dataframe.
#c_cs = []
#c_tr = []
#for i in np.unique(corsi["subnum"]):
#    c_cs.append(np.max(corsi[corsi["subnum"]==i]["cumsum"]))
#    c_tr.append(np.max(corsi[corsi["subnum"]==i]["trial"]))
#for i in np.unique(dspan["subnum"]):
#    c_cs.append(np.max(dspan[dspan["subnum"]==i]["cumsum"]))
#    c_tr.append(np.max(dspan[dspan["subnum"]==i]["trial"]))
#for i in np.unique(toh["subnum"]):
#    c_cs.append(np.max(toh[toh["subnum"]==i]["cumsum"]))
#    c_tr.append(np.max(toh[toh["subnum"]==i]["trial"]))
#pd.DataFrame(c_cs, c_tr).to_csv("test.csv")
In [163]:
#toh[toh["subnum"]==96]

1. Justification for the scoring technique on the PEBL

$y=m*x+b$

Imagine a right triangle with vertices at (0, 0), (14, 0), and (14, 37). Take a look at the figure below:

In [154]:
# Draw the example participant's score triangle: base = 14 trials, height = 37.
# A single Axes is created here; calling plt.axes() twice stacks a second, empty
# Axes under the plot and its tick labels overprint the real ones.
fig, ax = plt.subplots()
ax.set(xlim=(0, 20), ylim=(0, 80), xlabel="trials", ylabel="cumsum")
points = [[0, 0], [14, 0], [14, 37], [0, 0]]
triangle = plt.Polygon(points, color="red", alpha=0.5, fill=True)
ax.add_patch(triangle)
plt.show()

The triangle above has a base length of 14, a height of 37, and a hypotenuse length of 39.56.

  • len(a) = 14
  • len(b) = 37
  • len(c) = 39.56 (via the Pythagorean theorem)

The base length corresponds to the number of trials a participant completed. The height corresponds to the sum, over those trials, of the span length attempted multiplied by the accuracy on that attempt:

$height = \sum\limits_{i=1}^{n} (span_i \cdot acc_i)$, where $n$ is the number of trials the participant completed.

E.g. A participant attempted to complete spans of length 1, 2, and 3:

$span = \begin{bmatrix} 1\\ 2\\ 3\\ \end{bmatrix}$

He has a corresponding accuracy vector:

$acc = \begin{bmatrix} 1\\ 0\\ 1\\ \end{bmatrix}$

Therefore, the vector used to calculate his cumulative (running) sum, i.e. the cumsum, is:

$corrLen = \begin{bmatrix} 1\\ 0\\ 3\\ \end{bmatrix}$

Thus, the final cumsum vector for participant $_i$:

$cumsum = \begin{bmatrix} 1\\ 1\\ 4\\ \end{bmatrix}$.

And the max of the cumsum vector is the height for the triangle:

$height = \begin{bmatrix} 4\\ \end{bmatrix}$.

Below, we will work through the details for a real participant (subnum = 1) on the Corsi block-tapping task. This participant's triangle measurements are the ones given in the figure above.

In [166]:
# Un-normed hypotenuse for participant 1 on the corsi task: sqrt(base**2 + height**2),
# where the base is the number of corsi rows for subnum 1 and the height is the
# largest value in that participant's "cumsum" column.
np.sqrt((float(len(corsi[corsi["subnum"]==1]["subnum"]))**2 + np.max(corsi[corsi["subnum"]==1]["cumsum"])**2))
Out[166]:
39.560080889704963
In [167]:
# The participant's slope is height / base, i.e. 37 (height) over 14 (base):
37./14
Out[167]:
2.642857142857143

A. Norming the triangle

We can scale the triangle down by a factor of 14 (its base length) to create a similar triangle with a base of length 1, i.e. one that sits within its own unit circle.

e.g.

  • len(a) = $14/14 = 1$
  • len(b) = $37/14$
  • len(c) = $39.5600/14.$ (via the Pythagorean theorem)

Preliminary proof that we have a similar triangle:

$39.5600 / 14. = 2.8257$

In [165]:
# The Pythagorean theorem on the scaled triangle (base 1, height 37/14):
np.sqrt(1**2 + (37./14)**2)  
Out[165]:
2.8257200635503548
In [173]:
# For each unit of base length, we move 2.8257 units along the hypotenuse of the similar triangle.
# Thus, moving 14 base units should recover the original un-normed hypotenuse, i.e. 39.5600:
14 * 2.8257200635503548
Out[173]:
39.56008088970496

End Section.



2. Visualizing the slope in the unit circle

In [174]:
def see_slope(task=str, data=None):
    """
    Plot every participant's normed score triangle for one PEBL task.

    Each selected row describes a right triangle with base ``trials`` and
    height ``cumsum``. Dividing both sides by the base gives a similar
    triangle with base 1 whose height is the slope ``cumsum / trials``.
    One faint blue outline is drawn per row, and a single red outline whose
    height is the mean normed hypotenuse is drawn on top.

    Parameters
    ----------
    task : str
        Value of the "task" column to select (the notebook uses "corsi",
        "dspan" and "toh").
    data : pandas.DataFrame, optional
        Frame with "task", "trials" and "cumsum" columns. When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    tuple of three lists (a_normed, b_normed, c_normed)
        Normed base (trials / trials), normed height (cumsum / trials) and
        normed hypotenuse (sqrt(1 + height**2)); one entry per selected row,
        in row order. They stay plain lists because callers concatenate
        them with ``+``.
    """
    if data is None:
        data = df
    task_rows = data[data["task"] == task]
    trials = list(task_rows["trials"])
    heights = list(task_rows["cumsum"])

    a_normed = [n / n for n in trials]
    b_normed = [h / float(n) for h, n in zip(heights, trials)]
    c_normed = [np.sqrt(1.0 + slope ** 2) for slope in b_normed]

    # One Axes only: calling plt.axes() twice stacks a second, empty Axes under the plot.
    fig, ax = plt.subplots()
    ax.set(xlim=(0, 1), ylim=(0, 5), xlabel="normed base", ylabel="normed height",
           title="Normed score triangles: %s" % (task,))
    for base, height in zip(a_normed, b_normed):
        outline = [[0, 0], [base, 0], [base, height], [0, 0]]
        ax.add_patch(plt.Polygon(outline, color="blue", alpha=0.2, fill=False))

    # Skip the summary triangle when no row matched the task; reading the last
    # loop index here would otherwise fail on an empty selection.
    if c_normed:
        base = a_normed[-1]
        # NOTE(review): the height used here is the mean normed *hypotenuse*,
        # not the mean slope (b_normed) -- confirm this is the intended summary.
        outline = [[0, 0], [base, 0], [base, np.mean(c_normed)], [0, 0]]
        ax.add_patch(plt.Polygon(outline, color="red", alpha=1.0, fill=False))
    plt.show()
    return a_normed, b_normed, c_normed
In [175]:
a_normed, b_normed, c_normed = see_slope("corsi")
In [176]:
a_normed1, b_normed1, c_normed1 = see_slope("dspan")
In [177]:
a_normed2, b_normed2, c_normed2 = see_slope("toh")
In [178]:
# Putting the normed vectors into the DF.
# Each task's values are written through a mask on the "task" column, so they land
# on that task's own rows whatever the row order of df is. Concatenating the three
# lists and assigning them positionally is only correct when df happens to be sorted
# corsi -> dspan -> toh. A length mismatch for any task still raises.
normed_by_task = {
    "corsi": (a_normed, b_normed, c_normed),
    "dspan": (a_normed1, b_normed1, c_normed1),
    "toh": (a_normed2, b_normed2, c_normed2),
}
for column in ("anormed", "bnormed", "cnormed"):
    df[column] = np.nan
for task_name, (a_vals, b_vals, c_vals) in normed_by_task.items():
    task_rows = df["task"] == task_name
    df.loc[task_rows, "anormed"] = a_vals
    df.loc[task_rows, "bnormed"] = b_vals
    df.loc[task_rows, "cnormed"] = c_vals
In [179]:
# Show every row of df for one subject (subnum 96):
df[df["subnum"]==96]
Out[179]:
hypotenuse trials cumsum max_two_corr product_score simple_max slope span subnum task total_correct math sem vis reg exp anormed bnormed cnormed
94 43.324358 14 41 5.0 70 7 2.928571 2 96 corsi 10 0.416667 0.916667 1.0 1.0 eit 1 2.928571 3.094597
188 42.154478 16 39 5.0 48 6 2.437500 4 96 dspan 8 0.416667 0.916667 1.0 1.0 eit 1 2.437500 2.634655
282 29.546573 13 27 7.0 7 35 2.250000 4 96 toh 5 0.416667 0.916667 1.0 1.0 eit 1 2.076923 2.305127
In [180]:
#Optional write the dataframe to the results folder
#df = write_read(df, "pebl.csv")

#Model the regular condition as a function of the psychometric battery and the visual categorization 
#experiment that took part in:

#md = smf.mixedlm("reg ~ task * product_score * exp ", data=df, groups="subnum")
#mdf = md.fit()
#mdf.summary()
In [181]:
# Look at the "reg" scores against the normed hypotenuse ("cnormed") from the
# psychometric battery, restricted to rows where exp == "vrt".
vrt_rows = df[df["exp"] == "vrt"]
sns.distplot(vrt_rows["reg"]);
sns.distplot(vrt_rows["cnormed"]);
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally,
# while older releases accept the keyword form as well.
sns.jointplot(x=vrt_rows["reg"], y=vrt_rows["cnormed"]);