import urllib.request
import math


def stockCorrelate(ticker1, ticker2):
    """Download two CSV price tables and return the correlation of a column.

    For each ticker the URL ``prefix + ticker`` is fetched, the first line of
    the response is discarded, and every remaining line is decoded as ASCII
    and split on commas.  Rows are then paired positionally (up to the length
    of the shorter table); field 0 of each pair must match and field 6 is
    parsed as a float.  The two resulting float lists are passed to
    ``correlation``.

    NOTE(review): field 0 is presumably the date and field 6 presumably the
    adjusted close, and the skipped first line presumably a header -- confirm
    against the feed's actual CSV layout.  Also confirm that this endpoint is
    still being served.

    Args:
        ticker1: Ticker symbol appended to the URL prefix for the first table.
        ticker2: Ticker symbol appended to the URL prefix for the second table.

    Returns:
        The value of ``correlation`` for the two extracted columns.

    Raises:
        AssertionError: If field 0 differs between two positionally paired
            rows.  Raised explicitly (not via ``assert``) so that the check
            still runs under ``python -O``.
        ZeroDivisionError: Propagated from ``correlation`` when fewer than two
            rows are available or a column has zero variance.
    """
    prefix = 'http://ichart.finance.yahoo.com/table.csv?s='
    address1 = prefix + ticker1
    address2 = prefix + ticker2

    # Context managers guarantee both HTTP responses are closed, including
    # when the second request or either read fails part-way through.
    print('opening', address1)
    with urllib.request.urlopen(address1) as url1:
        print('opening', address2)
        with urllib.request.urlopen(address2) as url2:
            t1Data = url1.readlines()
            print(len(t1Data), 'lines from', address1)
            t2Data = url2.readlines()
            print(len(t2Data), 'lines from', address2)

    # Drop the first line of each table and split the rest into fields.
    rows1 = [line.decode('ascii').split(',') for line in t1Data[1:]]
    rows2 = [line.decode('ascii').split(',') for line in t2Data[1:]]

    closes1 = []
    closes2 = []
    # zip stops at the shorter table, matching the original min(len, len).
    for row1, row2 in zip(rows1, rows2):
        if row1[0] != row2[0]:
            raise AssertionError(
                'row keys differ: %r != %r' % (row1[0], row2[0])
            )
        closes1.append(float(row1[6]))
        closes2.append(float(row2[6]))

    return correlation(closes1, closes2)


def correlation(xlist, ylist):
    """Return the sample Pearson correlation coefficient of two sequences.

    Computed as ``sum((x - xbar) * (y - ybar)) / ((n - 1) * sx * sy)`` where
    ``sx`` and ``sy`` are the sample standard deviations from ``standardDev``.

    Args:
        xlist: Sequence of numbers.
        ylist: Sequence of numbers with the same length as ``xlist``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If the two sequences differ in length.
        ZeroDivisionError: If the sequences hold fewer than two items, or if
            either sequence has a standard deviation of zero.
    """
    # Pairing sequences of different lengths would mix a truncated product
    # sum with means/deviations taken over the full inputs, so reject it.
    if len(xlist) != len(ylist):
        raise ValueError(
            'xlist and ylist must have the same length (got %d and %d)'
            % (len(xlist), len(ylist))
        )

    xbar = mean(xlist)
    ybar = mean(ylist)
    xstd = standardDev(xlist)
    ystd = standardDev(ylist)

    num = 0.0
    for x, y in zip(xlist, ylist):
        num += (x - xbar) * (y - ybar)

    return num / ((len(xlist) - 1) * xstd * ystd)


def mean(alist):
    """Return the arithmetic mean of ``alist``.

    Raises:
        ZeroDivisionError: If ``alist`` is empty.
    """
    return sum(alist) / len(alist)


def standardDev(alist):
    """Return the sample standard deviation of ``alist`` (divisor ``n - 1``).

    Raises:
        ZeroDivisionError: If ``alist`` has fewer than two items.
    """
    theMean = mean(alist)
    # Use the builtin sum directly instead of shadowing it with a local.
    squared_diffs = sum((item - theMean) ** 2 for item in alist)
    return math.sqrt(squared_diffs / (len(alist) - 1))