作者:白于空
关于3D彩票的介绍:略
摘要:基于遗漏模型,以<期望遗漏,当前遗漏,平均遗漏,最大遗漏,遗漏方差>为特征进行kNN分类,挖掘其中的模式;
具体说明直接写在代码及其注释中;结论:预测精度比期望值约高20%。
import numpy as np
import pandas as pd

# Load the draw history. The result column must be named "value"; every cell
# is a comma-separated string of integers that is parsed below.
data = pd.read_csv('F:/2014.csv')
data = np.array(data.value)
data = data[np.arange(0, 300)]  # keep the first 300 rows
for row, text in enumerate(data):
    # Replace the raw string in place with an int32 array of its numbers.
    data[row] = np.array(text.split(','), dtype='int32')

# Parsing is finished (the original note speaks of 301 numbers, although the
# slice above keeps 300 rows). Next: one 0/1 indicator list per digit, with a
# 1 wherever that digit occurs in the corresponding row.
ylz = {}
for digit in np.arange(10):
    ylz[digit] = [1 if digit in draw else 0 for draw in data]

# Original author's note, translated: for a 0-1 sequence, return the gaps from
# the first 1 up to the 1 appended at the end; "e.g. [0,1,0] should give
# [2,4,2]; total length is len+1; [0,1] gives [4,1]".
# NOTE(review): those examples do not obviously match what the code below
# produces -- confirm the intended output format.
ylxl = {}
for digit in np.arange(10):
    flags = ylz[digit]
    # Sentinel hit placed after the last row. `flags` is the very list stored
    # in ylz, so ylz[digit] becomes one element longer as well.
    flags.append(1)
    ylxl[digit] = [pos for pos in np.arange(0, len(flags)) if flags[pos] == 1]

zc = {}
for digit in np.arange(10):
    hits = np.array(ylxl[digit])
    # Two-element list: [index of the first hit, array of gaps between hits].
    zc[digit] = [hits[0], hits[1:] - hits[:-1]]
zc
# zc records, per digit, the historical omission (gap) sequence of the 2014
# 3D lottery draws.
# Next step (author's note): record each digit's historical omission, maximum
# omission, mean omission and mathematically expected omission.
上面的文件命名为zcp3d2.py(须与下面主程序中import的模块名一致);另外需修改excel历史数据表——将excel中对应值的一列命名为value;
开始进行主函数的计算:
import numpy as np

# Author's note, translated: the gap sequence and the feature vectors have to
# be rebuilt from scratch for every prefix length (original annotation:
# "50-240").


def sd(c):
    """Return the population variance of the values in *c*.

    Despite the name, no square root is taken: the result is
    ``sum((c - mean(c)) ** 2) / len(c)``.

    Args:
        c: Non-empty one-dimensional sequence of numbers.

    Returns:
        The variance as a NumPy float.

    Raises:
        ValueError: If *c* is empty.
    """
    c = np.asarray(c)
    if c.size == 0:
        raise ValueError("sd() needs at least one value")
    # Sum over *all* elements; the earlier loop kept only one element's term.
    return np.sum((c - np.mean(c)) ** 2) / len(c)


# classify using kNN
def Classify(newInput, dataSet, labels, k):
    """Return the majority label among the *k* nearest rows of *dataSet*.

    Distances are Euclidean. When several labels share the highest vote count,
    the one whose first vote came from the nearest neighbour wins.

    Args:
        newInput: Feature vector to classify (same width as a dataSet row).
        dataSet: Two-dimensional array-like, one training sample per row.
        labels: Sequence of hashable labels, ``labels[i]`` for ``dataSet[i]``.
        k: Number of neighbours that vote; clamped to the number of samples.

    Returns:
        The winning label, taken unchanged from *labels*.

    Raises:
        ValueError: If *dataSet* has no rows or *k* is smaller than 1.
    """
    dataSet = np.asarray(dataSet)
    numSamples = dataSet.shape[0]  # shape[0] is the number of rows
    if numSamples == 0:
        raise ValueError("Classify() needs at least one training sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(int(k), numSamples)

    diff = dataSet - np.asarray(newInput)  # broadcast over every row
    distance = np.sqrt(np.sum(diff ** 2, axis=1))
    # A stable sort makes the neighbour order deterministic for equal distances.
    sortedDistIndices = np.argsort(distance, kind="stable")

    classCount = {}
    for i in range(k):
        voteLabel = labels[sortedDistIndices[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    # max() returns the first key with the top count, i.e. insertion order.
    return max(classCount, key=classCount.get)


def _gap_features(flags, p):
    """Build the feature vector from the first *p* entries of a 0/1 list.

    Returns ``[mean, max, variance, min, last]`` of the gaps between
    consecutive hit positions, where the most recent hit is left out.
    """
    hits = np.flatnonzero(np.asarray(flags[0:p]) == 1)
    # NOTE(review): the last hit is dropped before differencing, exactly as in
    # the original slicing (tu[1:n-1] - tu[0:n-2]) -- confirm this is intended.
    gaps = np.diff(hits[:-1])
    if gaps.size == 0:
        raise ValueError(
            "need at least three hits in the first %d entries" % p
        )
    return [np.mean(gaps), np.max(gaps), sd(gaps), np.min(gaps), gaps[-1]]


def qhj(knz, indicators=None, n_draws=None):
    """Predict the 0/1 indicator of digit *knz* with a 9-nearest-neighbour vote.

    One training sample is built for every prefix length ``p`` from 50 up to
    ``n_draws - 1``; the query sample uses the first ``n_draws + 1`` entries.

    Args:
        knz: Key of the digit inside *indicators*.
        indicators: Mapping from digit to its 0/1 list. The list must hold
            ``n_draws + 1`` entries; the companion script provides that by
            appending a trailing 1 to every list. Defaults to ``zcp3d2.ylz``.
        n_draws: Number of historical draws. Defaults to
            ``zcp3d2.data.shape[0]``.

    Returns:
        The predicted label (an element of the indicator list).

    Raises:
        ValueError: If a prefix contains fewer than three hits, or if there
            are no training samples.
        IndexError: If the indicator list is shorter than ``n_draws + 1``.
    """
    if indicators is None or n_draws is None:
        # NOTE(review): the module name must match the file name under which
        # the companion script was saved. Importing it reads the CSV.
        import zcp3d2

        if indicators is None:
            indicators = zcp3d2.ylz
        if n_draws is None:
            n_draws = zcp3d2.data.shape[0]

    ylzc = indicators[knz]
    first_prefix = 50  # shortest history used for a training sample
    t = 1              # author's note: offset from the data set, keep below 70
    k = 9

    train = []  # training feature vectors
    lable = []  # training targets
    for p in range(first_prefix, n_draws):
        train.append(_gap_features(ylzc, p))
        # NOTE(review): the target is entry p + 1, so entry p itself is
        # neither a feature nor a label -- kept as in the original.
        lable.append(ylzc[p + 1])

    testX = _gap_features(ylzc, n_draws + t)
    return Classify(testX, np.array(train), lable, k)


if __name__ == "__main__":
    # One prediction per digit 0-9.
    jgyc = [qhj(u) for u in range(10)]
    print(jgyc)