美文网首页
实验一 贝叶斯分类器的python实现

实验一 贝叶斯分类器的python实现

作者: 林木木_f297 | 来源:发表于2019-10-20 18:58 被阅读0次

题目描述

汽车评价数据集
共1728个数据,每个数据特征为6维,分为4类,类别标记为unacc,acc,good,V-good
四个类别标记分别表示汽车性价比等级(由低到高)
unacc:1210个
acc:384个
good:69个
V-good:65个
6个特征分别为:(6个属性)
1、buying (取值:v-high、high、med、low) 表示购买价格
2、maint (取值: v-high、high、med、low) 表示维修价格
3、door (取值:2、3、4、5-more) 车门数量
4、Persons (取值:2、4、more) 可容纳人数
5、Lug_boot (取值:small、med、big) 行李箱大小
6、Safety (取值:low、med、high) 安全系数
链接:http://archive.ics.uci.edu/ml/datasets/Car+Evaluation
实验完成要求:
1.仔细阅读并了解实验数据集;
2.使用任何一种熟悉的计算机语言(比如 C,Java或者MATLAB)实现朴素贝叶斯算法;
3.利用朴素贝叶斯算法在训练数据上学习分类器,训练数据的大小分别设置为:前100个数据,前200个数据,前500个数据,前700个数据,前1000个数据,前1350个数据;
4.利用测试数据对学习的分类器进行性能评估;
5.演示实验,提交代码,统计分析实验结果并上交实验报告;

开始做题

想要实现贝叶斯分类器,可以分为两个部分,一个是训练,另一部分是检验。
训练即将贝叶斯公式用代码语言描述,具体的贝叶斯公式这里就不赘述了
更艰难的是数据的分类,需要大量的重复性代码
其实绕来绕去就是一个统计+计算判断

代码实现

运行结果
import csv
import random
#import pandas
# Load the data and split it into two parts (training rows and remaining rows)
def loadcsv(name):
    """Read a CSV file and return its records.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        string fields of that record.
    """
    # The context manager closes the file even if parsing fails.
    # newline='' hands newline handling to the csv module, as its docs advise.
    with open(name, 'r', newline='') as f:
        return list(csv.reader(f))

def randDivision(dataset, trainSize):
    """Randomly split *dataset* into a training part and the remainder.

    Rows are drawn without replacement from a shallow copy, so *dataset*
    itself is not modified.

    Args:
        dataset: Sequence of rows to split.
        trainSize: Number of rows to move into the training part.

    Returns:
        A two-element list ``[train, rest]``: the drawn rows in drawing
        order, followed by the rows that were not drawn.
    """
    remaining = list(dataset)
    picked = []
    while len(picked) < trainSize:
        # Draw one random position and move that row over.
        picked.append(remaining.pop(random.randrange(len(remaining))))
    return [picked, remaining]


# Initialise the global tables
#
# Every table has one row per feature (row 0 buying, 1 maint, 2 doors,
# 3 persons, 4 lug_boot, 5 safety) and one float slot per feature value.
# data1 is filled from every row; the other four are filled per class.

data1 = [[0.0] * width for width in (4, 4, 4, 3, 3, 3)]
dataunacc = [[0.0] * width for width in (4, 4, 4, 3, 3, 3)]
dataacc = [[0.0] * width for width in (4, 4, 4, 3, 3, 3)]
datagood = [[0.0] * width for width in (4, 4, 4, 3, 3, 3)]
dataVgood = [[0.0] * width for width in (4, 4, 4, 3, 3, 3)]
# Row count per class, in the order unacc, acc, good, vgood
datavip = [0] * 4

# Counting function: tallies how often each feature value occurs

def stat(dataset):
    """Tally the feature values of every row into the global count tables.

    Each row is read as six feature values (columns 0-5) followed by the
    class label (column 6). The feature values are always counted in
    ``data1``. When the label is 'unacc', 'acc', 'good' or 'vgood', the
    matching slot of ``datavip`` is incremented and the feature values are
    also counted in that class's table. Rows with any other label only
    contribute to ``data1``.

    Args:
        dataset: Iterable of rows, each indexable at positions 0-6.
    """
    adders = (addcount, addmaint, adddoor, addperson, addlug, addsafty)
    class_tables = (
        ('unacc', dataunacc),
        ('acc', dataacc),
        ('good', datagood),
        ('vgood', dataVgood),
    )
    for record in dataset:
        # Read all seven columns up front, before any table is touched.
        values = [record[col] for col in range(6)]
        label = record[6]
        for adder, value in zip(adders, values):
            adder(value, data1)
        for slot, (class_name, table) in enumerate(class_tables):
            if label == class_name:
                datavip[slot] = datavip[slot] + 1
                for adder, value in zip(adders, values):
                    adder(value, table)
                break


## Helper functions called by stat() above; each adds one to the counter matching a feature value

def addcount(count,data):
    """Add one to the buying-price counter in ``data[0]`` matching *count*.

    Args:
        count: Buying-price level; 'vhigh', 'high', 'med' and 'low' map to
            positions 0-3 of ``data[0]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *count* leaves *data* untouched.
    """
    levels = ('vhigh', 'high', 'med', 'low')
    if count in levels:
        slot = levels.index(count)
        data[0][slot] += 1
def addmaint(maint,data):
    """Add one to the maintenance-price counter in ``data[1]`` matching *maint*.

    Args:
        maint: Maintenance-price level; 'vhigh', 'high', 'med' and 'low'
            map to positions 0-3 of ``data[1]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *maint* leaves *data* untouched.
    """
    levels = ('vhigh', 'high', 'med', 'low')
    if maint in levels:
        slot = levels.index(maint)
        data[1][slot] += 1
def adddoor(door,data):
    """Add one to the door-count counter in ``data[2]`` matching *door*.

    Args:
        door: Door count as a string; '2', '3', '4' and '5more' map to
            positions 0-3 of ``data[2]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *door* leaves *data* untouched.
    """
    choices = ('2', '3', '4', '5more')
    if door in choices:
        slot = choices.index(door)
        data[2][slot] += 1
def addperson(persons,data):
    """Add one to the capacity counter in ``data[3]`` matching *persons*.

    Args:
        persons: Capacity as a string; '2', '4' and 'more' map to
            positions 0-2 of ``data[3]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *persons* leaves *data* untouched.
    """
    choices = ('2', '4', 'more')
    if persons in choices:
        slot = choices.index(persons)
        data[3][slot] += 1
def addlug(lug,data):
    """Add one to the luggage-boot counter in ``data[4]`` matching *lug*.

    Args:
        lug: Luggage-boot size; 'small', 'med' and 'big' map to
            positions 0-2 of ``data[4]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *lug* leaves *data* untouched.
    """
    sizes = ('small', 'med', 'big')
    if lug in sizes:
        slot = sizes.index(lug)
        data[4][slot] += 1
def addsafty(safty,data):
    """Add one to the safety counter in ``data[5]`` matching *safty*.

    Args:
        safty: Safety level; 'low', 'med' and 'high' map to
            positions 0-2 of ``data[5]``.
        data: Table (list of rows) that is updated in place.

    Any other value of *safty* leaves *data* untouched.
    """
    levels = ('low', 'med', 'high')
    if safty in levels:
        slot = levels.index(safty)
        data[5][slot] += 1

##Convert the raw counts into probabilities
def getP(num, data):
    """Divide every entry of *data* by *num*, in place.

    Args:
        num: Divisor, i.e. the number of rows the counts were taken from.
        data: Table (list of rows of numbers) that is overwritten with the
            quotients.

    When *num* is 0 the table is left unchanged. Dividing would raise
    ZeroDivisionError, which would otherwise happen whenever a class has no
    rows in the training sample.
    """
    if num == 0:
        return
    for row in data:
        for idx, value in enumerate(row):
            row[idx] = value / num


## The functions below return a stored probability; they act as table lookups during testing
def getR0(data, t):
    """Return the entry of ``data[0]`` stored for buying-price level *t*.

    Args:
        data: Table (list of rows) to read from.
        t: Buying-price level; 'vhigh', 'high', 'med' and 'low' map to
            positions 0-3 of ``data[0]``.

    Returns:
        The stored value, or 0 when *t* is not a known level. The 0 fallback
        matches getR1 and keeps the product built by the caller numeric
        instead of failing on ``None``.
    """
    levels = ('vhigh', 'high', 'med', 'low')
    if t in levels:
        return data[0][levels.index(t)]
    return 0


def getR1(data, maint):
    """Return the entry of ``data[1]`` stored for maintenance-price level *maint*.

    Args:
        data: Table (list of rows) to read from.
        maint: Maintenance-price level; 'vhigh', 'high', 'med' and 'low'
            map to positions 0-3 of ``data[1]``.

    Returns:
        The stored value, or 0 when *maint* is not a known level.
    """
    levels = ('vhigh', 'high', 'med', 'low')
    if maint not in levels:
        return 0
    return data[1][levels.index(maint)]


def getR2(data, door):
    """Return the entry of ``data[2]`` stored for door count *door*.

    Args:
        data: Table (list of rows) to read from.
        door: Door count as a string; '2', '3', '4' and '5more' map to
            positions 0-3 of ``data[2]``.

    Returns:
        The stored value, or 0 when *door* is not a known value. The 0
        fallback matches getR1 and keeps the product built by the caller
        numeric instead of failing on ``None``.
    """
    choices = ('2', '3', '4', '5more')
    if door in choices:
        return data[2][choices.index(door)]
    return 0


def getR3(data, persons):
    """Return the entry of ``data[3]`` stored for capacity *persons*.

    Args:
        data: Table (list of rows) to read from.
        persons: Capacity as a string; '2', '4' and 'more' map to
            positions 0-2 of ``data[3]``.

    Returns:
        The stored value, or 0 when *persons* is not a known value. The 0
        fallback matches getR1 and keeps the product built by the caller
        numeric instead of failing on ``None``.
    """
    choices = ('2', '4', 'more')
    if persons in choices:
        return data[3][choices.index(persons)]
    return 0


def getR4(data, lug):
    """Return the entry of ``data[4]`` stored for luggage-boot size *lug*.

    Args:
        data: Table (list of rows) to read from.
        lug: Luggage-boot size; 'small', 'med' and 'big' map to
            positions 0-2 of ``data[4]``.

    Returns:
        The stored value, or 0 when *lug* is not a known size. The 0
        fallback matches getR1 and keeps the product built by the caller
        numeric instead of failing on ``None``.
    """
    sizes = ('small', 'med', 'big')
    if lug in sizes:
        return data[4][sizes.index(lug)]
    return 0


def getR5(data, safty):
    """Return the entry of ``data[5]`` stored for safety level *safty*.

    Args:
        data: Table (list of rows) to read from.
        safty: Safety level; 'low', 'med' and 'high' map to
            positions 0-2 of ``data[5]``.

    Returns:
        The stored value, or 0 when *safty* is not a known level. The 0
        fallback matches getR1 and keeps the product built by the caller
        numeric instead of failing on ``None``.
    """
    levels = ('low', 'med', 'high')
    if safty in levels:
        return data[5][levels.index(safty)]
    return 0


def getR6(num):
    """Translate a class index into its label.

    Args:
        num: Class index; 0, 1, 2 and 3 map to "unacc", "acc", "good"
            and "vgood".

    Returns:
        The label string, or None for any other value of *num*.
    """
    for index, label in enumerate(("unacc", "acc", "good", "vgood")):
        if num == index:
            return label
    return None
# Tally filled in by test(): rate[0] counts misclassified rows, rate[1] counts correctly classified rows
rate = [0] * 2

##Evaluation function
def test(testset):
    """Classify every row of *testset* and record the outcome in ``rate``.

    For each of the four classes a score is built by starting from that
    class's entry in ``datavip`` and multiplying in, feature by feature
    (columns 0-5), the value looked up in that class's table. The class with
    the largest score (chosen by getbig) is the prediction. ``rate[0]`` is
    incremented when the predicted label differs from column 6 of the row,
    otherwise ``rate[1]`` is incremented.

    Args:
        testset: Iterable of rows, each indexable at positions 0-6.
    """
    class_tables = (dataunacc, dataacc, datagood, dataVgood)
    lookups = (getR0, getR1, getR2, getR3, getR4, getR5)
    for line in testset:
        scores = []
        for slot, table in enumerate(class_tables):
            score = datavip[slot]
            # Multiply strictly left to right, one feature at a time.
            for col, lookup in enumerate(lookups):
                score = score * lookup(table, line[col])
            scores.append(score)
        k = getbig(*scores)
        outcome = 0 if line[6] != getR6(k) else 1
        rate[outcome] = rate[outcome] + 1

##Return the index of the largest of four numbers
def getbig(r1,r2,r3,r4):
    """Return the position (0-3) of the largest argument.

    When several arguments share the maximum, the first of them wins.

    Args:
        r1, r2, r3, r4: The four values to compare.

    Returns:
        0, 1, 2 or 3 for r1, r2, r3 or r4 respectively.
    """
    largest = max(r1, r2, r3, r4)
    for position, candidate in enumerate((r1, r2, r3, r4)):
        if largest == candidate:
            return position

##Top-level driver: calls all of the functions above
def _reset_state():
    """Zero every global count table and tally in place."""
    for table in (data1, dataunacc, dataacc, datagood, dataVgood):
        for row in table:
            row[:] = [0.0] * len(row)
    datavip[:] = [0] * len(datavip)
    rate[:] = [0] * len(rate)


def training(size):
    """Train on *size* random rows, test on the rest, and print the accuracy.

    Reads the module-level ``dataset``. The global tables are zeroed first
    so each call starts from scratch. Without that, probabilities and
    tallies left over from an earlier call would be mixed into this run.

    Args:
        size: Number of rows drawn at random for training. The remaining
            rows are used for testing.
    """
    _reset_state()
    trainSet, testSet = randDivision(dataset, size)
    stat(trainSet)
    for class_count, table in zip(datavip, (dataunacc, dataacc, datagood, dataVgood)):
        # A class with no training rows keeps its all-zero table; dividing by 0 would raise.
        if class_count:
            getP(class_count, table)
    test(testSet)
    tested = rate[0] + rate[1]
    # With no test rows left the accuracy is undefined, so report NaN instead of crashing.
    Rate = rate[1] / tested if tested else float("nan")
    print("当训练数据大小为{0}个时,剩余数据的检测正确率为{1}".format(size, Rate))


if __name__=="__main__":
    # Raw string: the path value is unchanged, but the backslashes no longer
    # form invalid escape sequences (\., \d, \c), which newer Python warns about.
    name = r"..\..\data\car.csv"
    dataset = loadcsv(name)
    # Training-set sizes required by the assignment; 1000 was missing from the original runs.
    for size in (100, 200, 500, 700, 1000, 1350):
        training(size)




相关文章

网友评论

      本文标题:实验一 贝叶斯分类器的python实现

      本文链接:https://www.haomeiwen.com/subject/vaanmctx.html