{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from sklearn import datasets\n", "import numpy as np\n", "import math" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def load_iris(ratio=0.8):\n", " features, target = datasets.load_iris(True)\n", " \n", " num_samples = len(target)\n", " num_train = math.ceil(num_samples * ratio)\n", " \n", " # 随机打乱数据\n", " idx = np.random.permutation(np.arange(num_samples))\n", " traindata = features[idx[:num_train]], target[idx[:num_train]]\n", " validdata = features[idx[num_train:]], target[idx[num_train:]]\n", " \n", " return traindata, validdata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 作业三\n", "\n", "## 二、朴素贝叶斯分类器\n", "\n", "要求:\n", "\n", "* 用朴素贝叶斯构造一个iris数据集的分类器\n", "* 在尽量不修改代码结构的前提下完成工作\n", "\n", "ETA:1-5 hours" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 定义模型" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# 只需要修改这一部分 -- 代码量在40行以内\n", "class NaiveBayes:\n", " def __init__(self, smooth = 1):\n", " self.smooth = smooth # lambda\n", " self.conditional_prob = None # 条件概率\n", " self.prior_prob = None # 先验概率\n", " \n", " \n", " def __call__(self, features):\n", " return self.predict(features)\n", " \n", " \n", " def fit(self, features, target):\n", " \"\"\"\n", " \n", " 给定特征及真实结果,拟合分类器\n", " \n", " 将预测过程中所需要用到的条件概率及先验概率全部计算好\n", " \"\"\"\n", " \n", " self.num_features = features.shape[-1] # 特征的数目;iris数据集中共有4个特征\n", " self.target_labels = np.unique(target) # 预测结果的可能值:c_k\n", " # features_labels[j][l]表示第 j 个特征的第 l 个可能值: a_{jl}\n", " self.features_labels = [np.unique(features[:, i]) for i in range(features.shape[-1])]\n", " \n", " self.conditional_prob = self._conditional_prob(features, target)\n", " self.prior_prob = self._prior_prob(target)\n", " \n", " \n", " def predict(self, features):\n", " # 预测单个数据\n", " if len(features.shape) == 1:\n", " return np.array([self._predict_single(features)])\n", " # 批量预测\n", " elif len(features.shape) == 2:\n", " N = features.shape[0]\n", " return np.array([self._predict_single(features[i, :]) for i in range(N)])\n", " else:\n", " raise(ValueError(\"Unsupported features size, should be 1 or 2 dimensional\"))\n", " \n", " \n", " def _predict_single(self, feature):\n", " # 实现它\n", " raise(NotImplementedError())\n", " \n", " \n", " def _prior_prob(self, target):\n", " # 利用式4.11估计先验概率\n", " N = len(target)\n", " K = len(self.target_labels)\n", " \n", " # 实现它\n", " raise(NotImplementedError())\n", " \n", " return prob\n", " \n", " \n", " def _conditional_prob(self, features, target):\n", " # 利用式4.10估计条件概率\n", " # prob[k][j][l] is P(X^{(j)} = a_{jl} | Y = c_k)\n", " prob = [[np.zeros(feature.shape) for feature in self.features_labels] \n", " for _ in self.target_labels] # prob里一共存储了多少个数?\n", " \n", " # 实现它\n", " raise(NotImplementedError())\n", " \n", " return prob" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "读取数据" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "(X_train, Y_train), (X_valid, Y_valid) = load_iris()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "创建模型并拟合数据" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "model = NaiveBayes()\n", "model.fit(X_train, Y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "预测结果" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy: 0.8333\n" ] } ], "source": [ "Y_pred = model.predict(X_valid)\n", "accuracy = np.sum(Y_pred == Y_valid)/len(Y_valid)\n", "print(f\"accuracy: {accuracy:.4f}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "AI-Course", "language": "python", "name": "ai-course" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }