QuickReference/public/posts/29139.html

<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>k近邻算法K-Nearest NeighborsKNN | QuickReference</title><meta name="author" content="shenjianZ"><meta name="copyright" content="shenjianZ"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="k近邻算法K-Nearest NeighborsKNN将当前样本的类别归类于距离最近的k个样本的类别 距离公式(2维) 欧式距离$$d &#x3D; \sqrt{(x_1-y_1)^2 + (x_2 - y_2)^2}$$ 曼哈顿距离$$d &#x3D; |x_1 - x_2| + |y_1 - y_2|$$ 切比雪夫距离$$d &#x3D; \max\left(|x_1 - x_2|, |y">
<meta property="og:type" content="article">
<meta property="og:title" content="k近邻算法K-Nearest NeighborsKNN">
<meta property="og:url" content="https://rq.shenjianl.cn/posts/29139.html">
<meta property="og:site_name" content="QuickReference">
<meta property="og:description" content="k近邻算法K-Nearest NeighborsKNN将当前样本的类别归类于距离最近的k个样本的类别 距离公式(2维) 欧式距离$$d &#x3D; \sqrt{(x_1-y_1)^2 + (x_2 - y_2)^2}$$ 曼哈顿距离$$d &#x3D; |x_1 - x_2| + |y_1 - y_2|$$ 切比雪夫距离$$d &#x3D; \max\left(|x_1 - x_2|, |y">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://rq.shenjianl.cn/img/avatar.jpg">
<meta property="article:published_time" content="2025-01-13T09:20:59.000Z">
<meta property="article:modified_time" content="2025-01-20T10:17:34.771Z">
<meta property="article:author" content="shenjianZ">
<meta property="article:tag" content="KNN">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://rq.shenjianl.cn/img/avatar.jpg"><link rel="shortcut icon" href="/img/favicon.png"><link rel="canonical" href="https://rq.shenjianl.cn/posts/29139.html"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css?v=4.13.0"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.5.1/css/all.min.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
root: '/',
algolia: undefined,
localSearch: {"path":"/search.xml","preload":false,"top_n_per_article":1,"unescape":true,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
translate: undefined,
noticeOutdate: undefined,
highlight: {"plugin":"highlight.js","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":false},
copy: {
success: '复制成功',
error: '复制错误',
noSupport: '浏览器不支持'
},
relativeDate: {
homepage: false,
post: false
},
runtime: '天',
dateSuffix: {
just: '刚刚',
min: '分钟前',
hour: '小时前',
day: '天前',
month: '个月前'
},
copyright: undefined,
lightbox: 'fancybox',
Snackbar: undefined,
infinitegrid: {
js: 'https://cdn.jsdelivr.net/npm/@egjs/infinitegrid@4.11.1/dist/infinitegrid.min.js',
buttonText: '加载更多'
},
isPhotoFigcaption: false,
islazyload: false,
isAnchor: false,
percent: {
toc: true,
rightside: false,
},
autoDarkmode: false
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
title: 'k近邻算法K-Nearest NeighborsKNN',
isPost: true,
isHome: false,
isHighlightShrink: undefined,
isToc: true,
postUpdate: '2025-01-20 18:17:34'
}</script><script>(win=>{
win.saveToLocal = {
set: (key, value, ttl) => {
if (ttl === 0) return
const now = Date.now()
const expiry = now + ttl * 86400000
const item = {
value,
expiry
}
localStorage.setItem(key, JSON.stringify(item))
},
get: key => {
const itemStr = localStorage.getItem(key)
if (!itemStr) {
return undefined
}
const item = JSON.parse(itemStr)
const now = Date.now()
if (now > item.expiry) {
localStorage.removeItem(key)
return undefined
}
return item.value
}
}
win.getScript = (url, attr = {}) => new Promise((resolve, reject) => {
const script = document.createElement('script')
script.src = url
script.async = true
script.onerror = reject
script.onload = script.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
script.onload = script.onreadystatechange = null
resolve()
}
Object.keys(attr).forEach(key => {
script.setAttribute(key, attr[key])
})
document.head.appendChild(script)
})
win.getCSS = (url, id = false) => new Promise((resolve, reject) => {
const link = document.createElement('link')
link.rel = 'stylesheet'
link.href = url
if (id) link.id = id
link.onerror = reject
link.onload = link.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
link.onload = link.onreadystatechange = null
resolve()
}
document.head.appendChild(link)
})
win.activateDarkMode = () => {
document.documentElement.setAttribute('data-theme', 'dark')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#0d0d0d')
}
}
win.activateLightMode = () => {
document.documentElement.setAttribute('data-theme', 'light')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#ffffff')
}
}
const t = saveToLocal.get('theme')
if (t === 'dark') activateDarkMode()
else if (t === 'light') activateLightMode()
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
if (asideStatus === 'hide') {
document.documentElement.classList.add('hide-aside')
} else {
document.documentElement.classList.remove('hide-aside')
}
}
const detectApple = () => {
if(/iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)){
document.documentElement.classList.add('apple')
}
}
detectApple()
})(window)</script><meta name="generator" content="Hexo 7.3.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">17</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">8</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><hr class="custom-hr"/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg fixed" id="page-header" style="background-image: url('/img/site01.jpg')"><nav id="nav"><span id="blog-info"><a href="/" title="QuickReference"><span class="site-name">QuickReference</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i 
class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="post-info"><h1 class="post-title">k近邻算法K-Nearest NeighborsKNN</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2025-01-13T09:20:59.000Z" title="发表于 2025-01-13 17:20:59">2025-01-13</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2025-01-20T10:17:34.771Z" title="更新于 2025-01-20 18:17:34">2025-01-20</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/machinelearning/">machinelearning</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="k近邻算法K-Nearest NeighborsKNN"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article class="post-content" id="article-container"><h2 id="k近邻算法K-Nearest-NeighborsKNN"><a href="#k近邻算法K-Nearest-NeighborsKNN" class="headerlink" title="k近邻算法K-Nearest NeighborsKNN"></a><strong>k近邻算法K-Nearest NeighborsKNN</strong></h2><p>将当前样本的类别归类于距离最近的<strong>k</strong>个样本的类别</p>
<h4 id="距离公式-2维"><a href="#距离公式-2维" class="headerlink" title="距离公式(2维)"></a><strong>距离公式(2维)</strong></h4><ul>
<li>欧式距离<br>$$<br>d &#x3D; \sqrt{(x_1-y_1)^2 + (x_2 - y_2)^2}<br>$$</li>
<li>曼哈顿距离<br>$$<br>d &#x3D; |x_1 - x_2| + |y_1 - y_2|<br>$$</li>
<li>切比雪夫距离<br>$$<br>d &#x3D; \max\left(|x_1 - x_2|, |y_1 - y_2|\right)<br>$$</li>
</ul>
<h4 id="k值选择问题"><a href="#k值选择问题" class="headerlink" title="k值选择问题"></a>k值选择问题</h4><table>
<thead>
<tr>
<th>k值</th>
<th>影响</th>
</tr>
</thead>
<tbody><tr>
<td>越大</td>
<td>模型过拟合,准确率波动较大</td>
</tr>
<tr>
<td>越小</td>
<td>模型欠拟合,准确率趋于稳定但可能较低</td>
</tr>
</tbody></table>
<h3 id="特征预处理"><a href="#特征预处理" class="headerlink" title="特征预处理"></a>特征预处理</h3><blockquote>
<p>通过一些转换函数将特征数据转换成更加适合算法模型的特征数据过程 </p>
</blockquote>
<ul>
<li><p>归一化<br>将数据变换到指定区间(默认是[0,1]<br>$$ x &#x3D; \frac{x- x_{\text {min}}}{x_{\text{max}} - x_{\text{min}}} $$<br>若需要缩放到任意区间 ([a, b]),公式为: $$ x &#x3D; a + \frac{(x - x_{\text{min}}) \cdot (b - a)}{x_{\text{max}} - x_{\text{min}}} $$<br>其中:( [a, b] ):目标区间的范围<br>归一化受到数据集的异常值的影响,需要进行标准化处理(更加合理)</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> MinMaxScaler <span class="comment"># 归一化</span></span><br></pre></td></tr></table></figure></li>
<li><p>标准化<br>将数据调整为均值为 0标准差为 1 的标准正态分布<br>$$ z &#x3D; \frac{x - \mu}{\sigma} $$<br>( z ):标准化后的值 ( x ):原始数据值 ( $\mu$ ):数据的均值 ( $\sigma$):数据的标准差</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> StandardScaler <span class="comment"># 标准化</span></span><br></pre></td></tr></table></figure></li>
</ul>
<h3 id="KNN代码实现"><a href="#KNN代码实现" class="headerlink" title="KNN代码实现"></a>KNN代码实现</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> seaborn <span class="keyword">as</span> sns</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt </span><br><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> sklearn.datasets <span class="keyword">import</span> load_iris</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> MinMaxScaler,StandardScaler</span><br><span class="line"><span class="keyword">from</span> sklearn.neighbors <span class="keyword">import</span> KNeighborsClassifier</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span class="keyword">import</span> accuracy_score</span><br><span class="line"></span><br><span class="line"><span class="comment"># 1 数据集获取</span></span><br><span class="line">iris = load_iris()</span><br><span class="line"><span class="comment"># print(iris.feature_names)</span></span><br><span class="line">iris_data = pd.DataFrame(iris.data,columns=[<span class="string">&#x27;Sepal_Length&#x27;</span>, <span class="string">&#x27;Sepal_Width&#x27;</span>, <span class="string">&#x27;Petal_Length&#x27;</span>, <span class="string">&#x27;Petal_Width&#x27;</span>])</span><br><span class="line">iris_data[<span class="string">&#x27;target&#x27;</span>] = iris.target</span><br><span class="line"></span><br><span class="line"><span class="keyword">def</span> <span class="title function_">iris_plot</span>(<span class="params">data,col1,col2</span>):</span><br><span class="line"> 
sns.lmplot(x=col1,y=col2,data=data,hue=<span class="string">&quot;target&quot;</span>,fit_reg=<span class="literal">False</span>)</span><br><span class="line"> plt.show()</span><br><span class="line"><span class="comment"># 2 数据集可视化</span></span><br><span class="line"><span class="comment"># iris_plot(iris_data, &#x27;Sepal_Width&#x27;, &#x27;Petal_Length&#x27;)</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 3 数据集的划分</span></span><br><span class="line">X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=<span class="number">0.2</span>,random_state=<span class="number">44</span>)</span><br><span class="line"><span class="comment"># print(&quot;训练集的特征值:\n&quot;,X_train)</span></span><br><span class="line"><span class="comment"># print(&quot;训练集的目标值:\n&quot;,y_train)</span></span><br><span class="line"><span class="comment"># print(&quot;测试集的特征值:\n&quot;,X_test)</span></span><br><span class="line"><span class="comment"># print(&quot;测试集的特征值:\n&quot;,y_test)</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 4 归一化</span></span><br><span class="line">transfer = StandardScaler()</span><br><span class="line">X_train = transfer.fit_transform(X_train)</span><br><span class="line">X_test = transfer.transform(X_test)</span><br><span class="line"><span class="comment"># print(&quot;归一化的,X_train\n&quot;,X_train)</span></span><br><span class="line"><span class="comment"># print(&quot;归一化的X_test\n&quot;,X_test)</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 5 机器学习 KNN</span></span><br><span class="line"><span class="comment"># 5.1 实例化估计器</span></span><br><span class="line">estimator = KNeighborsClassifier(n_neighbors=<span class="number">9</span>)</span><br><span class="line"><span class="comment"># 5.2 进行训练</span></span><br><span class="line">estimator.fit(X_train,y_train)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 6 模型评估</span></span><br><span class="line">y_pred = estimator.predict(X_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;预测值:\n&quot;</span>,y_pre)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;预测值与真实值是否相等:\n&quot;</span>,y_pred==y_test)</span><br><span class="line">accuracy = accuracy_score(y_test, y_pred)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f&quot;\nKNN 模型的准确率: <span class="subst">&#123;accuracy:<span class="number">.4</span>f&#125;</span>&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="/img/machinelearning/knn-01.png"></p>
<h3 id="交叉验证与网格搜索"><a href="#交叉验证与网格搜索" class="headerlink" title="交叉验证与网格搜索"></a>交叉验证与网格搜索</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> seaborn <span class="keyword">as</span> sns</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt </span><br><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split,GridSearchCV</span><br><span class="line"><span class="keyword">from</span> sklearn.datasets <span class="keyword">import</span> load_iris</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> MinMaxScaler,StandardScaler</span><br><span class="line"><span class="keyword">from</span> sklearn.neighbors <span class="keyword">import</span> KNeighborsClassifier</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span class="keyword">import</span> accuracy_score</span><br><span class="line"></span><br><span class="line"><span class="comment"># 1 数据集获取</span></span><br><span class="line">iris = load_iris()</span><br><span class="line">iris_data = pd.DataFrame(iris.data,columns=[<span class="string">&#x27;Sepal_Length&#x27;</span>, <span class="string">&#x27;Sepal_Width&#x27;</span>, <span class="string">&#x27;Petal_Length&#x27;</span>, <span class="string">&#x27;Petal_Width&#x27;</span>])</span><br><span class="line">iris_data[<span class="string">&#x27;target&#x27;</span>] = iris.target</span><br><span class="line"></span><br><span class="line"><span class="comment"># 3 数据集的划分</span></span><br><span class="line">X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=<span class="number">0.2</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 4 归一化</span></span><br><span class="line">transfer = StandardScaler()</span><br><span class="line">X_train = 
transfer.fit_transform(X_train)</span><br><span class="line">X_test = transfer.transform(X_test)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 5 机器学习 KNN</span></span><br><span class="line"><span class="comment"># 5.1 实例化估计器</span></span><br><span class="line"><span class="comment">#</span></span><br><span class="line"><span class="comment">#不指定 &lt;code&gt; n_neighbors &lt;/code&gt; ,使用网格搜索进行循环训练</span></span><br><span class="line">estimator = KNeighborsClassifier()</span><br><span class="line"><span class="comment"># 5.2 模型调优 -- 交叉验证,网格搜素</span></span><br><span class="line">estimator = GridSearchCV(estimator,param_grid=&#123;<span class="string">&quot;n_neighbors&quot;</span>:[<span class="number">1</span>,<span class="number">3</span>,<span class="number">5</span>,<span class="number">7</span>]&#125;,cv=<span class="number">5</span>) <span class="comment"># 5 折</span></span><br><span class="line"><span class="comment"># 5.2 进行训练</span></span><br><span class="line">estimator.fit(X_train,y_train)</span><br><span class="line"> </span><br><span class="line"><span class="comment"># 6 模型评估</span></span><br><span class="line">y_pred = estimator.predict(X_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;预测值:\n&quot;</span>,y_pred)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;预测值与真实值是否相等:\n&quot;</span>,y_pred==y_test)</span><br><span class="line">accuracy = accuracy_score(y_test, y_pred)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f&quot;\nKNN 模型的准确率: <span class="subst">&#123;accuracy:<span class="number">.4</span>f&#125;</span>&quot;</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 交叉验证的相关参数</span></span><br><span class="line"><span class="built_in">print</span>(<span class="string">f&quot;最好结果:<span class="subst">&#123;estimator.best_score_&#125;</span>&quot;</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f&quot;最好模型:<span class="subst">&#123;estimator.best_estimator_&#125;</span>&quot;</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f&quot;最好模型结果:<span class="subst">&#123;estimator.cv_results_&#125;</span>&quot;</span>)</span><br></pre></td></tr></table></figure>
<p><img src="/img/machinelearning/cros-valid.png"></p>
<h3 id="机器学习的基本步骤"><a href="#机器学习的基本步骤" class="headerlink" title="机器学习的基本步骤"></a>机器学习的基本步骤</h3><ul>
<li>获取数据集</li>
<li>数据集基本处理<ul>
<li>去重去空、填充等操作 </li>
<li>确定特征值和目标值</li>
<li>分割数据集</li>
</ul>
</li>
<li>特征工程(特征预处理 标准化等)</li>
<li>机器学习</li>
<li>模型评估</li>
</ul>
<h3 id="数据分割的方法"><a href="#数据分割的方法" class="headerlink" title="数据分割的方法"></a>数据分割的方法</h3><ul>
<li>留出法<br>训练&#x2F;测试集的划分要尽可能保持数据分布的一致性,避免因数据划分过程引入额外的偏差而对最终结果产生影响。<br>单次使用留出法得到的估计结果往往不够稳定可靠,在使用留出法时,一般要采用若干次随机划分、重复进行实验评估后取平均值作为留出法的评估结果。<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> KFold,StratifiedKFold</span><br><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line">X = np.array([</span><br><span class="line">[<span class="number">1</span>,<span class="number">2</span>,<span class="number">3</span>,<span class="number">4</span>],</span><br><span class="line">[<span class="number">11</span>,<span class="number">12</span>,<span class="number">13</span>,<span class="number">14</span>],</span><br><span class="line">[<span class="number">21</span>,<span class="number">22</span>,<span class="number">23</span>,<span class="number">24</span>],</span><br><span class="line">[<span class="number">31</span>,<span class="number">32</span>,<span class="number">33</span>,<span class="number">34</span>],</span><br><span class="line">[<span class="number">41</span>,<span class="number">42</span>,<span class="number">43</span>,<span class="number">44</span>],</span><br><span class="line">[<span class="number">51</span>,<span class="number">52</span>,<span class="number">53</span>,<span class="number">54</span>],</span><br><span class="line">[<span class="number">61</span>,<span class="number">62</span>,<span class="number">63</span>,<span class="number">64</span>],</span><br><span class="line">[<span class="number">71</span>,<span class="number">72</span>,<span class="number">73</span>,<span class="number">74</span>]</span><br><span class="line">])</span><br><span class="line">y=np.array([<span class="number">1</span>,<span class="number">1</span>,<span class="number">0</span>,<span class="number">0</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">0</span>,<span class="number">0</span>])</span><br><span class="line">folder = KFold(n_splits=<span class="number">4</span>)</span><br><span class="line">sfloder = StratifiedKFold(n_splits=<span class="number">4</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;KFOLD:&quot;</span>)</span><br><span class="line"><span class="keyword">for</span> train,test <span class="keyword">in</span> folder.split(X,y):</span><br><span class="line"> <span class="built_in">print</span>(<span class="string">f&quot;train:<span class="subst">&#123;train&#125;</span>,test:<span class="subst">&#123;test&#125;</span>&quot;</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">&quot;SKFOLD:&quot;</span>)</span><br><span class="line"><span class="keyword">for</span> 
train,test <span class="keyword">in</span> sfloder.split(X,y):</span><br><span class="line"> <span class="built_in">print</span>(<span class="string">f&quot;train:<span class="subst">&#123;train&#125;</span>,test:<span class="subst">&#123;test&#125;</span>&quot;</span>)</span><br></pre></td></tr></table></figure>
<img src="/img/machinelearning/kfold-skfold.png"></li>
<li>自助法</li>
<li>交叉验证法</li>
</ul>
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta"><i class="fas fa-circle-user fa-fw"></i>文章作者: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn">shenjianZ</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta"><i class="fas fa-square-arrow-up-right fa-fw"></i>文章链接: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn/posts/29139.html">https://rq.shenjianl.cn/posts/29139.html</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta"><i class="fas fa-circle-exclamation fa-fw"></i>版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外,均采用 <a href="https://qr.shenjianl.cn/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="https://rq.shenjianl.cn" target="_blank">QuickReference</a></span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/KNN/">KNN</a></div><div class="post_share"><div class="social-share" data-image="/img/avatar.jpg" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/js/social-share.min.js" defer></script></div></div><nav class="pagination-post" id="pagination"><div class="prev-post pull-left"><a href="/posts/12462.html" title="C lang"><div class="cover" style="background: var(--default-bg-color)"></div><div class="pagination-info"><div class="label">上一篇</div><div class="prev_info">C lang</div></div></a></div><div class="next-post pull-right"><a href="/posts/61253.html" title="Hadoop集群搭建基础环境"><div class="cover" style="background: var(--default-bg-color)"></div><div class="pagination-info"><div class="label">下一篇</div><div class="next_info">Hadoop集群搭建基础环境</div></div></a></div></nav></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/></div><div class="author-info__name">shenjianZ</div><div class="author-info__description">一份快捷简便的文档,便于查阅编程的细节</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">17</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">8</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><a id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/shenjianz"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/shenjianZ" target="_blank" title="Github"><i class="fab fa-github" style="color: #24292e;"></i></a><a class="social-icon" href="mailto:15202078626@163.com" target="_blank" title="Email"><i class="fas fa-envelope" style="color: #4a7dbe;"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">一个简单快捷的文档知识点查阅网站</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div 
class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#k%E8%BF%91%E9%82%BB%E7%AE%97%E6%B3%95%EF%BC%88K-Nearest-Neighbors%EF%BC%89KNN"><span class="toc-number">1.</span> <span class="toc-text">k近邻算法K-Nearest NeighborsKNN</span></a><ol class="toc-child"><li class="toc-item toc-level-4"><a class="toc-link" href="#%E8%B7%9D%E7%A6%BB%E5%85%AC%E5%BC%8F-2%E7%BB%B4"><span class="toc-number">1.0.1.</span> <span class="toc-text">距离公式(2维)</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#k%E5%80%BC%E9%80%89%E6%8B%A9%E9%97%AE%E9%A2%98"><span class="toc-number">1.0.2.</span> <span class="toc-text">k值选择问题</span></a></li></ol></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E7%89%B9%E5%BE%81%E9%A2%84%E5%A4%84%E7%90%86"><span class="toc-number">1.1.</span> <span class="toc-text">特征预处理</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#KNN%E4%BB%A3%E7%A0%81%E5%AE%9E%E7%8E%B0"><span class="toc-number">1.2.</span> <span class="toc-text">KNN代码实现</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E4%BA%A4%E5%8F%89%E9%AA%8C%E8%AF%81%E4%B8%8E%E7%BD%91%E6%A0%BC%E6%90%9C%E7%B4%A2"><span class="toc-number">1.3.</span> <span class="toc-text">交叉验证与网格搜索</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%9A%84%E5%9F%BA%E6%9C%AC%E6%AD%A5%E9%AA%A4"><span class="toc-number">1.4.</span> <span class="toc-text">机器学习的基本步骤</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%95%B0%E6%8D%AE%E5%88%86%E5%89%B2%E7%9A%84%E6%96%B9%E6%B3%95"><span class="toc-number">1.5.</span> <span class="toc-text">数据分割的方法</span></a></li></ol></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/95.html" title="决策树算法">决策树算法</a><time datetime="2025-01-24T04:39:59.000Z" title="发表于 2025-01-24 12:39:59">2025-01-24</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/60504.html" title="逻辑回归">逻辑回归</a><time datetime="2025-01-20T07:30:08.000Z" title="发表于 2025-01-20 15:30:08">2025-01-20</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/52662.html" title="线性回归">线性回归</a><time datetime="2025-01-19T08:46:51.000Z" title="发表于 2025-01-19 16:46:51">2025-01-19</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/12462.html" title="C lang">C lang</a><time datetime="2025-01-15T12:41:26.000Z" title="发表于 2025-01-15 20:41:26">2025-01-15</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/29139.html" title="k近邻算法K-Nearest NeighborsKNN">k近邻算法K-Nearest NeighborsKNN</a><time datetime="2025-01-13T09:20:59.000Z" title="发表于 2025-01-13 17:20:59">2025-01-13</time></div></div></div></div></div></div></main><footer id="footer" style="background: transparent"><div id="footer-wrap"><div class="copyright">&copy;2024 - 2025 By shenjianZ</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div><div 
class="footer_custom_text"><span>备案号豫ICP备2023019300号</span></div></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside-config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js?v=4.13.0"></script><script src="/js/main.js?v=4.13.0"></script><script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.umd.min.js"></script><div class="js-pjax"><script>if (!window.MathJax) {
window.MathJax = {
tex: {
inlineMath: [['$', '$'], ['\\(', '\\)']],
tags: 'ams'
},
chtml: {
scale: 1.1
},
options: {
renderActions: {
findScript: [10, doc => {
for (const node of document.querySelectorAll('script[type^="math/tex"]')) {
const display = !!node.type.match(/; *mode=display/)
const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display)
const text = document.createTextNode('')
node.parentNode.replaceChild(text, node)
math.start = {node: text, delim: '', n: 0}
math.end = {node: text, delim: '', n: 0}
doc.math.push(math)
}
}, '']
}
}
}
const script = document.createElement('script')
script.src = 'https://cdn.jsdelivr.net/npm/mathjax@3.2.2/es5/tex-mml-chtml.min.js'
script.id = 'MathJax-script'
script.async = true
document.head.appendChild(script)
} else {
MathJax.startup.document.state(0)
MathJax.texReset()
MathJax.typesetPromise()
}</script></div><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/dist/activate-power-mode.min.js"></script><script>POWERMODE.colorful = true;
POWERMODE.shake = true;
POWERMODE.mobile = false;
document.body.addEventListener('input', POWERMODE);
</script><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><hr/><div id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js?v=4.13.0"></script></div></div></body></html>