167 lines
36 KiB
HTML
167 lines
36 KiB
HTML
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>集成学习 | QuickReference</title><meta name="author" content="shenjianZ"><meta name="copyright" content="shenjianZ"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="集成学习笔记:Bagging 与随机森林(Random Forest = Bagging + Decision Tree),包含在 Titanic 数据集上使用 RandomForestClassifier 与 GridSearchCV 的示例代码。">
|
||
<meta property="og:type" content="article">
|
||
<meta property="og:title" content="集成学习">
|
||
<meta property="og:url" content="https://rq.shenjianl.cn/posts/8816.html">
|
||
<meta property="og:site_name" content="QuickReference">
|
||
<meta property="og:description" content="集成学习笔记:Bagging 与随机森林(Random Forest = Bagging + Decision Tree),包含在 Titanic 数据集上使用 RandomForestClassifier 与 GridSearchCV 的示例代码。">
|
||
<meta property="og:locale" content="zh_CN">
|
||
<meta property="og:image" content="https://th.bing.com/th/id/OIP.SZA5W6cF-tYiiZ08KZ7l7wHaEm?w=250&h=180&c=7&r=0&o=5&dpr=1.3&pid=1.7">
|
||
<meta property="article:published_time" content="2025-01-25T07:12:08.000Z">
|
||
<meta property="article:modified_time" content="2025-01-25T11:29:19.724Z">
|
||
<meta property="article:author" content="shenjianZ">
|
||
<meta property="article:tag" content="ensemble-learning">
|
||
<meta name="twitter:card" content="summary">
|
||
<meta name="twitter:image" content="https://th.bing.com/th/id/OIP.SZA5W6cF-tYiiZ08KZ7l7wHaEm?w=250&h=180&c=7&r=0&o=5&dpr=1.3&pid=1.7"><link rel="shortcut icon" href="/img/favicon.png"><link rel="canonical" href="https://rq.shenjianl.cn/posts/8816.html"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css?v=4.13.0"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.5.1/css/all.min.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
|
||
root: '/',
|
||
algolia: undefined,
|
||
localSearch: {"path":"/search.xml","preload":false,"top_n_per_article":1,"unescape":true,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
|
||
translate: undefined,
|
||
noticeOutdate: undefined,
|
||
highlight: {"plugin":"highlight.js","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":false},
|
||
copy: {
|
||
success: '复制成功',
|
||
error: '复制错误',
|
||
noSupport: '浏览器不支持'
|
||
},
|
||
relativeDate: {
|
||
homepage: false,
|
||
post: false
|
||
},
|
||
runtime: '天',
|
||
dateSuffix: {
|
||
just: '刚刚',
|
||
min: '分钟前',
|
||
hour: '小时前',
|
||
day: '天前',
|
||
month: '个月前'
|
||
},
|
||
copyright: undefined,
|
||
lightbox: 'fancybox',
|
||
Snackbar: undefined,
|
||
infinitegrid: {
|
||
js: 'https://cdn.jsdelivr.net/npm/@egjs/infinitegrid@4.11.1/dist/infinitegrid.min.js',
|
||
buttonText: '加载更多'
|
||
},
|
||
isPhotoFigcaption: false,
|
||
islazyload: false,
|
||
isAnchor: false,
|
||
percent: {
|
||
toc: true,
|
||
rightside: false,
|
||
},
|
||
autoDarkmode: false
|
||
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
|
||
title: '集成学习',
|
||
isPost: true,
|
||
isHome: false,
|
||
isHighlightShrink: undefined,
|
||
isToc: true,
|
||
postUpdate: '2025-01-25 19:29:19'
|
||
}</script><script>(win=>{
|
||
/**
 * Small localStorage wrapper that stores each value together with an
 * expiry timestamp, serialized as JSON: { value, expiry }.
 */
win.saveToLocal = {
  /**
   * Store `value` under `key` for `ttl` days.
   * A ttl of exactly 0 means "do not persist" and makes the call a no-op.
   */
  set: (key, value, ttl) => {
    if (ttl === 0) return
    const now = Date.now()
    const expiry = now + ttl * 86400000 // 86400000 ms = one day
    const item = {
      value,
      expiry
    }
    localStorage.setItem(key, JSON.stringify(item))
  },

  /**
   * Read the value stored under `key`.
   * Returns undefined when nothing is stored, when the entry has expired
   * (the expired entry is removed), or when the entry cannot be read.
   */
  get: key => {
    let item
    try {
      const itemStr = localStorage.getItem(key)

      if (!itemStr) {
        return undefined
      }
      item = JSON.parse(itemStr)
    } catch (e) {
      // Storage may be blocked, or the entry may not be valid JSON.
      // This runs in the inline bootstrap script, so an exception here would
      // abort everything defined after it; treat the entry as missing instead.
      return undefined
    }

    // JSON.parse can legally yield null or a primitive; neither carries a value.
    if (item === null || typeof item !== 'object') {
      return undefined
    }

    const now = Date.now()

    if (now > item.expiry) {
      localStorage.removeItem(key)
      return undefined
    }
    return item.value
  }
}
|
||
|
||
/**
 * Load an external script by appending an async <script> element to <head>.
 * Every key/value pair in `attr` is copied onto the element as an attribute.
 * The returned promise resolves once the script has loaded and rejects with
 * the error event if loading fails.
 */
win.getScript = (url, attr = {}) => new Promise((resolve, reject) => {
  const el = document.createElement('script')
  el.src = url
  el.async = true
  el.onerror = reject

  // One handler serves both `load` and the legacy `readystatechange` event.
  const onSettled = function () {
    const state = this.readyState
    const stillLoading = state && state !== 'loaded' && state !== 'complete'
    if (stillLoading) return
    // Detach both hooks so the promise settles only once.
    el.onload = el.onreadystatechange = null
    resolve()
  }
  el.onload = el.onreadystatechange = onSettled

  for (const name of Object.keys(attr)) {
    el.setAttribute(name, attr[name])
  }

  document.head.appendChild(el)
})
|
||
|
||
/**
 * Load a stylesheet by appending a <link rel="stylesheet"> element to <head>.
 * When `id` is truthy it is assigned as the element's id.
 * The returned promise resolves once the stylesheet has loaded and rejects
 * with the error event if loading fails.
 */
win.getCSS = (url, id = false) => new Promise((resolve, reject) => {
  const el = document.createElement('link')
  el.rel = 'stylesheet'
  el.href = url
  if (id) {
    el.id = id
  }
  el.onerror = reject

  // One handler serves both `load` and the legacy `readystatechange` event.
  const onSettled = function () {
    const state = this.readyState
    const stillLoading = state && state !== 'loaded' && state !== 'complete'
    if (stillLoading) return
    // Detach both hooks so the promise settles only once.
    el.onload = el.onreadystatechange = null
    resolve()
  }
  el.onload = el.onreadystatechange = onSettled

  document.head.appendChild(el)
})
|
||
|
||
/**
 * Switch the page to the dark theme: set data-theme="dark" on <html> and,
 * if a theme-color meta tag exists, set its content to the dark colour.
 */
win.activateDarkMode = () => {
  document.documentElement.setAttribute('data-theme', 'dark')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta === null) return
  themeColorMeta.setAttribute('content', '#0d0d0d')
}
|
||
/**
 * Switch the page to the light theme: set data-theme="light" on <html> and,
 * if a theme-color meta tag exists, set its content to the light colour.
 */
win.activateLightMode = () => {
  document.documentElement.setAttribute('data-theme', 'light')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta === null) return
  themeColorMeta.setAttribute('content', '#ffffff')
}
|
||
// Re-apply the theme stored under 'theme'; any other value leaves the
// data-theme attribute from the markup untouched.
const savedTheme = saveToLocal.get('theme')

switch (savedTheme) {
  case 'dark':
    activateDarkMode()
    break
  case 'light':
    activateLightMode()
    break
}

// Re-apply the stored sidebar state: 'hide' adds the hide-aside class on
// <html>, any other stored value removes it, and no stored value does nothing.
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
  document.documentElement.classList.toggle('hide-aside', asideStatus === 'hide')
}
|
||
|
||
// Add the 'apple' class to <html> when the user agent string mentions an
// iPad, iPhone, iPod or Macintosh.
const detectApple = () => {
  const isAppleDevice = /iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)
  if (!isAppleDevice) return
  document.documentElement.classList.add('apple')
}
detectApple()
|
||
})(window)</script><meta name="generator" content="Hexo 7.3.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">18</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">9</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><hr class="custom-hr"/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg fixed" id="page-header" style="background-image: 
url('https://th.bing.com/th/id/OIP.SZA5W6cF-tYiiZ08KZ7l7wHaEm?w=250&h=180&c=7&r=0&o=5&dpr=1.3&pid=1.7')"><nav id="nav"><span id="blog-info"><a href="/" title="QuickReference"><span class="site-name">QuickReference</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="post-info"><h1 class="post-title">集成学习</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" 
datetime="2025-01-25T07:12:08.000Z" title="发表于 2025-01-25 15:12:08">2025-01-25</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2025-01-25T11:29:19.724Z" title="更新于 2025-01-25 19:29:19">2025-01-25</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/machinelearning/">machinelearning</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="集成学习"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article class="post-content" id="article-container"><h3 id="Bagging"><a href="#Bagging" class="headerlink" title="Bagging"></a>Bagging</h3><h3 id="随机森林"><a href="#随机森林" class="headerlink" title="随机森林"></a>随机森林</h3><blockquote>
|
||
<p><code>Random-Forest</code> 就是 <code>Bagging + Decision Tree</code></p>
|
||
</blockquote>
|
||
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> seaborn <span class="keyword">as</span> sns</span><br><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split,GridSearchCV</span><br><span class="line"><span class="keyword">from</span> sklearn.feature_extraction <span class="keyword">import</span> DictVectorizer</span><br><span class="line"><span class="keyword">from</span> sklearn.ensemble <span class="keyword">import</span> RandomForestClassifier</span><br><span class="line"><span 
class="comment"># 1.获取数据集 - 加载 Titanic 数据集</span></span><br><span class="line">titanic = sns.load_dataset(<span class="string">'titanic'</span>)</span><br><span class="line">missing_age_count = titanic[<span class="string">'age'</span>].isna().<span class="built_in">sum</span>()</span><br><span class="line"><span class="comment"># print(f"缺失的 age 数量: {missing_age_count}")</span></span><br><span class="line"><span class="comment"># 2. 数据基本处理</span></span><br><span class="line"><span class="comment"># 2.1 确认特征值、目标值</span></span><br><span class="line">X = titanic[[<span class="string">'pclass'</span>,<span class="string">'age'</span>,<span class="string">'sex'</span>]]</span><br><span class="line">y = titanic[<span class="string">'survived'</span>]</span><br><span class="line"><span class="comment"># 2.2 缺失值处理</span></span><br><span class="line">X.loc[:, <span class="string">'age'</span>] = X[<span class="string">'age'</span>].fillna(value=X[<span class="string">'age'</span>].mean()) <span class="comment"># 使用 .loc 进行修改</span></span><br><span class="line"><span class="comment"># 2.3 划分数据集</span></span><br><span class="line">X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=<span class="number">22</span>)</span><br><span class="line"><span class="comment"># 3. 特征工程(字典特征提取)</span></span><br><span class="line">X_train = X_train.to_dict(orient=<span class="string">"records"</span>)</span><br><span class="line">X_test= X_test.to_dict(orient=<span class="string">"records"</span>)</span><br><span class="line">transfer = DictVectorizer()</span><br><span class="line">X_train = transfer.fit_transform(X_train)</span><br><span class="line">X_test = transfer.transform(X_test)</span><br><span class="line"><span class="comment"># 4. 
机器学习 随机森林</span></span><br><span class="line">rf = RandomForestClassifier()</span><br><span class="line">gc = GridSearchCV(estimator=rf ,param_grid={<span class="string">"n_estimators"</span>:[<span class="number">100</span>,<span class="number">120</span>,<span class="number">300</span>],<span class="string">"max_depth"</span>:[<span class="number">3</span>,<span class="number">7</span>,<span class="number">11</span>]},cv=<span class="number">3</span>)</span><br><span class="line">gc.fit(X_train,y_train)</span><br><span class="line">y_pred = gc.predict(X_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"模型的测试集的预测值:<span class="subst">{y_pred}</span>"</span>)</span><br><span class="line">ret = gc.score(X_test,y_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"最佳模型在测试集上的评分:<span class="subst">{ret}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"最佳模型的参数:<span class="subst">{gc.best_estimator_}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"最佳模型在训练集上的评分:<span class="subst">{gc.best_score_}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(X_test.toarray())</span><br></pre></td></tr></table></figure>
|
||
<p><img src="/img/machinelearning/random-forest.png" alt="随机森林示例配图"></p>
|
||
<h3 id="ott案例"><a href="#ott案例" class="headerlink" title="ott案例"></a>ott案例</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span 
class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line"><span class="keyword">from</span> imblearn.under_sampling <span class="keyword">import</span> RandomUnderSampler</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> LabelEncoder</span><br><span class="line"><span class="keyword">from</span> sklearn.ensemble <span class="keyword">import</span> RandomForestClassifier</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span class="keyword">import</span> log_loss</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> OneHotEncoder</span><br><span class="line"><span class="comment"># 1. 
获取数据集</span></span><br><span class="line">data = pd.read_csv(<span class="string">'./data/train.csv'</span>)</span><br><span class="line"><span class="comment"># 查看目标值分类</span></span><br><span class="line"><span class="keyword">import</span> seaborn <span class="keyword">as</span> sns</span><br><span class="line">sns.countplot(data=data, x=<span class="string">'target'</span>, hue=<span class="string">'target'</span>, palette=<span class="string">"Set2"</span>, legend=<span class="literal">False</span>) <span class="comment"># 使用 hue='target' 替代 palette</span></span><br><span class="line">plt.show()</span><br><span class="line"></span><br><span class="line"><span class="comment"># 2. 数据集的基本处理</span></span><br><span class="line"><span class="comment"># 2.1 确定特征值、目标值</span></span><br><span class="line">x = data.drop([<span class="string">"id"</span>, <span class="string">"target"</span>], axis=<span class="number">1</span>)</span><br><span class="line">y = data[<span class="string">'target'</span>]</span><br><span class="line"></span><br><span class="line"><span class="comment"># 2.2 使用随机欠采样进行平衡</span></span><br><span class="line">undersampler = RandomUnderSampler(sampling_strategy=<span class="string">'auto'</span>, random_state=<span class="number">0</span>)</span><br><span class="line">x_resampled, y_resampled = undersampler.fit_resample(x, y)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 查看欠采样后的类别分布</span></span><br><span class="line"><span class="comment"># print(f"欠采样后训练集中的类别分布:\n{y_train_resampled.value_counts()}")</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 2.3. 将标签转换为数字</span></span><br><span class="line">le = LabelEncoder()</span><br><span class="line">y_resampled = le.fit_transform(y_resampled)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 2.4. 
划分训练集和测试集</span></span><br><span class="line">x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=<span class="number">0.2</span>)</span><br><span class="line"></span><br><span class="line"> <span class="comment"># 3. 机器学习</span></span><br><span class="line">rf = RandomForestClassifier(oob_score = <span class="literal">True</span>)</span><br><span class="line">rf.fit(x_train,y_train)</span><br><span class="line">y_pred = rf.predict(x_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"预测值:<span class="subst">{y_pred}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"评分:<span class="subst">{rf.score(x_test,y_test)}</span>"</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment"># # 4. 模型评估 (解决二分类预测问题)</span></span><br><span class="line"><span class="comment"># import numpy as np</span></span><br><span class="line"><span class="comment"># from sklearn.metrics import log_loss</span></span><br><span class="line"><span class="comment"># # 假设 y_pred_prob 是通过 predict_proba 得到的预测概率</span></span><br><span class="line"><span class="comment"># # 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内</span></span><br><span class="line"><span class="comment"># eps = 1e-15 # 设置一个小的eps值,避免极端值</span></span><br><span class="line"><span class="comment"># y_pred_prob = rf.predict_proba(x_test)</span></span><br><span class="line"><span class="comment"># y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># # 计算 log_loss</span></span><br><span class="line"><span class="comment"># loss = log_loss(y_test, y_pred_prob, normalize=True)</span></span><br><span class="line"><span class="comment"># print(f"Log Loss: {loss}")</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 4. 
模型评估 (解决多分类预测问题)</span></span><br><span class="line"></span><br><span class="line"><span class="comment"># 获取预测的概率</span></span><br><span class="line">y_pred_prob = rf.predict_proba(x_test)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 使用 OneHotEncoder 对 y_test 进行 One-Hot 编码</span></span><br><span class="line">encoder = OneHotEncoder(sparse_output=<span class="literal">False</span>) <span class="comment"># 确保返回的是密集矩阵</span></span><br><span class="line">y_test_one_hot = encoder.fit_transform(y_test.reshape(-<span class="number">1</span>, <span class="number">1</span>))</span><br><span class="line"></span><br><span class="line"><span class="comment"># 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内</span></span><br><span class="line">eps = <span class="number">1e-15</span></span><br><span class="line">y_pred_prob = np.clip(y_pred_prob, eps, <span class="number">1</span> - eps)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 计算 log_loss</span></span><br><span class="line">loss = log_loss(y_test_one_hot, y_pred_prob, normalize=<span class="literal">True</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"Log Loss: <span class="subst">{loss}</span>"</span>)</span><br><span class="line"></span><br></pre></td></tr></table></figure>
|
||
<p><img src="/img/machinelearning/ott.png" alt="ott案例配图"></p>
|
||
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta"><i class="fas fa-circle-user fa-fw"></i>文章作者: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn">shenjianZ</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta"><i class="fas fa-square-arrow-up-right fa-fw"></i>文章链接: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn/posts/8816.html">https://rq.shenjianl.cn/posts/8816.html</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta"><i class="fas fa-circle-exclamation fa-fw"></i>版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外,均采用 <a href="https://qr.shenjianl.cn/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="https://rq.shenjianl.cn" target="_blank">QuickReference</a>!</span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/ensemble-learning/">ensemble-learning</a></div><div class="post_share"><div class="social-share" data-image="https://th.bing.com/th/id/OIP.SZA5W6cF-tYiiZ08KZ7l7wHaEm?w=250&h=180&c=7&r=0&o=5&dpr=1.3&pid=1.7" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/js/social-share.min.js" defer></script></div></div><nav class="pagination-post" id="pagination"><div class="next-post pull-full"><a href="/posts/95.html" title="决策树算法"><img class="cover" src="https://th.bing.com/th/id/OIP.XaPUn6eccfS_z_wTLQNFzgHaEK?w=240&h=180&c=7&r=0&o=5&dpr=1.9&pid=1.7" onerror="onerror=null;src='/img/404.jpg'" alt="cover of next post"><div class="pagination-info"><div class="label">下一篇</div><div class="next_info">决策树算法</div></div></a></div></nav></div><div class="aside-content" 
id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/></div><div class="author-info__name">shenjianZ</div><div class="author-info__description">一份快捷简便的文档,便于查阅编程的细节</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">18</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">9</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><a id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/shenjianz"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/shenjianZ" target="_blank" title="Github"><i class="fab fa-github" style="color: #24292e;"></i></a><a class="social-icon" href="mailto:15202078626@163.com" target="_blank" title="Email"><i class="fas fa-envelope" style="color: #4a7dbe;"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">一个简单快捷的文档知识点查阅网站</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-3"><a class="toc-link" href="#Bagging"><span class="toc-number">1.</span> <span class="toc-text">Bagging</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E9%9A%8F%E6%9C%BA%E6%A3%AE%E6%9E%97"><span class="toc-number">2.</span> <span class="toc-text">随机森林</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#ott%E6%A1%88%E4%BE%8B"><span class="toc-number">3.</span> 
<span class="toc-text">ott案例</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/posts/8816.html" title="集成学习"><img src="https://th.bing.com/th/id/OIP.SZA5W6cF-tYiiZ08KZ7l7wHaEm?w=250&h=180&c=7&r=0&o=5&dpr=1.3&pid=1.7" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="集成学习"/></a><div class="content"><a class="title" href="/posts/8816.html" title="集成学习">集成学习</a><time datetime="2025-01-25T07:12:08.000Z" title="发表于 2025-01-25 15:12:08">2025-01-25</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/95.html" title="决策树算法"><img src="https://th.bing.com/th/id/OIP.XaPUn6eccfS_z_wTLQNFzgHaEK?w=240&h=180&c=7&r=0&o=5&dpr=1.9&pid=1.7" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="决策树算法"/></a><div class="content"><a class="title" href="/posts/95.html" title="决策树算法">决策树算法</a><time datetime="2025-01-24T04:39:59.000Z" title="发表于 2025-01-24 12:39:59">2025-01-24</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/60504.html" title="逻辑回归">逻辑回归</a><time datetime="2025-01-20T07:30:08.000Z" title="发表于 2025-01-20 15:30:08">2025-01-20</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/52662.html" title="线性回归">线性回归</a><time datetime="2025-01-19T08:46:51.000Z" title="发表于 2025-01-19 16:46:51">2025-01-19</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/12462.html" title="C lang">C lang</a><time datetime="2025-01-15T12:41:26.000Z" title="发表于 2025-01-15 20:41:26">2025-01-15</time></div></div></div></div></div></div></main><footer id="footer" style="background: transparent"><div id="footer-wrap"><div class="copyright">©2024 - 2025 By shenjianZ</div><div class="framework-info"><span>框架 </span><a 
target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div><div class="footer_custom_text"><a target="_blank" rel="noopener" href="https://beian.miit.gov.cn/#/Integrated/recordQuery"><img class="icp-icon" src="https://beian.mps.gov.cn/img/logo01.dd7ff50e.png"><span>备案号:豫ICP备2023019300号</span></a></div></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside-config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js?v=4.13.0"></script><script src="/js/main.js?v=4.13.0"></script><script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.umd.min.js"></script><div class="js-pjax"></div><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/dist/activate-power-mode.min.js"></script><script>POWERMODE.colorful = true;
|
||
POWERMODE.shake = true;
|
||
POWERMODE.mobile = false;
|
||
document.body.addEventListener('input', POWERMODE);
|
||
</script><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><hr/><div id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js?v=4.13.0"></script></div></div></body></html> |