228 lines
38 KiB
HTML
228 lines
38 KiB
HTML
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>逻辑回归 | QuickReference</title><meta name="author" content="shenjianZ"><meta name="copyright" content="shenjianZ"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="logistic regression code1234567891011121314151617181920212223242526272829303132333435import pandas as pdimport numpy as npfrom sklearn.datasets import load_breast_cancerfrom sklearn.model_selection im">
|
||
<meta property="og:type" content="article">
|
||
<meta property="og:title" content="逻辑回归">
|
||
<meta property="og:url" content="https://rq.shenjianl.cn/posts/60504.html">
|
||
<meta property="og:site_name" content="QuickReference">
|
||
<meta property="og:description" content="logistic regression code1234567891011121314151617181920212223242526272829303132333435import pandas as pdimport numpy as npfrom sklearn.datasets import load_breast_cancerfrom sklearn.model_selection im">
|
||
<meta property="og:locale" content="zh_CN">
|
||
<meta property="og:image" content="https://rq.shenjianl.cn/img/avatar.jpg">
|
||
<meta property="article:published_time" content="2025-01-20T07:30:08.000Z">
|
||
<meta property="article:modified_time" content="2025-01-20T10:18:12.444Z">
|
||
<meta property="article:author" content="shenjianZ">
|
||
<meta property="article:tag" content="logistic-regression">
|
||
<meta name="twitter:card" content="summary">
|
||
<meta name="twitter:image" content="https://rq.shenjianl.cn/img/avatar.jpg"><link rel="shortcut icon" href="/img/favicon.png"><link rel="canonical" href="https://rq.shenjianl.cn/posts/60504.html"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css?v=4.13.0"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.5.1/css/all.min.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
|
||
root: '/',
|
||
algolia: undefined,
|
||
localSearch: {"path":"/search.xml","preload":false,"top_n_per_article":1,"unescape":true,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
|
||
translate: undefined,
|
||
noticeOutdate: undefined,
|
||
highlight: {"plugin":"highlight.js","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":false},
|
||
copy: {
|
||
success: '复制成功',
|
||
error: '复制错误',
|
||
noSupport: '浏览器不支持'
|
||
},
|
||
relativeDate: {
|
||
homepage: false,
|
||
post: false
|
||
},
|
||
runtime: '天',
|
||
dateSuffix: {
|
||
just: '刚刚',
|
||
min: '分钟前',
|
||
hour: '小时前',
|
||
day: '天前',
|
||
month: '个月前'
|
||
},
|
||
copyright: undefined,
|
||
lightbox: 'fancybox',
|
||
Snackbar: undefined,
|
||
infinitegrid: {
|
||
js: 'https://cdn.jsdelivr.net/npm/@egjs/infinitegrid@4.11.1/dist/infinitegrid.min.js',
|
||
buttonText: '加载更多'
|
||
},
|
||
isPhotoFigcaption: false,
|
||
islazyload: false,
|
||
isAnchor: false,
|
||
percent: {
|
||
toc: true,
|
||
rightside: false,
|
||
},
|
||
autoDarkmode: false
|
||
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
|
||
title: '逻辑回归',
|
||
isPost: true,
|
||
isHome: false,
|
||
isHighlightShrink: undefined,
|
||
isToc: true,
|
||
postUpdate: '2025-01-20 18:18:12'
|
||
}</script><script>(win=>{
|
||
// Small localStorage wrapper that stores every value together with an expiry timestamp.
win.saveToLocal = {
  /**
   * Persist `value` under `key`.
   * `ttl` is multiplied by 86400000 (milliseconds per day) to compute the expiry,
   * and a ttl of exactly 0 skips the write entirely.
   */
  set: (key, value, ttl) => {
    if (ttl === 0) return
    const now = Date.now()
    const expiry = now + ttl * 86400000
    const item = {
      value,
      expiry
    }
    localStorage.setItem(key, JSON.stringify(item))
  },

  /**
   * Return the value stored under `key`, or undefined when the entry is
   * missing, unreadable, malformed, or expired (expired entries are removed).
   */
  get: key => {
    let item
    try {
      const itemStr = localStorage.getItem(key)

      if (!itemStr) {
        return undefined
      }
      item = JSON.parse(itemStr)
    } catch (err) {
      // Storage can be blocked and entries written by other code may not be JSON.
      // Treat both as a cache miss so the rest of this inline script keeps running.
      return undefined
    }

    // Valid JSON such as `null` or a bare number has no value/expiry fields.
    if (item === null || typeof item !== 'object') {
      return undefined
    }

    const now = Date.now()

    if (now > item.expiry) {
      localStorage.removeItem(key)
      return undefined
    }
    return item.value
  }
}
|
||
|
||
/**
 * Append an async <script> for `url` to <head>.
 * Each key of `attr` is copied onto the element as an attribute.
 * Resolves once the script reports it has loaded; rejects on its error event.
 */
win.getScript = (url, attr = {}) => new Promise((resolve, reject) => {
  const el = document.createElement('script')
  el.src = url
  el.async = true
  el.onerror = reject

  const onReady = function () {
    const state = this.readyState
    const finished = !state || state === 'loaded' || state === 'complete'
    if (!finished) return
    // Detach both handlers before resolving so they cannot fire again.
    el.onload = el.onreadystatechange = null
    resolve()
  }
  el.onreadystatechange = onReady
  el.onload = onReady

  for (const name of Object.keys(attr)) {
    el.setAttribute(name, attr[name])
  }

  document.head.appendChild(el)
})
|
||
|
||
/**
 * Append a stylesheet <link> for `url` to <head>, optionally giving it `id`.
 * Resolves once the link reports it has loaded; rejects on its error event.
 */
win.getCSS = (url, id = false) => new Promise((resolve, reject) => {
  const el = document.createElement('link')
  el.rel = 'stylesheet'
  el.href = url
  if (id) {
    el.id = id
  }
  el.onerror = reject

  const onReady = function () {
    const state = this.readyState
    const finished = !state || state === 'loaded' || state === 'complete'
    if (!finished) return
    // Detach both handlers before resolving so they cannot fire again.
    el.onload = el.onreadystatechange = null
    resolve()
  }
  el.onreadystatechange = onReady
  el.onload = onReady

  document.head.appendChild(el)
})
|
||
|
||
// Mark the document as dark-themed and, if a theme-color meta tag exists, set it to the dark color.
win.activateDarkMode = () => {
  document.documentElement.setAttribute('data-theme', 'dark')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta === null) return
  themeColorMeta.setAttribute('content', '#0d0d0d')
}
|
||
// Mark the document as light-themed and, if a theme-color meta tag exists, set it to the light color.
win.activateLightMode = () => {
  document.documentElement.setAttribute('data-theme', 'light')
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta === null) return
  themeColorMeta.setAttribute('content', '#ffffff')
}
|
||
// Re-apply the theme stored under 'theme'; any other value leaves the page as rendered.
const t = saveToLocal.get('theme')
switch (t) {
  case 'dark':
    activateDarkMode()
    break
  case 'light':
    activateLightMode()
    break
}

// Re-apply the stored sidebar visibility; when nothing is stored the class list is left alone.
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
  document.documentElement.classList.toggle('hide-aside', asideStatus === 'hide')
}
|
||
|
||
// Add the 'apple' class to <html> when the user agent string names an iPad, iPhone, iPod or Macintosh.
const detectApple = () => {
  const isApple = /iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)
  if (!isApple) return
  document.documentElement.classList.add('apple')
}

detectApple()
|
||
})(window)</script><meta name="generator" content="Hexo 7.3.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">17</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">8</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><hr class="custom-hr"/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg fixed" id="page-header" style="background-image: url('/img/site01.jpg')"><nav id="nav"><span 
id="blog-info"><a href="/" title="QuickReference"><span class="site-name">QuickReference</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> Home</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> Archives</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> Categories</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li><li><a class="site-page child" href="/movies/"><i class="fa-fw fas fa-video"></i><span> Movie</span></a></li></ul></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> Link</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> About</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="post-info"><h1 class="post-title">逻辑回归</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2025-01-20T07:30:08.000Z" title="发表于 2025-01-20 15:30:08">2025-01-20</time><span class="post-meta-separator">|</span><i 
class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2025-01-20T10:18:12.444Z" title="更新于 2025-01-20 18:18:12">2025-01-20</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/machinelearning/">machinelearning</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="逻辑回归"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article class="post-content" id="article-container"><h3 id="logistic-regression-code"><a href="#logistic-regression-code" class="headerlink" title="logistic regression code"></a>logistic regression code</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span 
class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">from</span> sklearn.datasets <span class="keyword">import</span> load_breast_cancer</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> StandardScaler</span><br><span class="line"><span class="keyword">from</span> sklearn.linear_model <span class="keyword">import</span> LogisticRegression</span><br><span class="line"><span class="comment"># 1. 
加载乳腺癌数据集</span></span><br><span class="line">data = load_breast_cancer()</span><br><span class="line"><span class="comment"># 2.1 数据集基本处理</span></span><br><span class="line">df = pd.DataFrame(data.data, columns=data.feature_names)</span><br><span class="line">df[<span class="string">'target'</span>] = data.target</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> df.columns:</span><br><span class="line"> <span class="comment"># 检查列是否有缺失值</span></span><br><span class="line"> <span class="keyword">if</span> np.<span class="built_in">any</span>(pd.isnull(df[i])):</span><br><span class="line"> <span class="built_in">print</span>(<span class="string">f"Filling missing values in column: <span class="subst">{i}</span>"</span>)</span><br><span class="line"><span class="comment">#2.2 确认特征值、目标值</span></span><br><span class="line">X = df.iloc[:,<span class="number">0</span>:df.shape[<span class="number">1</span>] - <span class="number">1</span>]</span><br><span class="line">y = df.loc[:,<span class="string">"target"</span>]</span><br><span class="line"><span class="comment"># 2.3 分割数据</span></span><br><span class="line">X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=<span class="number">0.3</span>)</span><br><span class="line"><span class="comment"># 显示前几行数据</span></span><br><span class="line">df.head(<span class="number">1</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 3. 
特征工程 标准化</span></span><br><span class="line">transfer = StandardScaler()</span><br><span class="line">X_train = transfer.fit_transform(X_train)</span><br><span class="line">X_test = transfer.transform(X_test)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 4 机器学习 逻辑回归</span></span><br><span class="line">estimator = LogisticRegression()</span><br><span class="line">estimator.fit(X_train,y_train)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 5. 模型评估</span></span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"模型准确率:<span class="subst">{estimator.score(X_test,y_test)}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"模型预测值为:\n<span class="subst">{estimator.predict(X_test)}</span>"</span>)</span><br></pre></td></tr></table></figure>
|
||
|
||
<h3 id="分类评估的参数"><a href="#分类评估的参数" class="headerlink" title="分类评估的参数"></a>分类评估的参数</h3><ul>
|
||
<li><p>准确率<br>准确率是所有预测正确的样本占总样本的比例<br>$$Accuracy = \frac{TP+TN}{TP+FN+FP+TN}$$</p>
|
||
</li>
|
||
<li><p>精准率<br>精准率(又称查准率)是指所有被预测为正类的样本中,真正为正类的比例<br>$$Precision = \frac{TP}{TP+FP}$$</p>
|
||
</li>
|
||
<li><p>召回率<br>召回率(又称查全率)是指所有实际为正类的样本中,被正确预测为正类的比例<br>$$Recall = \frac{TP}{TP+FN}$$</p>
|
||
</li>
|
||
<li><p>F1-score<br>F1 值(F1 Score)是精准率和召回率的调和平均数,综合考虑了精准率和召回率的影响。<br>$$ F1 = 2 \times \frac{\text{Precision} \times \text{Recall}}{\text{Precision} + \text{Recall}} $$</p>
|
||
</li>
|
||
<li><p>ROC 曲线<br>以假正率(FPR)为横轴、真正率(TPR)为纵轴绘制的曲线,常用于评估类别不平衡的二分类问题</p>
|
||
</li>
|
||
</ul>
|
||
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">from</span> sklearn.datasets <span class="keyword">import</span> load_breast_cancer</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> sklearn.preprocessing 
<span class="keyword">import</span> StandardScaler</span><br><span class="line"><span class="keyword">from</span> sklearn.linear_model <span class="keyword">import</span> LogisticRegression</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span class="keyword">import</span> classification_report, roc_auc_score</span><br><span class="line"><span class="comment"># 1. 加载乳腺癌数据集</span></span><br><span class="line">data = load_breast_cancer()</span><br><span class="line"><span class="comment"># 2.1 数据集基本处理</span></span><br><span class="line">df = pd.DataFrame(data.data, columns=data.feature_names)</span><br><span class="line">df[<span class="string">'target'</span>] = data.target</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> df.columns:</span><br><span class="line"> <span class="comment"># 检查列是否有缺失值</span></span><br><span class="line"> <span class="keyword">if</span> np.<span class="built_in">any</span>(pd.isnull(df[i])):</span><br><span class="line"> <span class="built_in">print</span>(<span class="string">f"Filling missing values in column: <span class="subst">{i}</span>"</span>)</span><br><span class="line"><span class="comment"># 2.2 确认特征值、目标值</span></span><br><span class="line">X = df.iloc[:, <span class="number">0</span>:df.shape[<span class="number">1</span>] - <span class="number">1</span>]</span><br><span class="line">y = df.loc[:, <span class="string">"target"</span>]</span><br><span class="line"><span class="comment"># 2.3 分割数据</span></span><br><span class="line">X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=<span class="number">0.3</span>)</span><br><span class="line"><span class="comment"># 显示前几行数据</span></span><br><span class="line">df.head(<span class="number">1</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 3. 
特征工程 标准化</span></span><br><span class="line">transfer = StandardScaler()</span><br><span class="line">X_train = transfer.fit_transform(X_train)</span><br><span class="line">X_test = transfer.transform(X_test)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 4 机器学习 逻辑回归</span></span><br><span class="line">estimator = LogisticRegression()</span><br><span class="line">estimator.fit(X_train, y_train)</span><br><span class="line"></span><br><span class="line"><span class="comment"># 5. 模型评估</span></span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"模型准确率:<span class="subst">{estimator.score(X_test, y_test)}</span>"</span>)</span><br><span class="line">y_pred = estimator.predict(X_test)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"模型预测值为:\n<span class="subst">{y_pred}</span>"</span>)</span><br><span class="line"><span class="comment"># 5.1 精确率、召回率</span></span><br><span class="line">ret = classification_report(y_test, y_pred, labels=[<span class="number">1</span>, <span class="number">0</span>], target_names=[<span class="string">"良性"</span>, <span class="string">"恶性"</span>])</span><br><span class="line">roc_score = roc_auc_score(y_test, y_pred)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"准确率、召回率:<span class="subst">{ret}</span>"</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">f"roc_score:<span class="subst">{roc_score}</span>"</span>)</span><br></pre></td></tr></table></figure>
|
||
|
||
<h3 id="类别不平衡的处理"><a href="#类别不平衡的处理" class="headerlink" title="类别不平衡的处理"></a>类别不平衡的处理</h3><p>先准备类别不平衡的数据</p>
|
||
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> imblearn.over_sampling <span class="keyword">import</span> RandomOverSampler,SMOTE</span><br><span class="line"><span class="keyword">from</span> imblearn.under_sampling <span class="keyword">import</span> RandomUnderSampler</span><br><span class="line"><span class="keyword">from</span> sklearn.datasets <span class="keyword">import</span> make_classification</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line"><span class="keyword">from</span> collections <span class="keyword">import</span> Counter</span><br><span class="line"></span><br><span class="line"><span class="comment"># 1.准备类别不平衡的数据</span></span><br><span class="line">X, y = make_classification(</span><br><span class="line"> n_samples=<span class="number">5000</span>,</span><br><span class="line"> n_features=<span class="number">2</span>,</span><br><span class="line"> n_informative=<span class="number">2</span>,</span><br><span class="line"> n_redundant=<span class="number">0</span>,</span><br><span class="line"> n_repeated=<span class="number">0</span>,</span><br><span class="line"> n_classes=<span 
class="number">3</span>,</span><br><span class="line"> n_clusters_per_class=<span class="number">1</span>,</span><br><span class="line"> weights=[<span class="number">0.01</span>, <span class="number">0.05</span>, <span class="number">0.94</span>],</span><br><span class="line"> random_state=<span class="number">0</span>,</span><br><span class="line">)</span><br><span class="line">counter = Counter(y)</span><br><span class="line">plt.scatter(X[:,<span class="number">0</span>],X[:,<span class="number">1</span>],c=y)</span><br><span class="line">plt.show()</span><br></pre></td></tr></table></figure>
|
||
|
||
<ul>
|
||
<li>过采样<br>增加训练集中少数类别的样本,使各类别的样本数量接近<ul>
|
||
<li>随机过采样(RandomOverSampler)<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">ros = RandomOverSampler()</span><br><span class="line">X_resampled,y_resampled = ros.fit_resample(X,y)</span><br><span class="line"><span class="built_in">print</span>(Counter(y_resampled))</span><br><span class="line">plt.scatter(X_resampled[:,<span class="number">0</span>],X_resampled[:,<span class="number">1</span>],c=y_resampled)</span><br><span class="line">plt.show()</span><br></pre></td></tr></table></figure>
|
||
<img src="/img/machinelearning/over_random_sampling.png" alt="随机过采样后的样本散点图"></li>
|
||
<li><code>SMOTE</code>过采样(SMOTE)<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">smote = SMOTE()</span><br><span class="line">X_resampled,y_resampled = smote.fit_resample(X,y)</span><br><span class="line"><span class="built_in">print</span>(Counter(y_resampled))</span><br><span class="line">plt.scatter(X_resampled[:,<span class="number">0</span>],X_resampled[:,<span class="number">1</span>],c=y_resampled)</span><br><span class="line">plt.show()</span><br></pre></td></tr></table></figure>
|
||
<img src="/img/machinelearning/over_smote_sampling.png" alt="SMOTE 过采样后的样本散点图"></li>
|
||
</ul>
|
||
</li>
|
||
<li>欠采样<br>减少训练集中多数类别的样本,使各类别的样本数量接近<ul>
|
||
<li>随机欠采样(RandomUnderSampler)<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">rus = RandomUnderSampler(random_state=<span class="number">0</span>)</span><br><span class="line">X_resampled,y_resampled = rus.fit_resample(X,y)</span><br><span class="line"><span class="built_in">print</span>(Counter(y_resampled))</span><br><span class="line">plt.scatter(X_resampled[:,<span class="number">0</span>],X_resampled[:,<span class="number">1</span>],c=y_resampled)</span><br><span class="line">plt.show()</span><br></pre></td></tr></table></figure>
|
||
<img src="/img/machinelearning/under_sampling.png" alt="随机欠采样后的样本散点图"></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta"><i class="fas fa-circle-user fa-fw"></i>文章作者: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn">shenjianZ</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta"><i class="fas fa-square-arrow-up-right fa-fw"></i>文章链接: </span><span class="post-copyright-info"><a href="https://rq.shenjianl.cn/posts/60504.html">https://rq.shenjianl.cn/posts/60504.html</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta"><i class="fas fa-circle-exclamation fa-fw"></i>版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外,均采用 <a href="https://qr.shenjianl.cn/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="https://rq.shenjianl.cn" target="_blank">QuickReference</a>!</span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/logistic-regression/">logistic-regression</a></div><div class="post_share"><div class="social-share" data-image="/img/avatar.jpg" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/sharejs/dist/js/social-share.min.js" defer></script></div></div><nav class="pagination-post" id="pagination"><div class="prev-post pull-left"><a href="/posts/95.html" title="决策树算法"><div class="cover" style="background: var(--default-bg-color)"></div><div class="pagination-info"><div class="label">上一篇</div><div class="prev_info">决策树算法</div></div></a></div><div class="next-post pull-right"><a href="/posts/52662.html" title="线性回归"><div class="cover" style="background: var(--default-bg-color)"></div><div class="pagination-info"><div class="label">下一篇</div><div 
class="next_info">线性回归</div></div></a></div></nav></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/></div><div class="author-info__name">shenjianZ</div><div class="author-info__description">一份快捷简便的文档,便于查阅编程的细节</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">17</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">8</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">2</div></a></div><a id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/shenjianz"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/shenjianZ" target="_blank" title="Github"><i class="fab fa-github" style="color: #24292e;"></i></a><a class="social-icon" href="mailto:15202078626@163.com" target="_blank" title="Email"><i class="fas fa-envelope" style="color: #4a7dbe;"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">一个简单快捷的文档知识点查阅网站</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-3"><a class="toc-link" href="#logistic-regression-code"><span class="toc-number">1.</span> <span class="toc-text">logistic regression code</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%88%86%E7%B1%BB%E8%AF%84%E4%BC%B0%E7%9A%84%E5%8F%82%E6%95%B0"><span class="toc-number">2.</span> <span 
class="toc-text">分类评估的参数</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E7%B1%BB%E5%88%AB%E4%B8%8D%E5%B9%B3%E8%A1%A1%E7%9A%84%E5%A4%84%E7%90%86"><span class="toc-number">3.</span> <span class="toc-text">类别不平衡的处理</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/95.html" title="决策树算法">决策树算法</a><time datetime="2025-01-24T04:39:59.000Z" title="发表于 2025-01-24 12:39:59">2025-01-24</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/60504.html" title="逻辑回归">逻辑回归</a><time datetime="2025-01-20T07:30:08.000Z" title="发表于 2025-01-20 15:30:08">2025-01-20</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/52662.html" title="线性回归">线性回归</a><time datetime="2025-01-19T08:46:51.000Z" title="发表于 2025-01-19 16:46:51">2025-01-19</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/12462.html" title="C lang">C lang</a><time datetime="2025-01-15T12:41:26.000Z" title="发表于 2025-01-15 20:41:26">2025-01-15</time></div></div><div class="aside-list-item no-cover"><div class="content"><a class="title" href="/posts/29139.html" title="k近邻算法(K-Nearest Neighbors)KNN">k近邻算法(K-Nearest Neighbors)KNN</a><time datetime="2025-01-13T09:20:59.000Z" title="发表于 2025-01-13 17:20:59">2025-01-13</time></div></div></div></div></div></div></main><footer id="footer" style="background: transparent"><div id="footer-wrap"><div class="copyright">©2024 - 2025 By shenjianZ</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" 
href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div><div class="footer_custom_text"><span>备案号:豫ICP备2023019300号</span></div></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside-config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js?v=4.13.0"></script><script src="/js/main.js?v=4.13.0"></script><script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0.33/dist/fancybox/fancybox.umd.min.js"></script><div class="js-pjax"><script>if (!window.MathJax) {
|
||
// MathJax configuration object: TeX delimiters and tagging, CHTML scale, and a custom render action.
window.MathJax = {
  tex: {
    // Inline math is delimited by $...$ or \(...\).
    inlineMath: [['$', '$'], ['\\(', '\\)']],
    tags: 'ams'
  },
  chtml: {
    scale: 1.1
  },
  options: {
    renderActions: {
      // Turns every <script type="math/tex..."> node into a MathItem on the document.
      // NOTE(review): the leading 10 is presumably the action's priority — confirm against the MathJax docs.
      findScript: [10, doc => {
        const mathScripts = document.querySelectorAll('script[type^="math/tex"]')
        mathScripts.forEach(scriptNode => {
          const isDisplay = /; *mode=display/.test(scriptNode.type)
          const item = new doc.options.MathItem(scriptNode.textContent, doc.inputJax[0], isDisplay)
          // The script node is swapped for an empty text node that marks both ends of the math.
          const placeholder = document.createTextNode('')
          scriptNode.parentNode.replaceChild(placeholder, scriptNode)
          item.start = { node: placeholder, delim: '', n: 0 }
          item.end = { node: placeholder, delim: '', n: 0 }
          doc.math.push(item)
        })
      }, '']
    }
  }
}
|
||
|
||
// Build the async MathJax loader <script> and attach it to <head>.
const script = Object.assign(document.createElement('script'), {
  src: 'https://cdn.jsdelivr.net/npm/mathjax@3.2.2/es5/tex-mml-chtml.min.js',
  id: 'MathJax-script',
  async: true
})
document.head.appendChild(script)
|
||
} else {
|
||
MathJax.startup.document.state(0)
|
||
MathJax.texReset()
|
||
MathJax.typesetPromise()
|
||
}</script></div><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc@1.1.3/dist/activate-power-mode.min.js"></script><script>POWERMODE.colorful = true;
|
||
POWERMODE.shake = true;
|
||
POWERMODE.mobile = false;
|
||
document.body.addEventListener('input', POWERMODE);
|
||
</script><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><hr/><div id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js?v=4.13.0"></script></div></div></body></html> |