add linear regression details

This commit is contained in:
2025-01-19 17:11:00 +08:00
parent ee2c51ff65
commit 5ea5e3cba1
48 changed files with 1292 additions and 229 deletions

View File

@@ -0,0 +1,81 @@
---
title: C lang
tags: C C++
abbrlink: 12462
date: 2025-01-15 20:41:26
---
### C 语言在 Windows 下的开发环境(VS Code)
[WinLibs - GCC+MinGW-w64 compiler for Windows](https://winlibs.com/#download-release)下载你需要的版本
解压到`D:\ProgramModule`,并将 `bin\`加入环境变量`PATH`
打开新的`Terminal`输入`gcc -v`,查看`gcc`是否安装成功
在 `VS Code` 的插件管理中下载 `Code Runner` 和 `C/C++` 这两个插件
`*.c`源文件的内容区,右键点击`Run Code` ,即可运行成功
![](/img/language/c-env-conf.png)
### 数据类型
- 整数类型
```c
    short a = 12;
    int b = 100;
    long c = 1000L;
    long long d = 1000000LL;
    unsigned int e = 10;
    printf("a: %hd\n",a);
    printf("b: %d\n",b);
    printf("c: %ld\n",c);
    printf("d: %lld\n",d);
    printf("e: %u\n",e);
    printf("f: %.3f\n",f);
```
- 小数类型
```c
/* Floating-point types: the F suffix makes the literal a float; an unsuffixed literal is a double. */
float f = 3.14F;
double g = 5.65;
printf("f: %.3f\n", f);   /* three digits after the decimal point */
printf("g: %.2lf\n", g);  /* two digits after the decimal point */
```
- 字符类型
```c
char h = 'x';
printf("x: %c\n",x);
```
### 类型转换
- 隐式转换
- 强制转换
```c
/* Explicit (forced) conversion: narrow an int to a short with a cast. */
int b = 23;
short c = (short)b;
```
### 数组
```c
#include <stdio.h>

/* Print the first len elements of arr, separated by tabs.
 * Defined at file scope: a function nested inside main() is a GNU
 * extension and is rejected by standard C compilers. */
void printArr(int arr[], int len){
    for (int i = 0; i < len;i++){
        printf("%d\t",arr[i]);
    }
}

int main(){
    int arr [10] = {2,3,4,5,6,7,8,9,10,11};
    arr[0] = 1525;      /* write through index notation */
    *(arr+1) = 25;      /* same as arr[1] = 25, written with pointer arithmetic */
    /* Element count = total bytes / bytes per element. This must be computed
     * here, where arr is still a real array; inside printArr the parameter
     * is a pointer and sizeof would give the pointer size instead. */
    int len = sizeof(arr)/sizeof(arr[0]);
    printArr(arr,len);
    return 0;
}
```
### 指针
```c
#include <stdio.h>

/* Swap the values of the two ints that x and y point to. */
void swap(int* x, int* y){
    int temp = *x;
    *x = *y;
    *y = temp;
}

int main(){
    int a = 5;
    int b = 10;
    swap(&a, &b);   /* pass addresses so swap can modify the caller's variables */
    printf("a = %d b = %d\n", a, b);   /* prints: a = 10 b = 5 */
    return 0;
}
```

View File

@@ -0,0 +1,199 @@
---
title: 线性回归
tags: linear-regression
mathjax: true
abbrlink: 52662
date: 2025-01-19 16:46:51
---
### 线性回归简介
> 线性回归用于预测一个连续的目标变量(因变量),并假设它与一个或多个特征(自变量)之间存在线性关系。

假设函数:
$$y = w_1x_1 + w_2x_2 + \cdots + w_nx_n + b$$
- $y$ 是目标变量(因变量),即我们希望预测的值。
- $x_1, x_2, \dots, x_n$ 是特征变量(自变量),即输入的值。
- $w_1, w_2, \dots, w_n$ 是各特征对应的权重(系数),$b$ 是偏置(截距),它们是模型需要学习的参数。
### 损失函数
为了找到最佳的线性模型,我们需要通过最小化损失函数来优化模型参数。在线性回归中,常用的损失函数是 **均方误差MSE**
$$MSE = \frac{1}{m} \sum_{i=1}^{m} (y_i - \hat{y}_i)^2$$
- $m$ 是样本的数量。
- $y_i$ 是第 $i$ 个样本的真实值。
- $\hat{y}_i$ 是模型预测的第 $i$ 个样本的值。
### 线性回归优化
- 梯度下降法
```python
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
# 1. Load the California housing dataset
housing = fetch_california_housing()
# 2. Prepare the data
# 2.1 Hold out 25% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.25,
)
# 3. Feature engineering
# 3.1 Standardization: learn the scaling statistics from the training set only,
#     then apply the same scaling to both sets
transfer = StandardScaler()
transfer.fit(X_train)
X_train = transfer.transform(X_train)
X_test = transfer.transform(X_test)
# 4. Machine learning - linear model trained with stochastic gradient descent
estimater = SGDRegressor(max_iter=1000, eta0=0.01).fit(X_train, y_train)
print(f"SGD模型的偏置是{estimater.intercept_}")
print(f"SGD模型的系数是{estimater.coef_}")
# 5. Model evaluation on the held-out test set
y_pred = estimater.predict(X_test)
print(f"SGD模型预测值{y_pred}")
mse = mean_squared_error(y_test, y_pred)
print(f"SGD模型均方误差:{mse}")
```
- 正规方程
```python
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# 1. Load the California housing dataset
housing = fetch_california_housing()
# 2. Prepare the data
# 2.1 Hold out 25% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
# 3. Feature engineering
# 3.1 Standardization
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
# Use transform(), not fit_transform(): the test set must be scaled with the
# statistics learned from the training set, otherwise the model is evaluated
# on features scaled differently from the ones it was trained on.
X_test = transfer.transform(X_test)
# 4. Machine learning - ordinary least squares linear regression
estimater = LinearRegression()
estimater.fit(X_train, y_train)
print(f"模型的偏置是:{estimater.intercept_}")
print(f"模型的系数是:{estimater.coef_}")
# 5. Model evaluation on the held-out test set
y_pred = estimater.predict(X_test)
print(f"模型预测值:{y_pred}")
mse = mean_squared_error(y_test, y_pred)
print(f"模型均方误差:{mse}")
```
- 岭回归
```python
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
# 1. Load the California housing dataset
housing = fetch_california_housing()
# 2. Prepare the data
# 2.1 Hold out 25% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.25,
)
# 3. Feature engineering
# 3.1 Standardization: learn the scaling statistics from the training set only,
#     then apply the same scaling to both sets
transfer = StandardScaler()
transfer.fit(X_train)
X_train = transfer.transform(X_train)
X_test = transfer.transform(X_test)
# 4. Machine learning - ridge regression; RidgeCV picks alpha from the candidates
#    (fixed-alpha alternative: estimater = Ridge(alpha=1.0))
estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100]).fit(X_train, y_train)
print(f"Ridge模型的偏置是{estimater.intercept_}")
print(f"Ridge模型的系数是{estimater.coef_}")
# Show the alpha that was selected
print(f"最佳 alpha 值是:{estimater.alpha_}")
# 5. Model evaluation on the held-out test set
y_pred = estimater.predict(X_test)
print(f"Ridge模型预测值{y_pred}")
mse = mean_squared_error(y_test, y_pred)
print(f"Ridge模型均方误差:{mse}")
```
以上三个示例的代码结构和缩进保持一致,便于对比阅读和理解。
![](/img/machinelearning/linear.png)
![](/img/machinelearning/fitting.png)
### 模型保存和加载
```python
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
import joblib
def _load_scaled_split(random_state=22):
    """Return the standardized housing split as (X_train, X_test, y_train, y_test).

    The split is seeded so that save_model() and load_model() work on the same
    rows: the reloaded model is then scored on samples it was not trained on,
    scaled with the same statistics that were used during training.
    """
    # 1. Load the California housing dataset
    housing = fetch_california_housing()
    # 2. Hold out 25% of the samples as the test set
    X_train, X_test, y_train, y_test = train_test_split(
        housing.data, housing.target, test_size=0.25, random_state=random_state
    )
    # 3. Standardization: fit on the training set only, reuse it for the test set
    transfer = StandardScaler()
    X_train = transfer.fit_transform(X_train)
    X_test = transfer.transform(X_test)
    return X_train, X_test, y_train, y_test


def save_model():
    """Train a RidgeCV model, save it to 'ridge_model.pkl', and print its test MSE."""
    X_train, X_test, y_train, y_test = _load_scaled_split()
    # 4. Machine learning - ridge regression; RidgeCV picks alpha from the candidates
    estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
    estimater.fit(X_train, y_train)
    print(f"Ridge模型的偏置是{estimater.intercept_}")
    print(f"Ridge模型的系数是{estimater.coef_}")
    # Save the fitted model
    joblib.dump(estimater, 'ridge_model.pkl')
    # Show the alpha that was selected
    print(f"最佳 alpha 值是:{estimater.alpha_}")
    # 5. Model evaluation on the held-out test set
    y_pred = estimater.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Ridge模型均方误差:{mse}")


def load_model():
    """Load the model from 'ridge_model.pkl' and score it on the held-out test set."""
    # Only the test part of the split is needed here.
    _, X_test, _, y_test = _load_scaled_split()
    # Load the model. NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code - only load model files from a trusted source.
    estimater = joblib.load('ridge_model.pkl')
    print(f"Ridge模型的偏置是{estimater.intercept_}")
    print(f"Ridge模型的系数是{estimater.coef_}")
    # Show the alpha that was selected
    print(f"最佳 alpha 值是:{estimater.alpha_}")
    # 5. Model evaluation on the held-out test set
    y_pred = estimater.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Ridge模型预测值{y_pred}")
    print(f"Ridge模型均方误差:{mse}")


print("训练并保存模型:")
save_model()
print("加载模型")
load_model()
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB