add linear regression details

2025-01-19 17:11:00 +08:00
parent ee2c51ff65
commit 5ea5e3cba1
48 changed files with 1292 additions and 229 deletions
--- a/source/_posts/language/C.md
+++ b/source/_posts/language/C.md
@@ -0,0 +1,81 @@
+---
+title: C lang
+tags: C C++
+abbrlink: 12462
+date: 2025-01-15 20:41:26
+---
+
+### c lang在windows下的开发（VS code）
+[WinLibs - GCC+MinGW-w64 compiler for Windows](https://winlibs.com/#download-release)下载你需要的版本
+解压到`D:\ProgramModule`，并将 `bin\`加入环境变量`PATH`
+打开新的`Terminal`输入`gcc -v`，查看`gcc`是否安装成功
+在`VS code` 的插件管理下载`Code Runner`、`C/C++`这两个插件
+在`*.c`源文件的内容区，右键点击`Run Code` ，即可运行成功
+![](/img/language/c-env-conf.png)
+
+### 数据类型
+- 整数类型
+  ```c
+    // Integer types: declare one variable of each kind, then print each
+    // with the printf conversion specifier that matches its type.
+    short a = 12;
+    int b = 100;
+    long c = 1000L;            // L suffix marks a long literal
+    long long d = 1000000LL;   // LL suffix marks a long long literal
+    unsigned int e = 10;
+    printf("a: %hd\n", a);     // %hd  -> short
+    printf("b: %d\n", b);      // %d   -> int
+    printf("c: %ld\n", c);     // %ld  -> long
+    printf("d: %lld\n", d);    // %lld -> long long
+    printf("e: %u\n", e);      // %u   -> unsigned int
+    ```
+- 小数类型
+  ```c
+    // Floating-point types: F suffix makes the literal a float,
+    // an unsuffixed literal is a double.
+    float f = 3.14F;
+    double g = 5.65;
+    printf("f: %.3f\n", f);    // three digits after the decimal point
+    printf("g: %.2lf\n", g);   // two digits after the decimal point
+    ```
+- 字符类型
+  ```c
+    // Character type: a char holds a single character, printed with %c.
+    char h = 'x';
+    printf("h: %c\n", h);
+    ```
+### 类型转换
+- 隐式转换
+- 强制转换
+  ```c
+    // Explicit (forced) conversion: the (short) cast narrows an int to a short.
+    int wide = 23;
+    short narrow = (short)wide;
+    ```
+### 数组
+```c
+#include <stdio.h>
+
+// Print the first `len` elements of `arr`, separated by tabs.
+// Defined at file scope: standard C does not allow a function to be
+// defined inside another function (that is a GNU-only extension).
+void printArr(int arr[], int len){
+    for (int i = 0; i < len; i++){
+        printf("%d\t", arr[i]);
+    }
+}
+
+int main(){
+    int arr[10] = {2,3,4,5,6,7,8,9,10,11};
+    arr[0] = 1525;       // write through an index
+    *(arr + 1) = 25;     // pointer arithmetic: same as arr[1] = 25
+    // Element count = total bytes / bytes per element. This must be
+    // computed here, where `arr` is still an array; inside printArr the
+    // parameter is only a pointer, so the length is passed explicitly.
+    int len = sizeof(arr) / sizeof(arr[0]);
+    printArr(arr, len);
+    return 0;
+}
+```
+### 指针
+```c
+    // swap the value of a and b
+    void swap(int* x, int* y){
+        int temp = *x;
+        *x = *y;
+        *y = temp;
+
+    }
+    int a = 5;
+    int b = 10;
+    swap(&a, &b);
+    printf("a = %d b = %d\n", a, b);
+```
--- a/source/_posts/machinelearning/linearreression.md
+++ b/source/_posts/machinelearning/linearreression.md
@@ -0,0 +1,199 @@
+---
+title: 线性回归
+tags: linear-regression
+mathjax: true
+abbrlink: 52662
+date: 2025-01-19 16:46:51
+---
+
+### 线性回归简介
+>用于预测一个连续的目标变量（因变量），与一个或多个特征（自变量）之间存在线性关系。
+
+假设函数：  
+$$y = w_1x_1 + w_2x_2 + \cdots + w_nx_n + b$$
+- $y$ 是目标变量（因变量），即我们希望预测的值。
+- $x_1, x_2, \ldots, x_n$ 是特征变量（自变量），即输入的值。
+- $w_1, w_2, \ldots, w_n$ 是各特征对应的权重（系数），$b$ 是偏置（截距），它们是模型需要学习的参数。
+### 损失函数
+
+为了找到最佳的线性模型，我们需要通过最小化损失函数来优化模型参数。在线性回归中，常用的损失函数是 **均方误差（MSE）**：
+$$MSE = \frac{1}{m} \sum_{i=1}^{m} (y_i - \hat{y}_i)^2$$
+- m 是样本的数量。
+- $y_i$ 是第 i 个样本的真实值。
+- $\hat{y}_i$ 是模型预测的第 i 个样本的值。
+
+### 线性回归优化
+
+- 梯度下降法
+    ```python
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.linear_model import SGDRegressor
+    from sklearn.metrics import mean_squared_error
+
+    # 1. 获取数据集
+    housing = fetch_california_housing()
+
+    # 2. 数据集处理
+    # 2.1 分割数据集
+    X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
+
+    # 3. 特征工程
+    # 3.1 标准化
+    transfer = StandardScaler()
+    X_train = transfer.fit_transform(X_train)
+    X_test = transfer.transform(X_test)  # 使用 transform() 而不是 fit_transform()
+
+    # 4.机器学习- 梯度下降法
+    estimater = SGDRegressor(max_iter=1000, eta0=0.01)
+    estimater.fit(X_train, y_train)
+    print(f"SGD模型的偏置是：{estimater.intercept_}")
+    print(f"SGD模型的系数是：{estimater.coef_}")
+
+    # 5. 模型评估
+    y_pred = estimater.predict(X_test)
+    print(f"SGD模型预测值：{y_pred}")
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"SGD模型均方误差:{mse}")
+    ```
+
+- 正规方程
+    ```python
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.linear_model import LinearRegression
+    from sklearn.metrics import mean_squared_error
+
+    # 1. 获取数据集
+    housing = fetch_california_housing()
+
+    # 2. 数据集处理
+    # 2.1 分割数据集
+    X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
+
+    # 3. 特征工程
+    # 3.1 标准化
+    transfer = StandardScaler()
+    X_train = transfer.fit_transform(X_train)
+    X_test = transfer.fit_transform(X_test)
+
+    # 4.机器学习- 线性回归
+    estimater = LinearRegression()
+    estimater.fit(X_train, y_train)
+    print(f"模型的偏置是：{estimater.intercept_}")
+    print(f"模型的系数是：{estimater.coef_}")
+
+    # 5. 模型评估
+    y_pred = estimater.predict(X_test)
+    print(f"模型预测值：{y_pred}")
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"模型均方误差:{mse}")
+    ```
+
+- 岭回归
+    ```python
+    from sklearn.datasets import fetch_california_housing
+    from sklearn.model_selection import train_test_split
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.linear_model import Ridge, RidgeCV
+    from sklearn.metrics import mean_squared_error
+
+    # 1. 获取数据集
+    housing = fetch_california_housing()
+
+    # 2. 数据集处理
+    # 2.1 分割数据集
+    X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
+
+    # 3. 特征工程
+    # 3.1 标准化
+    transfer = StandardScaler()
+    X_train = transfer.fit_transform(X_train)
+    X_test = transfer.transform(X_test)  # 使用 transform() 而不是 fit_transform()
+
+    # 4.机器学习- 岭回归 使用了Ridge的alpha的搜索
+    # estimater = Ridge(alpha=1.0)
+    estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
+    estimater.fit(X_train, y_train)
+    print(f"Ridge模型的偏置是：{estimater.intercept_}")
+    print(f"Ridge模型的系数是：{estimater.coef_}")
+
+    # 查看最佳 alpha
+    print(f"最佳 alpha 值是：{estimater.alpha_}")
+
+    # 5. 模型评估
+    y_pred = estimater.predict(X_test)
+    print(f"Ridge模型预测值：{y_pred}")
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"Ridge模型均方误差:{mse}")
+    ```
+
+以上三种方法都先对特征做标准化，再训练模型，最后用均方误差（MSE）评估模型在测试集上的表现。
+
+
+![](/img/machinelearning/linear.png)
+
+![](/img/machinelearning/fitting.png)
+### 模型保存和加载
+```python
+from sklearn.datasets import fetch_california_housing
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import Ridge, RidgeCV
+from sklearn.metrics import mean_squared_error
+import joblib
+
+def save_model():
+    # 1. 获取数据集
+    housing = fetch_california_housing()
+    # 2. 数据集处理
+    # 2.1 分割数据集
+    X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
+    # 3. 特征工程
+    # 3.1 标准化
+    transfer = StandardScaler()
+    X_train = transfer.fit_transform(X_train)
+    X_test = transfer.transform(X_test)  # 使用 transform() 而不是 fit_transform()
+    # 4. 机器学习 - 岭回归 使用了Ridge的alpha的搜索
+    estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
+    estimater.fit(X_train, y_train)
+    print(f"Ridge模型的偏置是：{estimater.intercept_}")
+    print(f"Ridge模型的系数是：{estimater.coef_}")
+    # 保存模型
+    joblib.dump(estimater, 'ridge_model.pkl')
+    # 查看最佳 alpha
+    print(f"最佳 alpha 值是：{estimater.alpha_}")
+    # 5. 模型评估
+    y_pred = estimater.predict(X_test)
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"Ridge模型均方误差:{mse}")
+
+def load_model():
+    # 1. 获取数据集
+    housing = fetch_california_housing()
+    # 2. 数据集处理
+    # 2.1 分割数据集
+    X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
+    # 3. 特征工程
+    # 3.1 标准化
+    transfer = StandardScaler()
+    X_train = transfer.fit_transform(X_train)
+    X_test = transfer.transform(X_test)  # 使用 transform() 而不是 fit_transform()
+    # 加载模型
+    estimater = joblib.load('ridge_model.pkl')
+    print(f"Ridge模型的偏置是：{estimater.intercept_}")
+    print(f"Ridge模型的系数是：{estimater.coef_}")
+    # 查看最佳 alpha
+    print(f"最佳 alpha 值是：{estimater.alpha_}")
+    # 5. 模型评估
+    y_pred = estimater.predict(X_test)
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"Ridge模型预测值：{y_pred}")
+    print(f"Ridge模型均方误差:{mse}")
+
+print("训练并保存模型：")
+save_model()
+print("加载模型")
+load_model()
+```
--- a/source/img/language/c-env-conf.png
+++ b/source/img/language/c-env-conf.png
--- a/source/img/machinelearning/fitting.png
+++ b/source/img/machinelearning/fitting.png
--- a/source/img/machinelearning/linear.png
+++ b/source/img/machinelearning/linear.png