---
# Hybrid-strategy classifier configuration file
# Based on the graduation-project technical design:
# BERT semantic features + traditional classifiers

# ================================
# Baseline model configuration
# ================================
baseline:
  # Model type: nb = Naive Bayes, svm = Support Vector Machine, lr = Logistic Regression
  model_type: svm

  # Training data path
  data_path: ./data/processed/news_data.csv

  # Test-set fraction
  test_size: 0.2

# ================================
# Hybrid classifier configuration
# ================================
hybrid:
  # Classifier type: svm = Support Vector Machine, lr = Logistic Regression
  classifier_type: svm

  # BERT pre-trained model selection
  # Recommended options:
  # - bert-base-chinese: Google's original Chinese BERT
  # - hfl/chinese-bert-wwm-ext: HFL/iFLYTEK whole-word-masking version (recommended)
  # - hfl/chinese-roberta-wwm-ext: HFL RoBERTa (best reported performance)
  bert_model: hfl/chinese-roberta-wwm-ext

  # Training data path
  data_path: ./data/processed/news_data.csv

  # Test-set fraction
  test_size: 0.2

  # SVM regularization parameter (used when classifier_type is svm)
  # Larger C penalizes misclassification more heavily
  C: 1.0

# ================================
# BERT feature-extraction configuration
# ================================
feature_extraction:
  # Maximum token sequence length
  max_length: 512

  # Batch size (adjust to available GPU memory)
  batch_size: 32

  # Whether to use the GPU
  use_gpu: true

  # Directory for cached extracted features
  cache_dir: ./data/bert_features

# ================================
# Visualization configuration
# ================================
visualization:
  # Chart output directory
  output_dir: ./outputs/visualizations

  # Whether to display charts during training
  show: false

  # t-SNE parameters
  tsne_perplexity: 30
  tsne_n_iter: 1000

# ================================
# Logging configuration
# ================================
logging:
  level: INFO
  file: ./training.log