{"meta":{"version":1,"warehouse":"5.0.1"},"models":{"Asset":[{"_id":"themes/butterfly/source/img/404.jpg","path":"img/404.jpg","modified":1,"renderable":1},{"_id":"themes/butterfly/source/img/avatar.jpg","path":"img/avatar.jpg","modified":1,"renderable":1},{"_id":"themes/butterfly/source/img/favicon.png","path":"img/favicon.png","modified":1,"renderable":1},{"_id":"themes/butterfly/source/img/friend_404.gif","path":"img/friend_404.gif","modified":1,"renderable":1},{"_id":"themes/butterfly/source/css/index.styl","path":"css/index.styl","modified":1,"renderable":1},{"_id":"themes/butterfly/source/css/var.styl","path":"css/var.styl","modified":1,"renderable":1},{"_id":"themes/butterfly/source/js/main.js","path":"js/main.js","modified":1,"renderable":1},{"_id":"themes/butterfly/source/js/tw_cn.js","path":"js/tw_cn.js","modified":1,"renderable":1},{"_id":"themes/butterfly/source/js/utils.js","path":"js/utils.js","modified":1,"renderable":1},{"_id":"themes/butterfly/source/js/search/algolia.js","path":"js/search/algolia.js","modified":1,"renderable":1},{"_id":"themes/butterfly/source/js/search/local-search.js","path":"js/search/local-search.js","modified":1,"renderable":1},{"_id":"source/img/dingyue.png","path":"img/dingyue.png","modified":1,"renderable":0},{"_id":"source/img/000001.png","path":"img/000001.png","modified":1,"renderable":0},{"_id":"source/img/peiqian.png","path":"img/peiqian.png","modified":1,"renderable":0},{"_id":"source/img/site01.jpg","path":"img/site01.jpg","modified":1,"renderable":0},{"_id":"source/img/yiyuan.png","path":"img/yiyuan.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/cros-valid.png","path":"img/machinelearning/cros-valid.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/decision-tree.png","path":"img/machinelearning/decision-tree.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/ensemble-learning.png","path":"img/machinelearning/ensemble-learning.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/fitting.png","path":"img/machinelearning/fitting.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/kfold-skfold.png","path":"img/machinelearning/kfold-skfold.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/knn-01.png","path":"img/machinelearning/knn-01.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/linear.png","path":"img/machinelearning/linear.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/ott.png","path":"img/machinelearning/ott.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/over_random_sampling.png","path":"img/machinelearning/over_random_sampling.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/over_smote_sampling.png","path":"img/machinelearning/over_smote_sampling.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/random-forest.png","path":"img/machinelearning/random-forest.png","modified":1,"renderable":0},{"_id":"source/img/machinelearning/under_sampling.png","path":"img/machinelearning/under_sampling.png","modified":1,"renderable":0},{"_id":"source/img/language/c-env-conf.png","path":"img/language/c-env-conf.png","modified":1,"renderable":0}],"Cache":[{"_id":"source/_posts/hello-world.md","hash":"40e804610ff712f079ace7012b862b4efecf82fb","modified":1723206110026},{"_id":"source/categories/index.md","hash":"49618dce0bee26dfc65f0de1794f01d2967cb7b8","modified":1722407843090},{"_id":"source/movies/index.md","hash":"4abf7a6a712f712f22d80203f6004a89feca2014","modified":1723257633715},{"_id":"source/link/index.md","hash":"26e21fe46bf7c0fc5ae95b73d52bf516368d6dc7","modified":1723257768927},{"_id":"source/_data/link.yml","hash":"f46a39dbf96726af4371a11db9df40bfbf630450","modified":1737713765648},{"_id":"source/music/index.md","hash":"d8dcf467af235e0bac09805db3a4ab73ad782b83","modified":1723257619897},{"_id":"source/_posts/frontend/frontend.md","hash":"3770215d35203b03d86d4f3a6ceee32f9849b584","modified":1723206110023},{"_id":"source/tags/index.md","hash":"1f27b735b6c7d629c8931b5bd3913bdd659f1981","modified":1722385980076},{"_id":"source/about/index.md","hash":"31fdd093368e1c18c1592a8cad6f3c3fe6d6711a","modified":1723257494626},{"_id":"source/_posts/linux/script.md","hash":"2deda929ed0c81ddcd00945c673b29f1bd1353c0","modified":1723997145731},{"_id":"source/_posts/machinelearning/ensemblelearning.md","hash":"20d3715838e9c671c64917467f1dbd9d12e17e4f","modified":1740201706779},{"_id":"source/_posts/language/C.md","hash":"970be14dae28449bc48d776a3c7382aefc1407c5","modified":1737713765649},{"_id":"source/_posts/machinelearning/knn.md","hash":"b05122909bc9a5b81f29ea3f87b6c5cfe7661ebf","modified":1737713765657},{"_id":"source/_posts/net/index.md","hash":"7eae8512c2a6bd937200487540b962d65a47ad9e","modified":1723206110028},{"_id":"source/_posts/machinelearning/linearreression.md","hash":"1d4be8c84289d0b65e899268284dac44ac51e8f5","modified":1739781086908},{"_id":"source/_posts/machinelearning/decisiontree.md","hash":"c686ef0d0026840c860a78972faf7ede46095627","modified":1740201694232},{"_id":"source/_posts/machinelearning/logisticregression.md","hash":"22e16e3be3f3805216e9ae8907ea0fa83c2e0ab2","modified":1737713765659},{"_id":"source/img/machinelearning/kfold-skfold.png","hash":"ab841f5412b7ea773468a3facff643df68a88c01","modified":1736846071435},{"_id":"source/img/machinelearning/knn-01.png","hash":"2a931981d1d5f72d6b42b39b8ef313569eab853e","modified":1736766866160},{"_id":"source/img/machinelearning/ott.png","hash":"949863066d4deff80e31db94b3de808bd5be2df7","modified":1737789011377},{"_id":"source/img/machinelearning/over_random_sampling.png","hash":"11457713572d2b37e7dc481fdaa9776fc3d68598","modified":1737713765664},{"_id":"source/img/machinelearning/over_smote_sampling.png","hash":"27cf8baad693e07f4a1134d675a82dfff06f7d6c","modified":1737713765665},{"_id":"source/img/machinelearning/under_sampling.png","hash":"02fd5e16a793c56a807efdbcd030a6be4e17bd2c","modified":1737713765667},{"_id":"source/_posts/frontend/deploy/deploy.md","hash":"ba4a05741f30f92f9fbe7b815519142de09b13fb","modified":1723206110045},{"_id":"source/_posts/frontend/uniapp/component1.md","hash":"b9b981d3903f5e57e7f327d930df4f04780211f9","modified":1723206110042},{"_id":"source/_posts/ancient/guwenguanzhi/1.md","hash":"629652f1d8e2a347e6b11c367d25b0a26698cd60","modified":1723257063678},{"_id":"source/_posts/bigdata/hadoop/env.md","hash":"4b184c804e9c8083966b1360fe9d5aa539930005","modified":1726065928095},{"_id":"source/_posts/bigdata/hadoop/hdfs.md","hash":"3148149529354105eb301c70961b6f1b68030835","modified":1726066302712},{"_id":"source/_posts/frontend/uniapp/uniapp.md","hash":"8c8594e3eb73d2c10c4c6dfc008b58c36763b054","modified":1723206110039},{"_id":"source/_posts/bigdata/hadoop/zookeper.md","hash":"96e58c86eba61accb620adc5e7b7dbc46e47b237","modified":1726066302706},{"_id":"source/_posts/net/jichang/jichang.md","hash":"4dec8de285eba4f0b5026c0b993fa8b6a4ddaf78","modified":1760584724708},{"_id":"source/img/machinelearning/fitting.png","hash":"89c0cf6301f0b42de93ac41e5bac33d99acfd530","modified":1737713765663},{"_id":"source/img/machinelearning/linear.png","hash":"4a54dbcc028e2385bd50bf8b022adde0a0d9fbcf","modified":1737713765664},{"_id":"source/img/machinelearning/random-forest.png","hash":"8cec4900c731097a7eed0bfe3efd839ee49ef836","modified":1737724341828},{"_id":"source/img/language/c-env-conf.png","hash":"09de801caef07f177c829e889464442f535b189c","modified":1737713765661},{"_id":"source/img/000001.png","hash":"ad86c3b72174364d462bdab1d09540bd79eb123c","modified":1725979378674},{"_id":"source/img/machinelearning/cros-valid.png","hash":"8f9b204b651f93c17ad0856daa628d6abe985d97","modified":1736768886866},{"_id":"source/img/yiyuan.png","hash":"817a89509a8ebcddff6b369979d53ecf44a30a9f","modified":1722998234119},{"_id":"source/img/peiqian.png","hash":"2f077f1fff014ee448cd58b57ff83901702e2d88","modified":1760585242601},{"_id":"themes/butterfly/LICENSE","hash":"c8bc7df08db9dd3b39c2c2259a163a36cf2f6808","modified":1722500710084},{"_id":"themes/butterfly/package.json","hash":"2b6fb6f62b9fa6a829311ffd532ae760fbd0a7db","modified":1722500710223},{"_id":"themes/butterfly/README_CN.md","hash":"9d729ee2ffc5e5f703ccfbfbbb7b286d59071125","modified":1722500710087},{"_id":"themes/butterfly/plugins.yml","hash":"7bb2c7350c0c57850aa30213cd0f26553a614702","modified":1722500710224},{"_id":"themes/butterfly/README.md","hash":"20a91bea7f7ada8b8195d2abff106a7ce21bba20","modified":1722500710085},{"_id":"themes/butterfly/.github/FUNDING.yml","hash":"3b572099a992e30267f5fe4cd3c582ff7ac9f083","modified":1722500710075},{"_id":"themes/butterfly/languages/default.yml","hash":"90e9e2f36dc51aa77eb7804ae048b4876035b12d","modified":1722500710090},{"_id":"themes/butterfly/_config.yml","hash":"c6fab3700a6502b5790ca20d20113020544ea048","modified":1722500710088},{"_id":"themes/butterfly/languages/zh-CN.yml","hash":"2dcc70a011b37890215ae0fd6d8f8c78aa8af6b0","modified":1722500710093},{"_id":"themes/butterfly/languages/en.yml","hash":"68127be0e6b44cfc5f31353d8b275c02939b3ff9","modified":1722500710092},{"_id":"themes/butterfly/layout/archive.pug","hash":"bc77220dfc269b8faad0930e1a4142ebf68165e5","modified":1722500710096},{"_id":"themes/butterfly/languages/zh-TW.yml","hash":"1392e7b8c678cdfb54f55523693e66abc7d80538","modified":1722500710094},{"_id":"themes/butterfly/layout/category.pug","hash":"bf979aec88d78b644fc5d31518f8679ad7625792","modified":1722500710097},{"_id":"themes/butterfly/layout/index.pug","hash":"648dcbdb3d145a710de81c909e000e8664d2ac9c","modified":1722500710218},{"_id":"themes/butterfly/layout/post.pug","hash":"fdbb508b5e6dec30fb8753c5a7fdd494410c4fc0","modified":1722500710220},{"_id":"themes/butterfly/layout/tag.pug","hash":"4bb5efc6dabdf1626685bf6771aaa1467155ae86","modified":1722500710222},{"_id":"themes/butterfly/.github/workflows/publish.yml","hash":"e320b40c051bae1549156cd5ea4a51383cf78598","modified":1722500710080},{"_id":"themes/butterfly/.github/workflows/stale.yml","hash":"4040c76547e270aaf184e9b219a44ca41bbb1b9f","modified":1722500710082},{"_id":"themes/butterfly/layout/page.pug","hash":"bf2d6c6d2d156777b55292e51be02b0b3acf0af8","modified":1722500710219},{"_id":"themes/butterfly/.github/ISSUE_TEMPLATE/bug_report.yml","hash":"eed9190301095b35081aa2658204cc3f15b9f5e1","modified":1722500710076},{"_id":"themes/butterfly/.github/ISSUE_TEMPLATE/config.yml","hash":"63ad2249ad09fb3fe21bd5ff9adefb304a7ab24a","modified":1722500710077},{"_id":"themes/butterfly/.github/ISSUE_TEMPLATE/feature_request.yml","hash":"6e0f9470b18bd37d4891282ac73d61676b040e8c","modified":1722500710079},{"_id":"themes/butterfly/layout/includes/404.pug","hash":"aace9ddff469de4226e47a52ede1c81e66d66d5c","modified":1722500710100},{"_id":"themes/butterfly/layout/includes/additional-js.pug","hash":"50eea5aa78cdeb6c72dd22f0aeabc407cc0f712e","modified":1722500710101},{"_id":"themes/butterfly/layout/includes/head.pug","hash":"ea8d4e8ac6af93cd268ba8f6ffcb80417bc2501e","modified":1722500710103},{"_id":"themes/butterfly/layout/includes/footer.pug","hash":"8715948b93e7508b84d913be1969b28c6b067b9b","modified":1722500710102},{"_id":"themes/butterfly/layout/includes/layout.pug","hash":"96df62e34661d8ca4a45267286127479e5178a79","modified":1722500710123},{"_id":"themes/butterfly/layout/includes/pagination.pug","hash":"c5c58714fb3cb839653e5c32e6094784c8662935","modified":1722500710135},{"_id":"themes/butterfly/layout/includes/rightside.pug","hash":"f448bf73103b88de4443e52d600e871cf3de3e32","modified":1722500710138},{"_id":"themes/butterfly/source/img/404.jpg","hash":"fb4489bc1d30c93d28f7332158c1c6c1416148de","modified":1722500710315},{"_id":"themes/butterfly/layout/includes/sidebar.pug","hash":"9277fead4c29dbe93976f078adaa26e8f9253da3","modified":1722500710140},{"_id":"themes/butterfly/source/img/favicon.png","hash":"3cf89864b4f6c9b532522a4d260a2e887971c92d","modified":1722500710317},{"_id":"themes/butterfly/source/img/avatar.jpg","hash":"cb0941101c6a6b8f762ce6ffc3c948641e7f642f","modified":1722500710316},{"_id":"themes/butterfly/source/img/friend_404.gif","hash":"8d2d0ebef70a8eb07329f57e645889b0e420fa48","modified":1722500710319},{"_id":"themes/butterfly/source/css/var.styl","hash":"950250f66faeb611a67540e0fa6cedbcf5a7a321","modified":1722500710313},{"_id":"themes/butterfly/source/css/index.styl","hash":"b13d96924a5534bff91d75566b196ac87b4fac22","modified":1722500710313},{"_id":"themes/butterfly/source/js/main.js","hash":"59cd756a94ecdf3ec7b18f50691a8a6305f7a65a","modified":1722500710321},{"_id":"themes/butterfly/source/js/tw_cn.js","hash":"d776c670e4076ad6049dbb64cdee7a734b51d37f","modified":1722500710326},{"_id":"themes/butterfly/scripts/events/cdn.js","hash":"7864ba45716c51aef8d8b04fd4bc212e0008ce3b","modified":1722500710226},{"_id":"themes/butterfly/scripts/events/404.js","hash":"f1d1c378356b776e9b2a8411e6dca88dc8c3245c","modified":1722500710226},{"_id":"themes/butterfly/source/js/utils.js","hash":"7b871fe0c4456660cff4c7b9cc4ed089adac2caf","modified":1722500710327},{"_id":"themes/butterfly/scripts/events/comment.js","hash":"95479790234c291b064d031577d71214cdd1d820","modified":1722500710229},{"_id":"themes/butterfly/scripts/events/init.js","hash":"ce68e84a9ccfcf91100befbaa9afc392a0cd93bb","modified":1722500710229},{"_id":"themes/butterfly/scripts/events/merge_config.js","hash":"b1dfc3c898b886eab1241b068fc27d7a26a3b7d2","modified":1722500710230},{"_id":"themes/butterfly/scripts/events/stylus.js","hash":"0a336dfe5ed08952fa0df1532421df38a74a20d6","modified":1722500710231},{"_id":"themes/butterfly/scripts/events/welcome.js","hash":"f59e10305fef59ea3e62a7395106c0927582879d","modified":1722500710232},{"_id":"themes/butterfly/scripts/helpers/aside_archives.js","hash":"4f712b4ea383b59a3122683db1d54c04a79ccc5d","modified":1722500710236},{"_id":"themes/butterfly/scripts/filters/post_lazyload.js","hash":"5ed2d7ef240c927fe1b7a7fb5bf9e55e2bfd55a5","modified":1722500710233},{"_id":"themes/butterfly/scripts/filters/random_cover.js","hash":"0df22d7dbfa766a65cb6032a1f003348f4307cfe","modified":1722500710234},{"_id":"themes/butterfly/scripts/helpers/findArchiveLength.js","hash":"b12895e0765d596494e5526d121de0dd5a7c23d3","modified":1722500710238},{"_id":"themes/butterfly/scripts/helpers/aside_categories.js","hash":"cdd992c8577d583c237b6aac9f5077d8200879b2","modified":1722500710237},{"_id":"themes/butterfly/scripts/helpers/inject_head_js.js","hash":"b55f71347d2ead097c7f98c0ec792b091433345c","modified":1722500710239},{"_id":"themes/butterfly/scripts/helpers/page.js","hash":"c74d6a9b8f71e69447f7847a5f5e81555d68b140","modified":1722500710240},{"_id":"themes/butterfly/scripts/helpers/related_post.js","hash":"76343ac8422c9c8539082e77eda6ffee4b877eb2","modified":1722500710241},{"_id":"themes/butterfly/scripts/helpers/series.js","hash":"17c0095bc8d612a268cdcab000b1742dc4c6f811","modified":1722500710243},{"_id":"themes/butterfly/scripts/tag/flink.js","hash":"3ba7677969ff01fab06fc6713455ddc6861f0024","modified":1722500710245},{"_id":"themes/butterfly/scripts/tag/button.js","hash":"164d5f1c2d1b4cb5a813a6fc574016743a53c019","modified":1722500710244},{"_id":"themes/butterfly/scripts/tag/gallery.js","hash":"7ec77b3093f5de67e7032f40a5b12f1389f6f6ff","modified":1722500710247},{"_id":"themes/butterfly/scripts/tag/inlineImg.js","hash":"c863d2732ce4bdc084f2d0db92f50f80328c1007","modified":1722500710248},{"_id":"themes/butterfly/scripts/tag/hide.js","hash":"5d08c3552f7d3c80a724ca628bff66321abe2e5a","modified":1722500710248},{"_id":"themes/butterfly/scripts/tag/label.js","hash":"b013dc0a3d57d2caa18b89263f23871da9ec456d","modified":1722500710248},{"_id":"themes/butterfly/scripts/tag/note.js","hash":"e68d8d21f3a86e3646907a3685550ee20e8d4a9f","modified":1722500710252},{"_id":"themes/butterfly/scripts/tag/mermaid.js","hash":"289f85847c58f0b2b7d98a68e370a2896edb8949","modified":1722500710250},{"_id":"themes/butterfly/scripts/tag/score.js","hash":"35d54adc92e717cc32e13515122b025fd1a98ea2","modified":1722500710252},{"_id":"themes/butterfly/scripts/tag/series.js","hash":"dc56e5182dd3813dc977c9bf8556dcc7615e467b","modified":1722500710252},{"_id":"themes/butterfly/scripts/tag/tabs.js","hash":"7c448886f230adb4f4a0208c88fff809abcb5637","modified":1722500710254},{"_id":"themes/butterfly/scripts/tag/timeline.js","hash":"e611074a5a7f489a8b04afac0a3f7f882ce26532","modified":1722500710255},{"_id":"themes/butterfly/layout/includes/head/Open_Graph.pug","hash":"c8dbdfe6145a0bc6f7691c9551be8169a2698f0a","modified":1722500710105},{"_id":"themes/butterfly/layout/includes/head/config_site.pug","hash":"bd5dd5452e28a4fe94c3241a758ec6f4fdb7a149","modified":1722500710109},{"_id":"themes/butterfly/layout/includes/head/analytics.pug","hash":"c7666a10448edd93f5ace37296051b7670495f1b","modified":1722500710106},{"_id":"themes/butterfly/layout/includes/head/config.pug","hash":"39e1ca0a54eb5fd3688a78737417a1aaa50914c9","modified":1722500710108},{"_id":"themes/butterfly/layout/includes/head/preconnect.pug","hash":"a7c929b90ae52b78b39b1728e3ab0e3db1cb7b9a","modified":1722500710111},{"_id":"themes/butterfly/layout/includes/head/google_adsense.pug","hash":"f29123e603cbbcc6ce277d4e8f600ba67498077c","modified":1722500710109},{"_id":"themes/butterfly/layout/includes/head/pwa.pug","hash":"6dc2c9b85df9ab4f5b554305339fd80a90a6cf43","modified":1722500710113},{"_id":"themes/butterfly/layout/includes/head/site_verification.pug","hash":"5168caadc4cf541f5d6676a9c5e8ae47a948f9ad","modified":1722500710114},{"_id":"themes/butterfly/layout/includes/header/index.pug","hash":"1bef867c799ba158c5417272fb137539951aa120","modified":1722500710117},{"_id":"themes/butterfly/layout/includes/header/nav.pug","hash":"962ee70a35e60a13c31eea47d16b9f98069fe417","modified":1722500710119},{"_id":"themes/butterfly/layout/includes/header/menu_item.pug","hash":"ca8bcd90ad9467819330bfe7c02b76322754bccf","modified":1722500710118},{"_id":"themes/butterfly/layout/includes/header/social.pug","hash":"7a641b5dd45b970e1dafd1433eb32ea149e55cf2","modified":1722500710121},{"_id":"themes/butterfly/layout/includes/loading/fullpage-loading.pug","hash":"766baca6ddce49d1724a02312387b292ff2d0bdc","modified":1722500710125},{"_id":"themes/butterfly/layout/includes/loading/index.pug","hash":"00ae419f527d8225a2dc03d4f977cec737248423","modified":1722500710126},{"_id":"themes/butterfly/layout/includes/loading/pace.pug","hash":"a6fde4835d6460ce7baf792fd5e1977fad73db25","modified":1722500710127},{"_id":"themes/butterfly/layout/includes/header/post-info.pug","hash":"cc99b2dc5c6b1f74391b0da609853ebc11de9610","modified":1722500710120},{"_id":"themes/butterfly/layout/includes/mixins/article-sort.pug","hash":"9155f01d4c644a2e19b2b13b2d3c6d5e34dd0abf","modified":1722500710128},{"_id":"themes/butterfly/layout/includes/mixins/post-ui.pug","hash":"90eb453b14f6b5c25bfd8d28aa67783603a1411d","modified":1722500710129},{"_id":"themes/butterfly/layout/includes/page/categories.pug","hash":"5276a8d2835e05bd535fedc9f593a0ce8c3e8437","modified":1722500710131},{"_id":"themes/butterfly/layout/includes/page/default-page.pug","hash":"e9459f122af7b733398578f9f0f8ab3c5e12a217","modified":1722500710131},{"_id":"themes/butterfly/layout/includes/page/flink.pug","hash":"e37681bc9c169d4220f26ecda2b3d5c02b6b9a0f","modified":1722500710133},{"_id":"themes/butterfly/layout/includes/page/tags.pug","hash":"12be059c536490af216a397e8f2a7abbf6d4610e","modified":1722500710134},{"_id":"themes/butterfly/layout/includes/post/reward.pug","hash":"912df10a053db3135968e92b6fd1a707ee94c968","modified":1722500710138},{"_id":"themes/butterfly/layout/includes/post/post-copyright.pug","hash":"0abad416b1974a17e5be7817931d5fe799180170","modified":1722500710136},{"_id":"themes/butterfly/layout/includes/third-party/effect.pug","hash":"43014bfc63583d3ee8808d526dd165848c0ed52f","modified":1722500710177},{"_id":"themes/butterfly/layout/includes/third-party/aplayer.pug","hash":"e939344fd389aeb11864ee697d5fd9b036d8325f","modified":1722500710146},{"_id":"themes/butterfly/layout/includes/third-party/prismjs.pug","hash":"08979afbfecb4476a5ae8e360947b92624d285b8","modified":1722500710194},{"_id":"themes/butterfly/layout/includes/third-party/pjax.pug","hash":"9b734d99963f3e7f562597dcf60485ccbf6e961c","modified":1722500710192},{"_id":"themes/butterfly/layout/includes/third-party/subtitle.pug","hash":"dfb5e16a7e7106bb20b2ac2d0df1251d0fc79609","modified":1722500710202},{"_id":"themes/butterfly/layout/includes/third-party/pangu.pug","hash":"f0898509da70388b5c532f19e762756d74080200","modified":1722500710191},{"_id":"themes/butterfly/layout/includes/widget/card_ad.pug","hash":"a8312b527493dabbadbb1280760168d3bc909a3b","modified":1722500710204},{"_id":"themes/butterfly/layout/includes/widget/card_archives.pug","hash":"73d33b6930e7944187a4b3403daf25d27077a2dd","modified":1722500710206},{"_id":"themes/butterfly/layout/includes/widget/card_announcement.pug","hash":"21e019bdc3b1e796bb00976bb29af2d51f873624","modified":1722500710205},{"_id":"themes/butterfly/layout/includes/widget/card_categories.pug","hash":"66e383b4ef374951eb87dd1bf4cdb7a667193fb5","modified":1722500710209},{"_id":"themes/butterfly/layout/includes/widget/card_author.pug","hash":"ab037bf5794638bd30da4cf7cf106e5d03b5f696","modified":1722500710207},{"_id":"themes/butterfly/layout/includes/widget/card_newest_comment.pug","hash":"8e22f53886a57a68286970d8af8b4c950fd4a1d7","modified":1722500710210},{"_id":"themes/butterfly/layout/includes/widget/card_post_series.pug","hash":"e0bb72fa0ce15964b11b8fe421cae3432394e35f","modified":1722500710210},{"_id":"themes/butterfly/layout/includes/widget/card_bottom_self.pug","hash":"1dba77d250eeebfb6e293d504352c7e9ea31980b","modified":1722500710208},{"_id":"themes/butterfly/layout/includes/widget/card_post_toc.pug","hash":"d48d77af1670bd568d784794408bf524a448bfcc","modified":1722500710211},{"_id":"themes/butterfly/layout/includes/widget/card_recent_post.pug","hash":"bb842d2aa6469d65bf06af1372f0a19a9e4ef44c","modified":1722500710214},{"_id":"themes/butterfly/layout/includes/widget/card_top_self.pug","hash":"7b5ae404a1205546b7de4be42291315cf918f2b3","modified":1722500710215},{"_id":"themes/butterfly/layout/includes/widget/card_tags.pug","hash":"842b772a387b576550fa127030e1c2e9bf65716d","modified":1722500710215},{"_id":"themes/butterfly/layout/includes/widget/index.pug","hash":"8df529f71e25f1c0a00e533de7944ed3d1ba7bd8","modified":1722500710217},{"_id":"themes/butterfly/layout/includes/widget/card_webinfo.pug","hash":"12185713f9ca08984fc74e3b69d8cd6828d23da8","modified":1722500710216},{"_id":"themes/butterfly/source/css/_global/function.styl","hash":"e920dae9ce00177922468db49240f5aca0af4f64","modified":1722500710258},{"_id":"themes/butterfly/source/css/_global/index.styl","hash":"0421da07907b3d98df64239e073b23fbb3f04149","modified":1722500710259},{"_id":"themes/butterfly/source/css/_highlight/highlight.styl","hash":"41054740cfbd1357138785464f6859681ca58493","modified":1722500710260},{"_id":"themes/butterfly/source/css/_highlight/theme.styl","hash":"3c178608406c31d768af355ef1d7326da37cc75f","modified":1722500710268},{"_id":"themes/butterfly/source/css/_layout/aside.styl","hash":"aae70ddd126b2e40158e45036abecbfa33cbfbba","modified":1722500710270},{"_id":"themes/butterfly/source/css/_layout/chat.styl","hash":"792a04d36de32f230ca3256ad87a90fe8392f333","modified":1722500710272},{"_id":"themes/butterfly/source/css/_layout/footer.styl","hash":"5e27f7842af82ff7498d4b59787ce9ca90fa9e6f","modified":1722500710275},{"_id":"themes/butterfly/source/css/_layout/head.styl","hash":"dd5d9a5631b682610ea699541b8246ceaa56fddb","modified":1723206391886},{"_id":"themes/butterfly/source/css/_layout/comments.styl","hash":"fbfce4d67cacd1df22fb73d89d008693f59d9d91","modified":1722500710273},{"_id":"themes/butterfly/source/css/_layout/loading.styl","hash":"f0b01bbf321c2c24fdccaee367dd9fd448031a72","modified":1722500710277},{"_id":"themes/butterfly/source/css/_layout/pagination.styl","hash":"bd099f7d3adef4b7edd24c0a25a07415b156e587","modified":1722500710278},{"_id":"themes/butterfly/source/css/_layout/post.styl","hash":"7ae27854a737a02eca89b0b92db94cb298fef59e","modified":1722500710280},{"_id":"themes/butterfly/source/css/_layout/relatedposts.styl","hash":"6dcf19c0933c8828a439f801b0f4b256447dec07","modified":1722500710281},{"_id":"themes/butterfly/source/css/_layout/reward.styl","hash":"c0b11a1a5f52e3a6af4e312a8134c93eda18a7dd","modified":1722500710281},{"_id":"themes/butterfly/source/css/_layout/sidebar.styl","hash":"80ee9d0bfe5d38aac1f0cdcea5fc88b71d310041","modified":1722500710283},{"_id":"themes/butterfly/source/css/_layout/third-party.styl","hash":"15ea7564b2e3bf46bc91fb6e49c94d057b37caaf","modified":1722500710283},{"_id":"themes/butterfly/source/css/_layout/rightside.styl","hash":"0322237e762db401d7b4aa33168d0b9334a9ec26","modified":1722500710282},{"_id":"themes/butterfly/source/css/_page/404.styl","hash":"a7223a8fcc4fa7b81e552c9a2554be7df9de312e","modified":1722500710289},{"_id":"themes/butterfly/source/css/_page/archives.styl","hash":"5dd1ba997741d02894ff846eda939ad8051c0bb2","modified":1722500710290},{"_id":"themes/butterfly/source/css/_page/categories.styl","hash":"68bc8cbea25dbb3cdc170f09f9b43ce130547717","modified":1722500710292},{"_id":"themes/butterfly/source/css/_page/flink.styl","hash":"ecc2b2e28c179eb9406fc2c6f00e141078249cdd","modified":1722500710294},{"_id":"themes/butterfly/source/css/_page/homepage.styl","hash":"a977cd8161ef4d6ddd5293e81403519076657430","modified":1722500710295},{"_id":"themes/butterfly/source/css/_page/tags.styl","hash":"9e35f91847773b915c74a78b8aa66c7bdb950ad0","modified":1722500710296},{"_id":"themes/butterfly/source/css/_page/common.styl","hash":"df7a51fcabbadab5aa31770e3202a47c9599bbb7","modified":1722500710293},{"_id":"themes/butterfly/source/css/_search/algolia.styl","hash":"37db99299af380e9111dce2a78a5049b301b13e0","modified":1722500710298},{"_id":"themes/butterfly/source/css/_search/index.styl","hash":"0b23010154e19f37f0c4af0110f9f834d6d41a13","modified":1722500710298},{"_id":"themes/butterfly/source/css/_search/local-search.styl","hash":"8a53d7ba5ca2f5eb4124b684e7845b648583f658","modified":1722500710301},{"_id":"themes/butterfly/source/css/_mode/darkmode.styl","hash":"dbc855795a881f8c805bf5c9c5c4d5d542a648ec","modified":1722500710286},{"_id":"themes/butterfly/source/css/_mode/readmode.styl","hash":"a22fd15048d21452f0015d0765d295d730203308","modified":1723078297080},{"_id":"themes/butterfly/source/css/_tags/gallery.styl","hash":"3e9355b76f87e2ee90f652855282b37ab5ae0b3e","modified":1722500710304},{"_id":"themes/butterfly/source/css/_tags/hexo.styl","hash":"985b183db7b7bfd8f9bdb60494549fb7f850348b","modified":1722500710305},{"_id":"themes/butterfly/source/css/_tags/hide.styl","hash":"b7cf7753479fcf2fe07287ffdb0e568adbba4c18","modified":1722500710306},{"_id":"themes/butterfly/source/css/_tags/inlineImg.styl","hash":"5a873d01fabebcf7ddf7a6b1c2e2e5e2714097f4","modified":1722500710307},{"_id":"themes/butterfly/source/css/_tags/label.styl","hash":"2f83bd145b870d80d4b18b0ac603235229a5694e","modified":1722500710307},{"_id":"themes/butterfly/source/css/_tags/note.styl","hash":"4929382bd60788d34752a66e2fe764ef797a72a0","modified":1722500710308},{"_id":"themes/butterfly/source/css/_tags/button.styl","hash":"62da1de0d5b8453fcecbfacddb16985265638ba5","modified":1722500710302},{"_id":"themes/butterfly/source/css/_tags/tabs.styl","hash":"353b95f9a6c2c1e777d978118cb61f909ccbf89c","modified":1722500710309},{"_id":"themes/butterfly/source/css/_tags/timeline.styl","hash":"07ea7134db7a66c87658116f089fb1a2a6906563","modified":1722500710310},{"_id":"themes/butterfly/source/css/_third-party/normalize.min.css","hash":"8549829fb7d3c21cd9e119884962e8c463a4a267","modified":1722500710312},{"_id":"themes/butterfly/source/js/search/algolia.js","hash":"a7c2fe73cc05ad3525909b86ad0ede1a9f2d3b48","modified":1722500710323},{"_id":"themes/butterfly/source/js/search/local-search.js","hash":"ab3904451ae1d78903424b8b2ef815c8571e1749","modified":1722500710325},{"_id":"themes/butterfly/layout/includes/third-party/abcjs/abcjs.pug","hash":"8f95aca305b56ccd7c8c7367b03d26db816ebd5f","modified":1722500710143},{"_id":"themes/butterfly/layout/includes/third-party/abcjs/index.pug","hash":"58f37823f6cd9a194fb50f7ca7c2233e49939034","modified":1722500710144},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/fb.pug","hash":"4b98145d6584d586cabf033493282afc72ae816a","modified":1722500710149},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/disqus.pug","hash":"d6fff5a7f84c8b09f282f9ddc0020a68a8aac9ea","modified":1722500710148},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/index.pug","hash":"846cabae287ae31b3bbfac3da022475713dd5ecc","modified":1722500710151},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/artalk.pug","hash":"b03ee8625149191f9d5d057bbc9824b68d8dd0c4","modified":1722500710147},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/remark42.pug","hash":"716dc463fe4ef5112e7018ed60804125fdfa5cad","modified":1722500710151},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/twikoo.pug","hash":"7e233f872aea6fd6beccdc9efd86b1bf9ec9f12d","modified":1722500710152},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/waline.pug","hash":"fd2320ee25507bb8ef49f932c2d170586b44ea4d","modified":1722500710154},{"_id":"themes/butterfly/layout/includes/third-party/card-post-count/valine.pug","hash":"cd4fc9c5a61608a5dedf645c1295430a1623040f","modified":1722500710153},{"_id":"themes/butterfly/layout/includes/third-party/comments/artalk.pug","hash":"5373b822aa72ddb96f2f1f4baf6c058b40d705d6","modified":1722500710163},{"_id":"themes/butterfly/layout/includes/third-party/comments/disqus.pug","hash":"364d1fd655baca9132038ef1e312abde2c0bc7de","modified":1722500710164},{"_id":"themes/butterfly/layout/includes/third-party/comments/disqusjs.pug","hash":"f78c9c20c86d58c7cf099f6f8d6097103d7d43e5","modified":1722500710165},{"_id":"themes/butterfly/layout/includes/third-party/comments/facebook_comments.pug","hash":"11f5dca1432e59f22955aaf4ac3e9de6b286d887","modified":1722500710166},{"_id":"themes/butterfly/layout/includes/third-party/comments/gitalk.pug","hash":"1c86c8fc1a28514a02a1f6a25ca9ec05eb3955b7","modified":1722500710168},{"_id":"themes/butterfly/layout/includes/third-party/comments/index.pug","hash":"db6713d2b90eb8183f86ac92c26761a8501c0ddb","modified":1722500710169},{"_id":"themes/butterfly/layout/includes/third-party/comments/js.pug","hash":"3abbaaa4ea575c45b3cebffd40bad1acc6ffce84","modified":1722500710170},{"_id":"themes/butterfly/layout/includes/third-party/comments/livere.pug","hash":"09c2ef4bc6d005f96dfa48b1d9af1ec095c5266d","modified":1722500710171},{"_id":"themes/butterfly/layout/includes/third-party/comments/remark42.pug","hash":"7f450664e6323a076ae59c393b0f22167cfa82e5","modified":1722500710172},{"_id":"themes/butterfly/layout/includes/third-party/comments/giscus.pug","hash":"1eab7ca1cb16c6786f9c3ca0efef8cc15e444ab4","modified":1722500710167},{"_id":"themes/butterfly/layout/includes/third-party/comments/twikoo.pug","hash":"9942a903227350960c1d0716e59516ae79ac24a8","modified":1722500710173},{"_id":"themes/butterfly/layout/includes/third-party/comments/utterances.pug","hash":"b65a42167df5fb07e2a63f312a58c321d3112a90","modified":1722500710174},{"_id":"themes/butterfly/layout/includes/third-party/comments/waline.pug","hash":"efb72547fc2d470a124f5636391128dc59627498","modified":1722500710176},{"_id":"themes/butterfly/layout/includes/third-party/comments/valine.pug","hash":"4ed7c74087e81c6fcaf4fca7dced58b4e19f4cb1","modified":1722500710175},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/artalk.pug","hash":"2e36fac4791e99844cd56676898be0dbf5eb4e99","modified":1722500710184},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/disqus-comment.pug","hash":"d8898e427acd91ceb97d6a7ee3acb011ca86b9fc","modified":1722500710184},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/index.pug","hash":"f8b65460c399973090c1fb7ab81e3708c252e7cc","modified":1722500710187},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/github-issues.pug","hash":"fc8814bd016d039874ec2fc24dcb78587892e2a6","modified":1722500710186},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/twikoo-comment.pug","hash":"17520a86de12ae585289463c066d3ac91b78a2ff","modified":1722500710188},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/remark42.pug","hash":"a4e52188b6effeee1df2a01dcbf4105de76a61a8","modified":1722500710188},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/waline.pug","hash":"0544d91c0bc9e26e0fe1b5ff490f4a8540ed1ee1","modified":1722500710191},{"_id":"themes/butterfly/layout/includes/third-party/math/index.pug","hash":"2afa4c21dd19890f47fb568cfb0d90efb676a253","modified":1722500710179},{"_id":"themes/butterfly/layout/includes/third-party/newest-comments/valine.pug","hash":"ecfff55b2c7f6d87ce4d5028fdf9f8c0bf155c73","modified":1722500710189},{"_id":"themes/butterfly/layout/includes/third-party/math/katex.pug","hash":"f0d3eddd2bed68e5517274b3530bfe0fa5057d8e","modified":1722500710180},{"_id":"themes/butterfly/layout/includes/third-party/math/mathjax.pug","hash":"bb944185f4bb9f9a9b9d70ee215f66ccd6d4c6cf","modified":1722500710181},{"_id":"themes/butterfly/layout/includes/third-party/math/mermaid.pug","hash":"c682e4d61017fb0dd2e837bfcc242371f1a13364","modified":1722500710182},{"_id":"themes/butterfly/layout/includes/third-party/chat/crisp.pug","hash":"09d2ab2570b67e6f09244a898ccab5567cb82ace","modified":1722500710156},{"_id":"themes/butterfly/layout/includes/third-party/chat/index.pug","hash":"1157118db9f5d7c0c5a0fc7c346f6e934ca00d52","modified":1722500710159},{"_id":"themes/butterfly/layout/includes/third-party/chat/chatra.pug","hash":"08a85e52fc800d3562df869e5e2613313e76fce6","modified":1722500710156},{"_id":"themes/butterfly/layout/includes/third-party/chat/daovoice.pug","hash":"0d960849d5b05d27ec87627b983ca35f2411b9e8","modified":1722500710158},{"_id":"themes/butterfly/layout/includes/third-party/chat/tidio.pug","hash":"6d40b521eec4136f6742c548a4445ed593470b1b","modified":1722500710161},{"_id":"themes/butterfly/layout/includes/third-party/chat/messenger.pug","hash":"799da8f3015e6fe440681b21644bcb3810a5518c","modified":1722500710160},{"_id":"themes/butterfly/layout/includes/third-party/share/addtoany.pug","hash":"1f02a26730e5f36cc2dfec7ff4d5c93a099ed5ba","modified":1722500710199},{"_id":"themes/butterfly/layout/includes/third-party/share/index.pug","hash":"c16ee69b5ca8db016db0508d014ae0867c4ce929","modified":1722500710201},{"_id":"themes/butterfly/layout/includes/third-party/search/index.pug","hash":"3adcf28a8d205ea3ee19828eda0e668702fac07a","modified":1722500710197},{"_id":"themes/butterfly/layout/includes/third-party/share/share-js.pug","hash":"8106bd031586f075a994956ee4438eb13be25d7b","modified":1722500710202},{"_id":"themes/butterfly/layout/includes/third-party/search/local-search.pug","hash":"420a86e73d0d748ac234fd00d06d9e433ca5e3f2","modified":1722500710198},{"_id":"themes/butterfly/layout/includes/third-party/search/docsearch.pug","hash":"52a06a2e039f44383085333cac69f3f4e7d0ad3a","modified":1722500710196},{"_id":"themes/butterfly/source/css/_highlight/prismjs/diff.styl","hash":"1309292f1c8c53d96cd7333507b106bcc24ca8fc","modified":1722500710265},{"_id":"themes/butterfly/source/css/_highlight/prismjs/index.styl","hash":"01ff9e77eb1bd454bec65a6ff5972c8e219bc708","modified":1722500710267},{"_id":"themes/butterfly/source/css/_highlight/prismjs/line-number.styl","hash":"7c9cc43e1d2577f7151039d58e603c30860fd281","modified":1722500710267},{"_id":"themes/butterfly/source/css/_highlight/highlight/diff.styl","hash":"6e77f1ca0cfb0db6b028f5c0238780e66d344f3d","modified":1722500710263},{"_id":"themes/butterfly/source/css/_highlight/highlight/index.styl","hash":"fc702a4614d0562a381907b083f71ba63d301d86","modified":1722500710264},{"_id":"themes/butterfly/layout/includes/third-party/search/algolia.pug","hash":"90338ac4cd114d324fe1caaaeea8be9ca05d6a46","modified":1722500710195},{"_id":"source/img/machinelearning/decision-tree.png","hash":"85b255c6cfb26c6e843c31adf2b1e4d0fee064c6","modified":1739067934683},{"_id":"source/img/dingyue.png","hash":"c6afcd1124d84f07caeefcb895be3f3a5b301678","modified":1723001642809},{"_id":"source/img/machinelearning/ensemble-learning.png","hash":"ea3de5b1ea8def17a040d719598508bb9ccfd5f1","modified":1739067570823},{"_id":"source/img/site01.jpg","hash":"d93084432feb123fd5d781210c3a2c4db43c10ec","modified":1722524985872},{"_id":"public/search.xml","hash":"d66709441d3d438097d920d70f151f3cc1b44574","modified":1760585270464},{"_id":"public/categories/index.html","hash":"74e91e28f95ae8c7de23e9cfac224f057e90ba69","modified":1760585270464},{"_id":"public/about/index.html","hash":"8c0032de82fc592931662bcf556fe999eaca6910","modified":1760585270464},{"_id":"public/link/index.html","hash":"435c7c9dd3d155a2e5740acf01971c0583142837","modified":1760585270464},{"_id":"public/movies/index.html","hash":"fe75a4733f80b4ecc54c71ccfba4cfff6925bf7b","modified":1760585270464},{"_id":"public/tags/index.html","hash":"a53295872a8e0a60e17dd50db9f91bded5f2ba49","modified":1760585270464},{"_id":"public/music/index.html","hash":"80810bdf33abf56f09c4e022908ee1fb0c235842","modified":1760585270464},{"_id":"public/posts/8816.html","hash":"81e5b6456b318a85958ff8e1a6dc33e4868fa11d","modified":1760585270464},{"_id":"public/posts/95.html","hash":"fd467bef2f5dab16d418dbe73f88a31cb08936aa","modified":1760585270464},{"_id":"public/posts/60504.html","hash":"c469bb851f5779346915bef0086cea5438d9c125","modified":1760585270464},{"_id":"public/posts/52662.html","hash":"941fe8914f7dfc47920f32f2cab3a9792bb95f1a","modified":1760585270464},{"_id":"public/posts/12462.html","hash":"3455530d8a256d8f36cd99332f6156e164758304","modified":1760585270464},{"_id":"public/posts/29139.html","hash":"ee62bb01dfb57051794357948023b30aa4be1383","modified":1760585270464},{"_id":"public/posts/61252.html","hash":"dba07b2494cd420554ee497c0ab255b82da5aa9c","modified":1760585270464},{"_id":"public/posts/61253.html","hash":"fd964e3ca95d530175d4f95e4aa28b1cc9e56c23","modified":1760585270464},{"_id":"public/posts/61251.html","hash":"a378b458e731ed8d7ccaf951b45e58d40f2ce082","modified":1760585270464},{"_id":"public/posts/34849.html","hash":"7747fa04441c52f10144282a739c2440cbba93ed","modified":1760585270464},{"_id":"public/posts/16107.html","hash":"95151709ee0e384aa3da70fb13c7ca675f81e74a","modified":1760585270464},{"_id":"public/posts/58638.html","hash":"d6ad4c8c7bbe40ffe8a9dd4205d3092589121fc8","modified":1760585270464},{"_id":"public/posts/41168.html","hash":"e47c1fb1a5bef0262a5975020b9b8d9399c0a36a","modified":1760585270464},{"_id":"public/posts/14011.html","hash":"610deb97494b7ad374c5cc049fe1397bb1340645","modified":1760585270464},{"_id":"public/posts/33957.html","hash":"acd4d273781a13af60d45440c460d4a6832059c1","modified":1760585270464},{"_id":"public/posts/47807.html","hash":"46dfb22ed5c2fd3d9f24529c91402a04440cb34e","modified":1760585270464},{"_id":"public/posts/58817.html","hash":"d52f032887950db93125ae72d50f659e950c7ee9","modified":1760585270464},{"_id":"public/posts/1441.html","hash":"8edd9830666f6ddc77a19d650212821a5be15341","modified":1760585270464},{"_id":"public/categories/machinelearning/index.html","hash":"cda44976b43f36bda90eac5aebdd8cbd03658ed1","modified":1760585270464},{"_id":"public/categories/古文观止/index.html","hash":"920dc8a46ce2b75514c081e061037ce7c2e09f3d","modified":1760585270464},{"_id":"public/archives/index.html","hash":"b71f61f6f1ad0f5c76bd9bba2b3ea74b5e14512d","modified":1760585270464},{"_id":"public/archives/page/2/index.html","hash":"60e2a23b6a46f387bf0ca4d3ff191efb0e81fe47","modified":1760585270464},{"_id":"public/archives/2024/index.html","hash":"fc06503cd10a56d49178db55fe6bd6d9c18527ec","modified":1760585270464},{"_id":"public/archives/2024/page/2/index.html","hash":"111fb7dcd95b95569bd9cf9c87edf6988f7016a3","modified":1760585270464},{"_id":"public/archives/2024/08/index.html","hash":"8aa564857ac3eb01a9e2c1c98f6a3210db4cad28","modified":1760585270464},{"_id":"public/archives/2024/09/index.html","hash":"30e8a9f3c5b2bf1556e6915b37a4230916710a41","modified":1760585270464},{"_id":"public/index.html","hash":"e1c05b88223046835d312648aade1938c0044bc7","modified":1760585270464},{"_id":"public/archives/2025/index.html","hash":"3f317554d59d1835ea8bc06650675d789f17fcf3","modified":1760585270464},{"_id":"public/archives/2025/01/index.html","hash":"e7fa9ae136ecac1af95fff3b9dce8a69c65b5ad2","modified":1760585270464},{"_id":"public/page/2/index.html","hash":"e1cb6613018b3a33b1ededc0b230dfb25739a42d","modified":1760585270464},{"_id":"public/tags/C-C/index.html","hash":"3e26c2e6f6764d1e68deeaf5d9de5d7d1586a198","modified":1760585270464},{"_id":"public/tags/decisiontree/index.html","hash":"d313122b1e2e3efb62bf968343df988470f9f275","modified":1760585270464},{"_id":"public/tags/ensemble-learning/index.html","hash":"34d25e92e72ea53da0bf49c8480a27fcaa861f6e","modified":1760585270464},{"_id":"public/tags/KNN/index.html","hash":"aa5b703f6e3ce52f7467b06bd1ceb344f39bfab4","modified":1760585270464},{"_id":"public/tags/linear-regression/index.html","hash":"a9adf62d0a97ae9d56c6e195a4b139b7f86a0f4b","modified":1760585270464},{"_id":"public/tags/logistic-regression/index.html","hash":"4739334ca1ad15dd99e60adb2aa6f46ec5df3c84","modified":1760585270464},{"_id":"public/tags/uniapp/index.html","hash":"1ce07c663a19979720e05bdbafca6def810c7b5e","modified":1760585270464},{"_id":"public/tags/网络代理/index.html","hash":"67789edc463708d0995418830ddabbf68f018055","modified":1760585270464},{"_id":"public/tags/古文观止/index.html","hash":"8db5a1e48312dddafece48c152a24c6047629b29","modified":1760585270464},{"_id":"public/img/404.jpg","hash":"fb4489bc1d30c93d28f7332158c1c6c1416148de","modified":1760585270464},{"_id":"public/img/avatar.jpg","hash":"cb0941101c6a6b8f762ce6ffc3c948641e7f642f","modified":1760585270464},{"_id":"public/img/favicon.png","hash":"3cf89864b4f6c9b532522a4d260a2e887971c92d","modified":1760585270464},{"_id":"public/img/friend_404.gif","hash":"8d2d0ebef70a8eb07329f57e645889b0e420fa48","modified":1760585270464},{"_id":"public/img/machinelearning/knn-01.png","hash":"2a931981d1d5f72d6b42b39b8ef313569eab853e","modified":1760585270464},{"_id":"public/img/machinelearning/kfold-skfold.png","hash":"ab841f5412b7ea773468a3facff643df68a88c01","modified":1760585270464},{"_id":"public/img/machinelearning/over_random_sampling.png","hash":"11457713572d2b37e7dc481fdaa9776fc3d68598","modified":1760585270464},{"_id":"public/img/machinelearning/ott.png","hash":"949863066d4deff80e31db94b3de808bd5be2df7","modified":1760585270464},{"_id":"public/img/machinelearning/over_smote_sampling.png","hash":"27cf8baad693e07f4a1134d675a82dfff06f7d6c","modified":1760585270464},{"_id":"public/img/machinelearning/under_sampling.png","hash":"02fd5e16a793c56a807efdbcd030a6be4e17bd2c","modified":1760585270464},{"_id":"public/img/machinelearning/fitting.png","hash":"89c0cf6301f0b42de93ac41e5bac33d99acfd530","modified":1760585270464},{"_id":"public/img/machinelearning/linear.png","hash":"4a54dbcc028e2385bd50bf8b022adde0a0d9fbcf","modified":1760585270464},{"_id":"public/img/language/c-env-conf.png","hash":"09de801caef07f177c829e889464442f535b189c","modified":1760585270464},{"_id":"public/img/machinelearning/random-forest.png","hash":"8cec4900c731097a7eed0bfe3efd839ee49ef836","modified":1760585270464},{"_id":"public/css/index.css","hash":"a86592daf1fcadb01092b449a0eb5100fc2351fb","modified":1760585270464},{"_id":"public/js/tw_cn.js","hash":"f8d2e3f31468991a7f5171cbfdb157dfb86d3372","modified":1760585270464},{"_id":"public/css/var.css","hash":"da39a3ee5e6b4b0d3255bfef95601890afd80709","modified":1760585270464},{"_id":"public/js/search/algolia.js","hash":"108988d046da9a4716148df43b3975217c8ceaae","modified":1760585270464},{"_id":"public/js/main.js","hash":"0dac585446445e0c419b86eec5580bc9b0657dc6","modified":1760585270464},{"_id":"public/js/utils.js","hash":"8e6b48d294e7aeaba8ff6348c43b2271cf865547","modified":1760585270464},{"_id":"public/js/search/local-search.js","hash":"e1f60ebac53a3f596fd0a4769b4f9275c48c6542","modified":1760585270464},{"_id":"public/img/000001.png","hash":"ad86c3b72174364d462bdab1d09540bd79eb123c","modified":1760585270464},{"_id":"public/img/machinelearning/cros-valid.png","hash":"8f9b204b651f93c17ad0856daa628d6abe985d97","modified":1760585270464},{"_id":"public/img/yiyuan.png","hash":"817a89509a8ebcddff6b369979d53ecf44a30a9f","modified":1760585270464},{"_id":"public/img/peiqian.png","hash":"2f077f1fff014ee448cd58b57ff83901702e2d88","modified":1760585270464},{"_id":"public/img/machinelearning/decision-tree.png","hash":"85b255c6cfb26c6e843c31adf2b1e4d0fee064c6","modified":1760585270464},{"_id":"public/img/dingyue.png","hash":"c6afcd1124d84f07caeefcb895be3f3a5b301678","modified":1760585270464},{"_id":"public/img/machinelearning/ensemble-learning.png","hash":"ea3de5b1ea8def17a040d719598508bb9ccfd5f1","modified":1760585270464},{"_id":"public/img/site01.jpg","hash":"d93084432feb123fd5d781210c3a2c4db43c10ec","modified":1760585270464}],"Category":[{"name":"machinelearning","_id":"cmgsv0jnj000e3wahedhm0vqs"},{"name":"古文观止","_id":"cmgsv0jns001c3wahdvavch4e"}],"Data":[{"_id":"link","data":[{"class_name":"友情链接","class_desc":"那些人,那些事","link_list":[{"name":"Hexo","link":"https://hexo.io/zh-cn/","avatar":"https://d33wubrfki0l68.cloudfront.net/6657ba50e702d84afb32fe846bed54fba1a77add/827ae/logo.svg","descr":"快速、简单且強大的文档框架"}]},{"class_name":"网站","class_desc":"值得推荐的网站","link_list":[{"name":"Youtube","link":"https://www.youtube.com/","avatar":"https://i.loli.net/2020/05/14/9ZkGg8v3azHJfM1.png","descr":"视频网站"},{"name":"Weibo","link":"https://www.weibo.com/","avatar":"https://i.loli.net/2020/05/14/TLJBum386vcnI1P.png","descr":"中国最大社交分享平台"},{"name":"Twitter","link":"https://twitter.com/","avatar":"https://i.loli.net/2020/05/14/5VyHPQqR6LWF39a.png","descr":"社交分享平台"}]}]}],"Page":[{"title":"categories","date":"2024-07-31T00:33:49.000Z","aside":false,"top_img":false,"type":"categories","_content":"\n### category","source":"categories/index.md","raw":"---\ntitle: categories\ndate: 2024-07-31 08:33:49\naside: false\ntop_img: false\ntype: \"categories\"\n---\n\n### category","updated":"2024-07-31T06:37:23.090Z","path":"categories/index.html","comments":1,"layout":"page","_id":"cmgsv0jn300003wah63uv9mi5","content":"

category

","cover":false,"excerpt":"","more":"

category

"},{"title":"About me","date":"2024-08-10T02:35:41.000Z","_content":"\n落花飞舞,翩若惊鸿。\n","source":"about/index.md","raw":"---\ntitle: About me\ndate: 2024-08-10 10:35:41\n---\n\n落花飞舞,翩若惊鸿。\n","updated":"2024-08-10T02:38:14.626Z","path":"about/index.html","comments":1,"layout":"page","_id":"cmgsv0jn800023wah7660521g","content":"

落花飞舞,翩若惊鸿。

\n","cover":false,"excerpt":"","more":"

落花飞舞,翩若惊鸿。

\n"},{"title":"link","date":"2024-08-10T02:42:35.000Z","type":"link","_content":"","source":"link/index.md","raw":"---\ntitle: link\ndate: 2024-08-10 10:42:35\ntype: \"link\"\n---\n","updated":"2024-08-10T02:42:48.927Z","path":"link/index.html","comments":1,"layout":"page","_id":"cmgsv0jna00043wah4j1ldviz","content":"","cover":false,"excerpt":"","more":""},{"title":"Movies","date":"2024-08-10T02:40:33.000Z","_content":"","source":"movies/index.md","raw":"---\ntitle: Movies\ndate: 2024-08-10 10:40:33\n---\n","updated":"2024-08-10T02:40:33.715Z","path":"movies/index.html","comments":1,"layout":"page","_id":"cmgsv0jnc00063wahg3xv74kz","content":"","cover":false,"excerpt":"","more":""},{"title":"tags","date":"2024-07-31T00:32:38.000Z","type":"tags","comments":0,"top_img":false,"_content":"","source":"tags/index.md","raw":"---\ntitle: tags\ndate: 2024-07-31 08:32:38\ntype: \"tags\"\ncomments: false\ntop_img: false\n---\n","updated":"2024-07-31T00:33:00.076Z","path":"tags/index.html","layout":"page","_id":"cmgsv0jnd00083wahgwp9h04f","content":"","cover":false,"excerpt":"","more":""},{"title":"Music","date":"2024-08-10T02:40:19.000Z","_content":"","source":"music/index.md","raw":"---\ntitle: Music\ndate: 2024-08-10 10:40:19\n---\n","updated":"2024-08-10T02:40:19.897Z","path":"music/index.html","comments":1,"layout":"page","_id":"cmgsv0jnf000a3wahankg5puk","content":"","cover":false,"excerpt":"","more":""}],"Post":[{"title":"page","abbrlink":1441,"date":"2024-08-01T01:00:10.000Z","_content":"\n- [deploy](./deploy)\n- ","source":"_posts/frontend/frontend.md","raw":"---\ntitle: page\nabbrlink: 1441\ndate: 2024-08-01 09:00:10\ntags:\n---\n\n- [deploy](./deploy)\n- ","slug":"frontend/frontend","published":1,"updated":"2024-08-09T12:21:50.023Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jn500013wah341m9ld5","content":"\n","cover":false,"excerpt":"","more":"\n"},{"title":"Hello World","abbrlink":16107,"_content":"Welcome to [Hexo](https://hexo.io/)! This is your very first post. Check [documentation](https://hexo.io/docs/) for more info. If you get any problems when using Hexo, you can find the answer in [troubleshooting](https://hexo.io/docs/troubleshooting.html) or you can ask me on [GitHub](https://github.com/hexojs/hexo/issues).\n\n## Quick Start\n\n### Create a new post\n\n``` bash\n$ hexo new \"My New Post\"\n```\n\nMore info: [Writing](https://hexo.io/docs/writing.html)\n\n### Run server\n\n``` bash\n$ hexo server\n```\n\nMore info: [Server](https://hexo.io/docs/server.html)\n\n### Generate static files\n\n``` bash\n$ hexo generate\n```\n\nMore info: [Generating](https://hexo.io/docs/generating.html)\n\n### Deploy to remote sites\n\n``` bash\n$ hexo deploy\n```\n\nMore info: [Deployment](https://hexo.io/docs/one-command-deployment.html)\n","source":"_posts/hello-world.md","raw":"---\ntitle: Hello World\nabbrlink: 16107\n---\nWelcome to [Hexo](https://hexo.io/)! This is your very first post. Check [documentation](https://hexo.io/docs/) for more info. If you get any problems when using Hexo, you can find the answer in [troubleshooting](https://hexo.io/docs/troubleshooting.html) or you can ask me on [GitHub](https://github.com/hexojs/hexo/issues).\n\n## Quick Start\n\n### Create a new post\n\n``` bash\n$ hexo new \"My New Post\"\n```\n\nMore info: [Writing](https://hexo.io/docs/writing.html)\n\n### Run server\n\n``` bash\n$ hexo server\n```\n\nMore info: [Server](https://hexo.io/docs/server.html)\n\n### Generate static files\n\n``` bash\n$ hexo generate\n```\n\nMore info: [Generating](https://hexo.io/docs/generating.html)\n\n### Deploy to remote sites\n\n``` bash\n$ hexo deploy\n```\n\nMore info: [Deployment](https://hexo.io/docs/one-command-deployment.html)\n","slug":"hello-world","published":1,"date":"2024-09-11T00:01:10.419Z","updated":"2024-08-09T12:21:50.026Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jn900033wah0hqm0628","content":"

Welcome to Hexo! This is your very first post. Check documentation for more info. If you get any problems when using Hexo, you can find the answer in troubleshooting or you can ask me on GitHub.

\n

Quick Start

Create a new post

1
$ hexo new "My New Post"
\n\n

More info: Writing

\n

Run server

1
$ hexo server
\n\n

More info: Server

\n

Generate static files

1
$ hexo generate
\n\n

More info: Generating

\n

Deploy to remote sites

1
$ hexo deploy
\n\n

More info: Deployment

\n","cover":false,"excerpt":"","more":"

Welcome to Hexo! This is your very first post. Check documentation for more info. If you get any problems when using Hexo, you can find the answer in troubleshooting or you can ask me on GitHub.

\n

Quick Start

Create a new post

1
$ hexo new "My New Post"
\n\n

More info: Writing

\n

Run server

1
$ hexo server
\n\n

More info: Server

\n

Generate static files

1
$ hexo generate
\n\n

More info: Generating

\n

Deploy to remote sites

1
$ hexo deploy
\n\n

More info: Deployment

\n"},{"title":"网络相关","abbrlink":41168,"date":"2024-08-07T02:06:08.000Z","_content":"\n","source":"_posts/net/index.md","raw":"---\ntitle: 网络相关\nabbrlink: 41168\ndate: 2024-08-07 10:06:08\ntags:\n---\n\n","slug":"net/index","published":1,"updated":"2024-08-09T12:21:50.028Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnb00053wahhn6qai8o","content":"","cover":false,"excerpt":"","more":""},{"title":"script","abbrlink":34849,"date":"2024-08-17T03:09:24.000Z","_content":"\n### 查看CPU、内存使用率\n```bash\n#!/bin/bash\n\n# 定义颜色\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[0;33m'\nBLUE='\\033[0;34m'\nNC='\\033[0m' # 无颜色\n\nwhile true; do\n # 获取所有进程的CPU使用率和内存使用率\n cpu_usage=$(ps aux | awk '{sum_cpu += $3} END {print sum_cpu}')\n mem_usage=$(ps aux | awk '{sum_mem += $4} END {print sum_mem}')\n \n # 打印结果,带有时间戳、分隔线和颜色高亮\n echo -e \"${BLUE}==============================${NC}\"\n echo -e \"${YELLOW}Timestamp: $(date)${NC}\"\n echo -e \"${BLUE}==============================${NC}\"\n echo -e \"${GREEN}Total CPU usage: ${RED}$cpu_usage%${NC}\"\n echo -e \"${GREEN}Total Memory usage: ${RED}$mem_usage%${NC}\"\n echo -e \"${BLUE}==============================${NC}\"\n \n # 等待5秒后再次执行\n sleep 5\ndone\n\n```\n**保存脚本到/usr/local/bin目录下**\n```bash\n mv usage.sh /usr/local/bin/usage\n```\n\n### Shell脚本编写的基本信息\n\n```bash\n#! /bin/bash\n# -------------------------------------------------\n# Filename: test.sh\n# Version: 1.0\n# Date: 2024/05/02\n# Author: shenjianZ\n# Email: shenjianZLT@gmail.com\n# Website: https://blog.shenjianl.cn\n# Description: this is a test shell\n# CopyRight: 2024 All rights reserved shenjianZ\n# License GPL\n# ------------------------------------------------\n\n\n# Your script logic goes here\n```","source":"_posts/linux/script.md","raw":"---\ntitle: script\nabbrlink: 34849\ndate: 2024-08-17 11:09:24\ntags:\n---\n\n### 查看CPU、内存使用率\n```bash\n#!/bin/bash\n\n# 定义颜色\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[0;33m'\nBLUE='\\033[0;34m'\nNC='\\033[0m' # 无颜色\n\nwhile true; do\n # 获取所有进程的CPU使用率和内存使用率\n cpu_usage=$(ps aux | awk '{sum_cpu += $3} END {print sum_cpu}')\n mem_usage=$(ps aux | awk '{sum_mem += $4} END {print sum_mem}')\n \n # 打印结果,带有时间戳、分隔线和颜色高亮\n echo -e \"${BLUE}==============================${NC}\"\n echo -e \"${YELLOW}Timestamp: $(date)${NC}\"\n echo -e \"${BLUE}==============================${NC}\"\n echo -e \"${GREEN}Total CPU usage: ${RED}$cpu_usage%${NC}\"\n echo -e \"${GREEN}Total Memory usage: ${RED}$mem_usage%${NC}\"\n echo -e \"${BLUE}==============================${NC}\"\n \n # 等待5秒后再次执行\n sleep 5\ndone\n\n```\n**保存脚本到/usr/local/bin目录下**\n```bash\n mv usage.sh /usr/local/bin/usage\n```\n\n### Shell脚本编写的基本信息\n\n```bash\n#! /bin/bash\n# -------------------------------------------------\n# Filename: test.sh\n# Version: 1.0\n# Date: 2024/05/02\n# Author: shenjianZ\n# Email: shenjianZLT@gmail.com\n# Website: https://blog.shenjianl.cn\n# Description: this is a test shell\n# CopyRight: 2024 All rights reserved shenjianZ\n# License GPL\n# ------------------------------------------------\n\n\n# Your script logic goes here\n```","slug":"linux/script","published":1,"updated":"2024-08-18T16:05:45.731Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnc00073wah87e345ck","content":"

查看CPU、内存使用率

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash

# 定义颜色
RED='\\033[0;31m'
GREEN='\\033[0;32m'
YELLOW='\\033[0;33m'
BLUE='\\033[0;34m'
NC='\\033[0m' # 无颜色

while true; do
# 获取所有进程的CPU使用率和内存使用率
cpu_usage=$(ps aux | awk '{sum_cpu += $3} END {print sum_cpu}')
mem_usage=$(ps aux | awk '{sum_mem += $4} END {print sum_mem}')

# 打印结果,带有时间戳、分隔线和颜色高亮
echo -e "${BLUE}==============================${NC}"
echo -e "${YELLOW}Timestamp: $(date)${NC}"
echo -e "${BLUE}==============================${NC}"
echo -e "${GREEN}Total CPU usage: ${RED}$cpu_usage%${NC}"
echo -e "${GREEN}Total Memory usage: ${RED}$mem_usage%${NC}"
echo -e "${BLUE}==============================${NC}"

# 等待5秒后再次执行
sleep 5
done

\n

保存脚本到/usr/local/bin目录下

\n
1
mv usage.sh /usr/local/bin/usage
\n\n

Shell脚本编写的基本信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#! /bin/bash
# -------------------------------------------------
# Filename: test.sh
# Version: 1.0
# Date: 2024/05/02
# Author: shenjianZ
# Email: shenjianZLT@gmail.com
# Website: https://blog.shenjianl.cn
# Description: this is a test shell
# CopyRight: 2024 All rights reserved shenjianZ
# License GPL
# ------------------------------------------------


# Your script logic goes here
","cover":false,"excerpt":"","more":"

查看CPU、内存使用率

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash

# 定义颜色
RED='\\033[0;31m'
GREEN='\\033[0;32m'
YELLOW='\\033[0;33m'
BLUE='\\033[0;34m'
NC='\\033[0m' # 无颜色

while true; do
# 获取所有进程的CPU使用率和内存使用率
cpu_usage=$(ps aux | awk '{sum_cpu += $3} END {print sum_cpu}')
mem_usage=$(ps aux | awk '{sum_mem += $4} END {print sum_mem}')

# 打印结果,带有时间戳、分隔线和颜色高亮
echo -e "${BLUE}==============================${NC}"
echo -e "${YELLOW}Timestamp: $(date)${NC}"
echo -e "${BLUE}==============================${NC}"
echo -e "${GREEN}Total CPU usage: ${RED}$cpu_usage%${NC}"
echo -e "${GREEN}Total Memory usage: ${RED}$mem_usage%${NC}"
echo -e "${BLUE}==============================${NC}"

# 等待5秒后再次执行
sleep 5
done

\n

保存脚本到/usr/local/bin目录下

\n
1
mv usage.sh /usr/local/bin/usage
\n\n

Shell脚本编写的基本信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#! /bin/bash
# -------------------------------------------------
# Filename: test.sh
# Version: 1.0
# Date: 2024/05/02
# Author: shenjianZ
# Email: shenjianZLT@gmail.com
# Website: https://blog.shenjianl.cn
# Description: this is a test shell
# CopyRight: 2024 All rights reserved shenjianZ
# License GPL
# ------------------------------------------------


# Your script logic goes here
"},{"title":"C lang","abbrlink":12462,"date":"2025-01-15T12:41:26.000Z","_content":"\n### c lang在windows下的开发(VS code)\n[WinLibs - GCC+MinGW-w64 compiler for Windows](https://winlibs.com/#download-release)下载你需要的版本\n解压到`D:\\ProgramModule`,并将 `bin\\`加入环境变量`PATH`\n打开新的`Terminal`输入`gcc -v`,查看`gcc`是否安装成功\n在`VS code` 的插件管理下载`Code Runner`、`C\\C++`这两个插件\n在`*.c`源文件的内容区,右键点击`Run Code` ,即可运行成功\n![](/img/language/c-env-conf.png)\n\n### 数据类型\n- 整数类型\n ```c\n    short a = 12;\n    int b = 100;\n    long c = 1000L;\n    long long d = 1000000LL;\n    unsigned int e = 10;\n    printf(\"a: %hd\\n\",a);\n    printf(\"b: %d\\n\",b);\n    printf(\"c: %ld\\n\",c);\n    printf(\"d: %lld\\n\",d);\n    printf(\"e: %u\\n\",e);\n    printf(\"f: %.3f\\n\",f);\n ```\n- 小数类型\n ```c\n float f = 3.14F;\n printf(\"f: %.3f\\n\",f);\n double g = 5.65;\n printf(\"g: %.2lf\\n\",g);\n ```\n- 字符类型\n ```c\n char h = 'x';\n printf(\"x: %c\\n\",x);\n ```\n### 类型转换\n- 隐式转换\n- 强制转换\n ```c\n int b = 23;\n short c = (short) b;\n ```\n### 数组\n```c\n#include \n\nint main(){\n    int arr [10] = {2,3,4,5,6,7,8,9,10,11};\n    arr[0] = 1525;\n    *(arr+1) = 25;\n    int len = sizeof(arr)/sizeof(arr[0]);\n    void printArr(int arr[], int len){\n        for (int i = 0; i < len;i++){\n            printf(\"%d\\t\",arr[i]);\n        }\n    }\n    printArr(arr,len);\n    return 0;\n}\n```\n### 指针\n```c\n // swap the value of a and b\n    void swap(int* x, int* y){\n        int temp = *x;\n        *x = *y;\n        *y = temp;\n\n    }\n    int a = 5;\n    int b = 10;\n    swap(&a, &b);\n    printf(\"a = %d b = %d\\n\", a, b);\n```\n","source":"_posts/language/C.md","raw":"---\ntitle: C lang\ntags: C C++\nabbrlink: 12462\ndate: 2025-01-15 20:41:26\n---\n\n### c lang在windows下的开发(VS code)\n[WinLibs - GCC+MinGW-w64 compiler for Windows](https://winlibs.com/#download-release)下载你需要的版本\n解压到`D:\\ProgramModule`,并将 `bin\\`加入环境变量`PATH`\n打开新的`Terminal`输入`gcc -v`,查看`gcc`是否安装成功\n在`VS code` 的插件管理下载`Code Runner`、`C\\C++`这两个插件\n在`*.c`源文件的内容区,右键点击`Run Code` ,即可运行成功\n![](/img/language/c-env-conf.png)\n\n### 数据类型\n- 整数类型\n ```c\n    short a = 12;\n    int b = 100;\n    long c = 1000L;\n    long long d = 1000000LL;\n    unsigned int e = 10;\n    printf(\"a: %hd\\n\",a);\n    printf(\"b: %d\\n\",b);\n    printf(\"c: %ld\\n\",c);\n    printf(\"d: %lld\\n\",d);\n    printf(\"e: %u\\n\",e);\n    printf(\"f: %.3f\\n\",f);\n ```\n- 小数类型\n ```c\n float f = 3.14F;\n printf(\"f: %.3f\\n\",f);\n double g = 5.65;\n printf(\"g: %.2lf\\n\",g);\n ```\n- 字符类型\n ```c\n char h = 'x';\n printf(\"x: %c\\n\",x);\n ```\n### 类型转换\n- 隐式转换\n- 强制转换\n ```c\n int b = 23;\n short c = (short) b;\n ```\n### 数组\n```c\n#include \n\nint main(){\n    int arr [10] = {2,3,4,5,6,7,8,9,10,11};\n    arr[0] = 1525;\n    *(arr+1) = 25;\n    int len = sizeof(arr)/sizeof(arr[0]);\n    void printArr(int arr[], int len){\n        for (int i = 0; i < len;i++){\n            printf(\"%d\\t\",arr[i]);\n        }\n    }\n    printArr(arr,len);\n    return 0;\n}\n```\n### 指针\n```c\n // swap the value of a and b\n    void swap(int* x, int* y){\n        int temp = *x;\n        *x = *y;\n        *y = temp;\n\n    }\n    int a = 5;\n    int b = 10;\n    swap(&a, &b);\n    printf(\"a = %d b = %d\\n\", a, b);\n```\n","slug":"language/C","published":1,"updated":"2025-01-24T10:16:05.649Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jne00093wahhwfe0rze","content":"

c lang在windows下的开发(VS code)

WinLibs - GCC+MinGW-w64 compiler for Windows下载你需要的版本
解压到D:\\ProgramModule,并将 bin\\加入环境变量PATH
打开新的Terminal输入gcc -v,查看gcc是否安装成功
VS code 的插件管理下载Code RunnerC\\C++这两个插件
*.c源文件的内容区,右键点击Run Code ,即可运行成功

\n

数据类型

\n

类型转换

\n

数组

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include <stdio.h>

int main(){
    int arr [10] = {2,3,4,5,6,7,8,9,10,11};
    arr[0] = 1525;
    *(arr+1) = 25;
    int len = sizeof(arr)/sizeof(arr[0]);
    void printArr(int arr[], int len){
        for (int i = 0; i < len;i++){
            printf("%d\\t",arr[i]);
        }
    }
    printArr(arr,len);
    return 0;
}
\n

指针

1
2
3
4
5
6
7
8
9
10
11
// swap the value of a and b
    void swap(int* x, int* y){
        int temp = *x;
        *x = *y;
        *y = temp;

    }
    int a = 5;
    int b = 10;
    swap(&a, &b);
    printf("a = %d b = %d\\n", a, b);
\n","cover":false,"excerpt":"","more":"

c lang在windows下的开发(VS code)

WinLibs - GCC+MinGW-w64 compiler for Windows下载你需要的版本
解压到D:\\ProgramModule,并将 bin\\加入环境变量PATH
打开新的Terminal输入gcc -v,查看gcc是否安装成功
VS code 的插件管理下载Code RunnerC\\C++这两个插件
*.c源文件的内容区,右键点击Run Code ,即可运行成功

\n

数据类型

\n

类型转换

\n

数组

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include <stdio.h>

int main(){
    int arr [10] = {2,3,4,5,6,7,8,9,10,11};
    arr[0] = 1525;
    *(arr+1) = 25;
    int len = sizeof(arr)/sizeof(arr[0]);
    void printArr(int arr[], int len){
        for (int i = 0; i < len;i++){
            printf("%d\\t",arr[i]);
        }
    }
    printArr(arr,len);
    return 0;
}
\n

指针

1
2
3
4
5
6
7
8
9
10
11
// swap the value of a and b
    void swap(int* x, int* y){
        int temp = *x;
        *x = *y;
        *y = temp;

    }
    int a = 5;
    int b = 10;
    swap(&a, &b);
    printf("a = %d b = %d\\n", a, b);
\n"},{"title":"决策树算法","abbrlink":95,"date":"2025-01-24T04:39:59.000Z","cover":"/img/machinelearning/decision-tree.png","top_img":"/img/site01.jpg","_content":"\n### C4.5\nC4.5 是一种用于生成决策树的算法,不再使用信息增益,而是使用信息增益比,来避免偏向于选择取值较多的特征。信息增益比是信息增益与特征的熵的比值。\n### ID3\nD3 是一种基于信息增益(Information Gain)的决策树算法\n### Cart\nCART(分类与回归树)一种决策树算法,,CART 使用 **二叉树结构**,即每个节点只能有两个子节点。\n### cart剪枝\nCART 决策树的剪枝方法分为 **预剪枝**(Pre-pruning)和 **后剪枝**(Post-pruning)两种:\n#### **预剪枝**:\n预剪枝是在构建决策树时就决定是否停止进一步划分某个节点。主要通过以下标准来控制:\n- 当某个节点的样本数小于某个阈值时,不再继续划分。\n- 当某个节点的 Gini 不纯度小于某个阈值时,不再继续划分。\n预剪枝的优点是能够减少计算量,但缺点是可能会导致模型不够复杂,从而产生欠拟合。\n#### **后剪枝**:\n后剪枝是在决策树完全构建出来之后,对树进行修剪。具体过程如下:\n- 构建完整的决策树。\n- 从叶子节点开始,逐渐向上遍历树的每个节点。\n- 对每个节点进行判断,是否合适剪去该节点及其子树。如果剪去该子树后,模型的性能没有显著下降,就可以剪枝。\n后剪枝通过避免过度拟合来提高模型的泛化能力,但其计算开销较大。\n### 特征工程(特征提取)\n- **字典特征提取**\n 主要用于处理包含键值对(key-value pairs)的数据结构\n   ```python\n   from sklearn.feature_extraction import DictVectorizer\n   # 字典特征提取\n   data = [\n       {'city': 'beijing', 'temperature': 100},\n       {'city': 'shanghai', 'temperature': 95},\n       {'city': 'guangzhou', 'temperature': 98}\n   ]\n   transfer = DictVectorizer(sparse=False)\n   new_data = transfer.fit_transform(data)\n   print(transfer.feature_names_)\n   print(new_data)\n   ```\n- **文本特征提取**\n 主要用于将文本数据(如句子、段落、文章等)转换成数值型特征。这对于文本分类、信息检索等任务非常重要。\n   ```python\n   from sklearn.feature_extraction.text import CountVectorizer\n   # 示例文本数据\n   data = [\n       \"I love programming\",\n       \"Python is great\",\n       \"I love machine learning\"\n   ]\n   # 创建 CountVectorizer 对象\n   transfer = CountVectorizer()\n   # 将文本数据转换为特征向量\n   new_data = transfer.fit_transform(data)\n   # 输出特征名称\n   print(\"Feature Names:\", transfer.get_feature_names_out())\n   # 输出转换后的特征矩阵\n   print(\"Transformed Data:\", new_data.toarray())\n   ```\n- 文本特征提取(中文文本)\n ```python\n  from sklearn.feature_extraction.text import CountVectorizer\n  import jieba\n  # 中文文本数据(大于20个字)\n  data = [\n    \"我热爱编程,学习编程语言是一件非常有趣的事情,它能够提升我们解决问题的能力,编程让我变得更加有创意。\",\n    \"Python语言是一门非常强大的编程语言,具有简洁的语法和丰富的库,可以帮助开发者更高效地完成任务。\",\n    \"机器学习是一项非常有前途的技术,它能够让计算机从数据中自动学习,逐步提高模型的精确度,解决实际问题。\"\n\n  ]\n  # 使用jieba分词\n  text_list = []\n  for line in data:\n    text_list.append(\" \".join(list(jieba.cut(line))))\n  # 创建 CountVectorizer 对象\n  transfer = CountVectorizer()\n  # 将文本数据转换为特征向量\n  new_data = transfer.fit_transform(text_list)\n  # 输出特征名称\n  print(\"Feature Names:\", transfer.get_feature_names_out())\n  # 输出转换后的特征矩阵\n  print(\"Transformed Data:\", new_data.toarray())\n ```\n### tf-idf\n> 词频 * 逆文档频率\n```python\n# tfi-df\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nimport jieba\ndata=[\"一种还是一种今天很残酷,明天更残酷,后天很美好,但绝对大部分是死在明天晚上,所以每个人不要放弃今天。\",\n \"我们看到的从很远星系来的光是在几百万年之前发出的,这样当我们看到宇宙时,我们是在看它的过去\",\n \"如果只用一种方式了解某样事物,你就不会真正了解它。了解事物真正含义的秘密取决于如何将其与我们所了解的事物相联系\"]\nlist = []\nfor item in data:\n list.append(\" \".join(jieba.cut(item)))\ntransfer = TfidfVectorizer()\nnew_data = transfer.fit_transform(list)\nprint(f\"特征名字:\\n{transfer.get_feature_names_out()}\")\n\nprint(f\"转换后的特征矩阵:\\n{ new_data.toarray()}\")\nprint(f\"转换后的数据:\\n{new_data}\")\n```\n\n\n### 回归决策树\n#### 决策树算法的应用 (泰坦尼克号沉船幸存者预测)\n```python\nimport seaborn as sns\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.tree import DecisionTreeClassifier,export_graphviz\n# 1.获取数据集 - 加载 Titanic 数据集\ntitanic = sns.load_dataset('titanic')\nmissing_age_count = titanic['age'].isna().sum()\n# print(f\"缺失的 age 数量: {missing_age_count}\")\n# 2. 数据基本处理\n# 2.1 确认特征值、目标值\nX = titanic[['pclass','age','sex']]\ny = titanic['survived']\n# 2.2 缺失值处理\nX.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改\n# 2.3 划分数据集\nX_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)\n# 3. 特征工程(字典特征提取)\nX_train = X_train.to_dict(orient=\"records\")\nX_test= X_test.to_dict(orient=\"records\")\ntransfer = DictVectorizer()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# 4. 机器学习 决策树算法\nestimator = DecisionTreeClassifier(criterion=\"gini\")\nestimator.fit(X_train,y_train)\ny_pred = estimator.predict(X_test)\nprint(f\"模型的测试集的预测值:{y_pred}\")\nret = estimator.score(X_test,y_test)\nprint(f\"模型的评分:{ret}\")\nprint(X_test.toarray())\n```\n\n生成对应的图\n```python\nfrom sklearn.tree import export_graphviz\nimport graphviz # 用于渲染图像\n\n# 导出决策树的 Graphviz 表示\nexport_graphviz(estimator, out_file='./data/tree.dot', \n feature_names=transfer.get_feature_names_out()) # 特征名称\n# 使用 graphviz 渲染 .dot 文件\nwith open('./data/tree.dot', 'r') as f:\n dot_graph = f.read()\n# 渲染决策树\ngraph = graphviz.Source(dot_graph)\n\n# 设置保存路径\noutput_path = './data/decision_tree' # 自定义保存路径\n\n# 保存图像到指定路径,格式可以是 .png, .pdf, .jpg 等\n# graph.render(output_path, format='png') # 保存为 .png 文件\n\n# 显示图像\ngraph.view(output_path) # 打开图像,path为保存路径,不需要加后缀\n\n```\n\n[Webgraphviz](http://webgraphviz.com/),这个网站可以将`tree.dot`文件的内容生成对应的可视化树\n\n\n#### 回归决策树与线性回归的对比\n```python\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.tree import DecisionTreeRegressor\nfrom matplotlib import rcParams\n\n# 设置matplotlib使用的字体为SimHei(黑体)\nrcParams['font.sans-serif'] = ['SimHei'] # 也可以使用 'Microsoft YaHei'\nrcParams['axes.unicode_minus'] = False # 正常显示负号\nx = np.array(list(range(1,11))).reshape(-1,1)\ny = ([5.56,5.70,5.91,6.40,6.80,7.05,8.90,8.70,9.00,9.05])\n\nm1 = DecisionTreeRegressor(max_depth=1)\nm2 = DecisionTreeRegressor(max_depth=3)\nm3 = DecisionTreeRegressor()\n\n# 模型训练\nm1.fit(x,y)\nm2.fit(x,y)\nm3.fit(x,y)\n\n# 模型预测\nx_test = np.arange(0,10,0.01).reshape(-1,1)\ny_1 = m1.predict(x_test)\ny_2 = m2.predict(x_test)\ny_3 = m3.predict(x_test)\n\n# 结果展示\nplt.figure(figsize=(10,6),dpi=100)\nplt.scatter(x,y ,label = \"data\")\nplt.plot(x_test,y_1,label = \"max_depth=1\")\nplt.plot(x_test,y_2,label = \"max_depth=3\")\nplt.plot(x_test,y_3,label = \"linearregression\")\nplt.xlabel(\"数据\")\nplt.ylabel(\"预测值\")\nplt.legend()\nplt.show()\n```\n","source":"_posts/machinelearning/decisiontree.md","raw":"---\ntitle: 决策树算法\ntags: decisiontree\ncategories: machinelearning\nabbrlink: 95\ndate: 2025-01-24 12:39:59\ncover: /img/machinelearning/decision-tree.png\ntop_img: /img/site01.jpg\n---\n\n### C4.5\nC4.5 是一种用于生成决策树的算法,不再使用信息增益,而是使用信息增益比,来避免偏向于选择取值较多的特征。信息增益比是信息增益与特征的熵的比值。\n### ID3\nD3 是一种基于信息增益(Information Gain)的决策树算法\n### Cart\nCART(分类与回归树)一种决策树算法,,CART 使用 **二叉树结构**,即每个节点只能有两个子节点。\n### cart剪枝\nCART 决策树的剪枝方法分为 **预剪枝**(Pre-pruning)和 **后剪枝**(Post-pruning)两种:\n#### **预剪枝**:\n预剪枝是在构建决策树时就决定是否停止进一步划分某个节点。主要通过以下标准来控制:\n- 当某个节点的样本数小于某个阈值时,不再继续划分。\n- 当某个节点的 Gini 不纯度小于某个阈值时,不再继续划分。\n预剪枝的优点是能够减少计算量,但缺点是可能会导致模型不够复杂,从而产生欠拟合。\n#### **后剪枝**:\n后剪枝是在决策树完全构建出来之后,对树进行修剪。具体过程如下:\n- 构建完整的决策树。\n- 从叶子节点开始,逐渐向上遍历树的每个节点。\n- 对每个节点进行判断,是否合适剪去该节点及其子树。如果剪去该子树后,模型的性能没有显著下降,就可以剪枝。\n后剪枝通过避免过度拟合来提高模型的泛化能力,但其计算开销较大。\n### 特征工程(特征提取)\n- **字典特征提取**\n 主要用于处理包含键值对(key-value pairs)的数据结构\n   ```python\n   from sklearn.feature_extraction import DictVectorizer\n   # 字典特征提取\n   data = [\n       {'city': 'beijing', 'temperature': 100},\n       {'city': 'shanghai', 'temperature': 95},\n       {'city': 'guangzhou', 'temperature': 98}\n   ]\n   transfer = DictVectorizer(sparse=False)\n   new_data = transfer.fit_transform(data)\n   print(transfer.feature_names_)\n   print(new_data)\n   ```\n- **文本特征提取**\n 主要用于将文本数据(如句子、段落、文章等)转换成数值型特征。这对于文本分类、信息检索等任务非常重要。\n   ```python\n   from sklearn.feature_extraction.text import CountVectorizer\n   # 示例文本数据\n   data = [\n       \"I love programming\",\n       \"Python is great\",\n       \"I love machine learning\"\n   ]\n   # 创建 CountVectorizer 对象\n   transfer = CountVectorizer()\n   # 将文本数据转换为特征向量\n   new_data = transfer.fit_transform(data)\n   # 输出特征名称\n   print(\"Feature Names:\", transfer.get_feature_names_out())\n   # 输出转换后的特征矩阵\n   print(\"Transformed Data:\", new_data.toarray())\n   ```\n- 文本特征提取(中文文本)\n ```python\n  from sklearn.feature_extraction.text import CountVectorizer\n  import jieba\n  # 中文文本数据(大于20个字)\n  data = [\n    \"我热爱编程,学习编程语言是一件非常有趣的事情,它能够提升我们解决问题的能力,编程让我变得更加有创意。\",\n    \"Python语言是一门非常强大的编程语言,具有简洁的语法和丰富的库,可以帮助开发者更高效地完成任务。\",\n    \"机器学习是一项非常有前途的技术,它能够让计算机从数据中自动学习,逐步提高模型的精确度,解决实际问题。\"\n\n  ]\n  # 使用jieba分词\n  text_list = []\n  for line in data:\n    text_list.append(\" \".join(list(jieba.cut(line))))\n  # 创建 CountVectorizer 对象\n  transfer = CountVectorizer()\n  # 将文本数据转换为特征向量\n  new_data = transfer.fit_transform(text_list)\n  # 输出特征名称\n  print(\"Feature Names:\", transfer.get_feature_names_out())\n  # 输出转换后的特征矩阵\n  print(\"Transformed Data:\", new_data.toarray())\n ```\n### tf-idf\n> 词频 * 逆文档频率\n```python\n# tfi-df\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nimport jieba\ndata=[\"一种还是一种今天很残酷,明天更残酷,后天很美好,但绝对大部分是死在明天晚上,所以每个人不要放弃今天。\",\n \"我们看到的从很远星系来的光是在几百万年之前发出的,这样当我们看到宇宙时,我们是在看它的过去\",\n \"如果只用一种方式了解某样事物,你就不会真正了解它。了解事物真正含义的秘密取决于如何将其与我们所了解的事物相联系\"]\nlist = []\nfor item in data:\n list.append(\" \".join(jieba.cut(item)))\ntransfer = TfidfVectorizer()\nnew_data = transfer.fit_transform(list)\nprint(f\"特征名字:\\n{transfer.get_feature_names_out()}\")\n\nprint(f\"转换后的特征矩阵:\\n{ new_data.toarray()}\")\nprint(f\"转换后的数据:\\n{new_data}\")\n```\n\n\n### 回归决策树\n#### 决策树算法的应用 (泰坦尼克号沉船幸存者预测)\n```python\nimport seaborn as sns\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.tree import DecisionTreeClassifier,export_graphviz\n# 1.获取数据集 - 加载 Titanic 数据集\ntitanic = sns.load_dataset('titanic')\nmissing_age_count = titanic['age'].isna().sum()\n# print(f\"缺失的 age 数量: {missing_age_count}\")\n# 2. 数据基本处理\n# 2.1 确认特征值、目标值\nX = titanic[['pclass','age','sex']]\ny = titanic['survived']\n# 2.2 缺失值处理\nX.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改\n# 2.3 划分数据集\nX_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)\n# 3. 特征工程(字典特征提取)\nX_train = X_train.to_dict(orient=\"records\")\nX_test= X_test.to_dict(orient=\"records\")\ntransfer = DictVectorizer()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# 4. 机器学习 决策树算法\nestimator = DecisionTreeClassifier(criterion=\"gini\")\nestimator.fit(X_train,y_train)\ny_pred = estimator.predict(X_test)\nprint(f\"模型的测试集的预测值:{y_pred}\")\nret = estimator.score(X_test,y_test)\nprint(f\"模型的评分:{ret}\")\nprint(X_test.toarray())\n```\n\n生成对应的图\n```python\nfrom sklearn.tree import export_graphviz\nimport graphviz # 用于渲染图像\n\n# 导出决策树的 Graphviz 表示\nexport_graphviz(estimator, out_file='./data/tree.dot', \n feature_names=transfer.get_feature_names_out()) # 特征名称\n# 使用 graphviz 渲染 .dot 文件\nwith open('./data/tree.dot', 'r') as f:\n dot_graph = f.read()\n# 渲染决策树\ngraph = graphviz.Source(dot_graph)\n\n# 设置保存路径\noutput_path = './data/decision_tree' # 自定义保存路径\n\n# 保存图像到指定路径,格式可以是 .png, .pdf, .jpg 等\n# graph.render(output_path, format='png') # 保存为 .png 文件\n\n# 显示图像\ngraph.view(output_path) # 打开图像,path为保存路径,不需要加后缀\n\n```\n\n[Webgraphviz](http://webgraphviz.com/),这个网站可以将`tree.dot`文件的内容生成对应的可视化树\n\n\n#### 回归决策树与线性回归的对比\n```python\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.tree import DecisionTreeRegressor\nfrom matplotlib import rcParams\n\n# 设置matplotlib使用的字体为SimHei(黑体)\nrcParams['font.sans-serif'] = ['SimHei'] # 也可以使用 'Microsoft YaHei'\nrcParams['axes.unicode_minus'] = False # 正常显示负号\nx = np.array(list(range(1,11))).reshape(-1,1)\ny = ([5.56,5.70,5.91,6.40,6.80,7.05,8.90,8.70,9.00,9.05])\n\nm1 = DecisionTreeRegressor(max_depth=1)\nm2 = DecisionTreeRegressor(max_depth=3)\nm3 = DecisionTreeRegressor()\n\n# 模型训练\nm1.fit(x,y)\nm2.fit(x,y)\nm3.fit(x,y)\n\n# 模型预测\nx_test = np.arange(0,10,0.01).reshape(-1,1)\ny_1 = m1.predict(x_test)\ny_2 = m2.predict(x_test)\ny_3 = m3.predict(x_test)\n\n# 结果展示\nplt.figure(figsize=(10,6),dpi=100)\nplt.scatter(x,y ,label = \"data\")\nplt.plot(x_test,y_1,label = \"max_depth=1\")\nplt.plot(x_test,y_2,label = \"max_depth=3\")\nplt.plot(x_test,y_3,label = \"linearregression\")\nplt.xlabel(\"数据\")\nplt.ylabel(\"预测值\")\nplt.legend()\nplt.show()\n```\n","slug":"machinelearning/decisiontree","published":1,"updated":"2025-02-22T05:21:34.232Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnf000b3wahhxw69j7q","content":"

C4.5

C4.5 是一种用于生成决策树的算法,不再使用信息增益,而是使用信息增益比,来避免偏向于选择取值较多的特征。信息增益比是信息增益与特征的熵的比值。

\n

ID3

D3 是一种基于信息增益(Information Gain)的决策树算法

\n

Cart

CART(分类与回归树)一种决策树算法,,CART 使用 二叉树结构,即每个节点只能有两个子节点。

\n

cart剪枝

CART 决策树的剪枝方法分为 预剪枝(Pre-pruning)和 后剪枝(Post-pruning)两种:

\n

预剪枝

预剪枝是在构建决策树时就决定是否停止进一步划分某个节点。主要通过以下标准来控制:

\n\n

后剪枝

后剪枝是在决策树完全构建出来之后,对树进行修剪。具体过程如下:

\n\n

特征工程(特征提取)

\n

tf-idf

\n

词频 * 逆文档频率

\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# tfi-df
from sklearn.feature_extraction.text import TfidfVectorizer
import jieba
data=["一种还是一种今天很残酷,明天更残酷,后天很美好,但绝对大部分是死在明天晚上,所以每个人不要放弃今天。",
"我们看到的从很远星系来的光是在几百万年之前发出的,这样当我们看到宇宙时,我们是在看它的过去",
"如果只用一种方式了解某样事物,你就不会真正了解它。了解事物真正含义的秘密取决于如何将其与我们所了解的事物相联系"]
list = []
for item in data:
list.append(" ".join(jieba.cut(item)))
transfer = TfidfVectorizer()
new_data = transfer.fit_transform(list)
print(f"特征名字:\\n{transfer.get_feature_names_out()}")

print(f"转换后的特征矩阵:\\n{ new_data.toarray()}")
print(f"转换后的数据:\\n{new_data}")
\n\n\n

回归决策树

决策树算法的应用 (泰坦尼克号沉船幸存者预测)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier,export_graphviz
# 1.获取数据集 - 加载 Titanic 数据集
titanic = sns.load_dataset('titanic')
missing_age_count = titanic['age'].isna().sum()
# print(f"缺失的 age 数量: {missing_age_count}")
# 2. 数据基本处理
# 2.1 确认特征值、目标值
X = titanic[['pclass','age','sex']]
y = titanic['survived']
# 2.2 缺失值处理
X.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改
# 2.3 划分数据集
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)
# 3. 特征工程(字典特征提取)
X_train = X_train.to_dict(orient="records")
X_test= X_test.to_dict(orient="records")
transfer = DictVectorizer()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# 4. 机器学习 决策树算法
estimator = DecisionTreeClassifier(criterion="gini")
estimator.fit(X_train,y_train)
y_pred = estimator.predict(X_test)
print(f"模型的测试集的预测值:{y_pred}")
ret = estimator.score(X_test,y_test)
print(f"模型的评分:{ret}")
print(X_test.toarray())
\n\n

生成对应的图

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from sklearn.tree import export_graphviz
import graphviz # 用于渲染图像

# 导出决策树的 Graphviz 表示
export_graphviz(estimator, out_file='./data/tree.dot',
feature_names=transfer.get_feature_names_out()) # 特征名称
# 使用 graphviz 渲染 .dot 文件
with open('./data/tree.dot', 'r') as f:
dot_graph = f.read()
# 渲染决策树
graph = graphviz.Source(dot_graph)

# 设置保存路径
output_path = './data/decision_tree' # 自定义保存路径

# 保存图像到指定路径,格式可以是 .png, .pdf, .jpg 等
# graph.render(output_path, format='png') # 保存为 .png 文件

# 显示图像
graph.view(output_path) # 打开图像,path为保存路径,不需要加后缀

\n\n

Webgraphviz,这个网站可以将tree.dot文件的内容生成对应的可视化树

\n

回归决策树与线性回归的对比

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from matplotlib import rcParams

# 设置matplotlib使用的字体为SimHei(黑体)
rcParams['font.sans-serif'] = ['SimHei'] # 也可以使用 'Microsoft YaHei'
rcParams['axes.unicode_minus'] = False # 正常显示负号
x = np.array(list(range(1,11))).reshape(-1,1)
y = ([5.56,5.70,5.91,6.40,6.80,7.05,8.90,8.70,9.00,9.05])

m1 = DecisionTreeRegressor(max_depth=1)
m2 = DecisionTreeRegressor(max_depth=3)
m3 = DecisionTreeRegressor()

# 模型训练
m1.fit(x,y)
m2.fit(x,y)
m3.fit(x,y)

# 模型预测
x_test = np.arange(0,10,0.01).reshape(-1,1)
y_1 = m1.predict(x_test)
y_2 = m2.predict(x_test)
y_3 = m3.predict(x_test)

# 结果展示
plt.figure(figsize=(10,6),dpi=100)
plt.scatter(x,y ,label = "data")
plt.plot(x_test,y_1,label = "max_depth=1")
plt.plot(x_test,y_2,label = "max_depth=3")
plt.plot(x_test,y_3,label = "linearregression")
plt.xlabel("数据")
plt.ylabel("预测值")
plt.legend()
plt.show()
\n","cover_type":"img","excerpt":"","more":"

C4.5

C4.5 是一种用于生成决策树的算法,不再使用信息增益,而是使用信息增益比,来避免偏向于选择取值较多的特征。信息增益比是信息增益与特征的熵的比值。

\n

ID3

D3 是一种基于信息增益(Information Gain)的决策树算法

\n

Cart

CART(分类与回归树)一种决策树算法,,CART 使用 二叉树结构,即每个节点只能有两个子节点。

\n

cart剪枝

CART 决策树的剪枝方法分为 预剪枝(Pre-pruning)和 后剪枝(Post-pruning)两种:

\n

预剪枝

预剪枝是在构建决策树时就决定是否停止进一步划分某个节点。主要通过以下标准来控制:

\n\n

后剪枝

后剪枝是在决策树完全构建出来之后,对树进行修剪。具体过程如下:

\n\n

特征工程(特征提取)

\n

tf-idf

\n

词频 * 逆文档频率

\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# tfi-df
from sklearn.feature_extraction.text import TfidfVectorizer
import jieba
data=["一种还是一种今天很残酷,明天更残酷,后天很美好,但绝对大部分是死在明天晚上,所以每个人不要放弃今天。",
"我们看到的从很远星系来的光是在几百万年之前发出的,这样当我们看到宇宙时,我们是在看它的过去",
"如果只用一种方式了解某样事物,你就不会真正了解它。了解事物真正含义的秘密取决于如何将其与我们所了解的事物相联系"]
list = []
for item in data:
list.append(" ".join(jieba.cut(item)))
transfer = TfidfVectorizer()
new_data = transfer.fit_transform(list)
print(f"特征名字:\\n{transfer.get_feature_names_out()}")

print(f"转换后的特征矩阵:\\n{ new_data.toarray()}")
print(f"转换后的数据:\\n{new_data}")
\n\n\n

回归决策树

决策树算法的应用 (泰坦尼克号沉船幸存者预测)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier,export_graphviz
# 1.获取数据集 - 加载 Titanic 数据集
titanic = sns.load_dataset('titanic')
missing_age_count = titanic['age'].isna().sum()
# print(f"缺失的 age 数量: {missing_age_count}")
# 2. 数据基本处理
# 2.1 确认特征值、目标值
X = titanic[['pclass','age','sex']]
y = titanic['survived']
# 2.2 缺失值处理
X.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改
# 2.3 划分数据集
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)
# 3. 特征工程(字典特征提取)
X_train = X_train.to_dict(orient="records")
X_test= X_test.to_dict(orient="records")
transfer = DictVectorizer()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# 4. 机器学习 决策树算法
estimator = DecisionTreeClassifier(criterion="gini")
estimator.fit(X_train,y_train)
y_pred = estimator.predict(X_test)
print(f"模型的测试集的预测值:{y_pred}")
ret = estimator.score(X_test,y_test)
print(f"模型的评分:{ret}")
print(X_test.toarray())
\n\n

生成对应的图

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from sklearn.tree import export_graphviz
import graphviz # 用于渲染图像

# 导出决策树的 Graphviz 表示
export_graphviz(estimator, out_file='./data/tree.dot',
feature_names=transfer.get_feature_names_out()) # 特征名称
# 使用 graphviz 渲染 .dot 文件
with open('./data/tree.dot', 'r') as f:
dot_graph = f.read()
# 渲染决策树
graph = graphviz.Source(dot_graph)

# 设置保存路径
output_path = './data/decision_tree' # 自定义保存路径

# 保存图像到指定路径,格式可以是 .png, .pdf, .jpg 等
# graph.render(output_path, format='png') # 保存为 .png 文件

# 显示图像
graph.view(output_path) # 打开图像,path为保存路径,不需要加后缀

\n\n

Webgraphviz,这个网站可以将tree.dot文件的内容生成对应的可视化树

\n

回归决策树与线性回归的对比

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from matplotlib import rcParams

# 设置matplotlib使用的字体为SimHei(黑体)
rcParams['font.sans-serif'] = ['SimHei'] # 也可以使用 'Microsoft YaHei'
rcParams['axes.unicode_minus'] = False # 正常显示负号
x = np.array(list(range(1,11))).reshape(-1,1)
y = ([5.56,5.70,5.91,6.40,6.80,7.05,8.90,8.70,9.00,9.05])

m1 = DecisionTreeRegressor(max_depth=1)
m2 = DecisionTreeRegressor(max_depth=3)
m3 = DecisionTreeRegressor()

# 模型训练
m1.fit(x,y)
m2.fit(x,y)
m3.fit(x,y)

# 模型预测
x_test = np.arange(0,10,0.01).reshape(-1,1)
y_1 = m1.predict(x_test)
y_2 = m2.predict(x_test)
y_3 = m3.predict(x_test)

# 结果展示
plt.figure(figsize=(10,6),dpi=100)
plt.scatter(x,y ,label = "data")
plt.plot(x_test,y_1,label = "max_depth=1")
plt.plot(x_test,y_2,label = "max_depth=3")
plt.plot(x_test,y_3,label = "linearregression")
plt.xlabel("数据")
plt.ylabel("预测值")
plt.legend()
plt.show()
\n"},{"title":"集成学习","abbrlink":8816,"date":"2025-01-25T07:12:08.000Z","cover":"/img/machinelearning/ensemble-learning.png","top_img":"/img/site01.jpg","_content":"\n### Bagging\n\n### 随机森林\n> `Random-Forest` 就是`Bagging + Decisiontree`\n```python\nimport seaborn as sns\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split,GridSearchCV\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.ensemble import RandomForestClassifier\n# 1.获取数据集 - 加载 Titanic 数据集\ntitanic = sns.load_dataset('titanic')\nmissing_age_count = titanic['age'].isna().sum()\n# print(f\"缺失的 age 数量: {missing_age_count}\")\n# 2. 数据基本处理\n# 2.1 确认特征值、目标值\nX = titanic[['pclass','age','sex']]\ny = titanic['survived']\n# 2.2 缺失值处理\nX.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改\n# 2.3 划分数据集\nX_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)\n# 3. 特征工程(字典特征提取)\nX_train = X_train.to_dict(orient=\"records\")\nX_test= X_test.to_dict(orient=\"records\")\ntransfer = DictVectorizer()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# 4. 机器学习 随机森林\nrf = RandomForestClassifier()\ngc = GridSearchCV(estimator=rf ,param_grid={\"n_estimators\":[100,120,300],\"max_depth\":[3,7,11]},cv=3)\ngc.fit(X_train,y_train)\ny_pred = gc.predict(X_test)\nprint(f\"模型的测试集的预测值:{y_pred}\")\nret = gc.score(X_test,y_test)\nprint(f\"最佳模型在测试集上的评分:{ret}\")\nprint(f\"最佳模型的参数:{gc.best_estimator_}\")\nprint(f\"最佳模型在训练集上的评分:{gc.best_score_}\")\nprint(X_test.toarray())\n```\n![](/img/machinelearning/random-forest.png)\n\n### ott案例\n```python\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom imblearn.under_sampling import RandomUnderSampler\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import log_loss\nfrom sklearn.preprocessing import OneHotEncoder\n# 1. 获取数据集\ndata = pd.read_csv('./data/train.csv')\n# 查看目标值分类\nimport seaborn as sns\nsns.countplot(data=data, x='target', hue='target', palette=\"Set2\", legend=False) # 使用 hue='target' 替代 palette\nplt.show()\n\n# 2. 数据集的基本处理\n# 2.1 确定特征值、目标值\nx = data.drop([\"id\", \"target\"], axis=1)\ny = data['target']\n\n# 2.2 使用随机欠采样进行平衡\nundersampler = RandomUnderSampler(sampling_strategy='auto', random_state=0)\nx_resampled, y_resampled = undersampler.fit_resample(x, y)\n\n# 查看欠采样后的类别分布\n# print(f\"欠采样后训练集中的类别分布:\\n{y_train_resampled.value_counts()}\")\n\n# 2.3. 将标签转换为数字\nle = LabelEncoder()\ny_resampled = le.fit_transform(y_resampled)\n\n# 2.4. 划分训练集和测试集\nx_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.2)\n\n # 3. 机器学习\nrf = RandomForestClassifier(oob_score = True)\nrf.fit(x_train,y_train)\ny_pred = rf.predict(x_test)\nprint(f\"预测值:{y_pred}\")\nprint(f\"评分:{rf.score(x_test,y_test)}\")\n\n# # 4. 模型评估 (解决二分类预测问题)\n# import numpy as np\n# from sklearn.metrics import log_loss\n# # 假设 y_pred_prob 是通过 predict_proba 得到的预测概率\n# # 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内\n# eps = 1e-15 # 设置一个小的eps值,避免极端值\n# y_pred_prob = rf.predict_proba(x_test)\n# y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)\n\n# # 计算 log_loss\n# loss = log_loss(y_test, y_pred_prob, normalize=True)\n# print(f\"Log Loss: {loss}\")\n\n# 4. 模型评估 (解决多分类预测问题)\n\n# 获取预测的概率\ny_pred_prob = rf.predict_proba(x_test)\n\n# 使用 OneHotEncoder 对 y_test 进行 One-Hot 编码\nencoder = OneHotEncoder(sparse_output=False) # 确保返回的是密集矩阵\ny_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))\n\n# 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内\neps = 1e-15\ny_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)\n\n# 计算 log_loss\nloss = log_loss(y_test_one_hot, y_pred_prob, normalize=True)\nprint(f\"Log Loss: {loss}\")\n\n```\n![](/img/machinelearning/ott.png)","source":"_posts/machinelearning/ensemblelearning.md","raw":"---\ntitle: 集成学习\ntags: ensemble-learning\ncategories: machinelearning\nabbrlink: 8816\ndate: 2025-01-25 15:12:08\ncover: /img/machinelearning/ensemble-learning.png\ntop_img: /img/site01.jpg\n---\n\n### Bagging\n\n### 随机森林\n> `Random-Forest` 就是`Bagging + Decisiontree`\n```python\nimport seaborn as sns\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split,GridSearchCV\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.ensemble import RandomForestClassifier\n# 1.获取数据集 - 加载 Titanic 数据集\ntitanic = sns.load_dataset('titanic')\nmissing_age_count = titanic['age'].isna().sum()\n# print(f\"缺失的 age 数量: {missing_age_count}\")\n# 2. 数据基本处理\n# 2.1 确认特征值、目标值\nX = titanic[['pclass','age','sex']]\ny = titanic['survived']\n# 2.2 缺失值处理\nX.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改\n# 2.3 划分数据集\nX_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)\n# 3. 特征工程(字典特征提取)\nX_train = X_train.to_dict(orient=\"records\")\nX_test= X_test.to_dict(orient=\"records\")\ntransfer = DictVectorizer()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# 4. 机器学习 随机森林\nrf = RandomForestClassifier()\ngc = GridSearchCV(estimator=rf ,param_grid={\"n_estimators\":[100,120,300],\"max_depth\":[3,7,11]},cv=3)\ngc.fit(X_train,y_train)\ny_pred = gc.predict(X_test)\nprint(f\"模型的测试集的预测值:{y_pred}\")\nret = gc.score(X_test,y_test)\nprint(f\"最佳模型在测试集上的评分:{ret}\")\nprint(f\"最佳模型的参数:{gc.best_estimator_}\")\nprint(f\"最佳模型在训练集上的评分:{gc.best_score_}\")\nprint(X_test.toarray())\n```\n![](/img/machinelearning/random-forest.png)\n\n### ott案例\n```python\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom imblearn.under_sampling import RandomUnderSampler\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import log_loss\nfrom sklearn.preprocessing import OneHotEncoder\n# 1. 获取数据集\ndata = pd.read_csv('./data/train.csv')\n# 查看目标值分类\nimport seaborn as sns\nsns.countplot(data=data, x='target', hue='target', palette=\"Set2\", legend=False) # 使用 hue='target' 替代 palette\nplt.show()\n\n# 2. 数据集的基本处理\n# 2.1 确定特征值、目标值\nx = data.drop([\"id\", \"target\"], axis=1)\ny = data['target']\n\n# 2.2 使用随机欠采样进行平衡\nundersampler = RandomUnderSampler(sampling_strategy='auto', random_state=0)\nx_resampled, y_resampled = undersampler.fit_resample(x, y)\n\n# 查看欠采样后的类别分布\n# print(f\"欠采样后训练集中的类别分布:\\n{y_train_resampled.value_counts()}\")\n\n# 2.3. 将标签转换为数字\nle = LabelEncoder()\ny_resampled = le.fit_transform(y_resampled)\n\n# 2.4. 划分训练集和测试集\nx_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.2)\n\n # 3. 机器学习\nrf = RandomForestClassifier(oob_score = True)\nrf.fit(x_train,y_train)\ny_pred = rf.predict(x_test)\nprint(f\"预测值:{y_pred}\")\nprint(f\"评分:{rf.score(x_test,y_test)}\")\n\n# # 4. 模型评估 (解决二分类预测问题)\n# import numpy as np\n# from sklearn.metrics import log_loss\n# # 假设 y_pred_prob 是通过 predict_proba 得到的预测概率\n# # 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内\n# eps = 1e-15 # 设置一个小的eps值,避免极端值\n# y_pred_prob = rf.predict_proba(x_test)\n# y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)\n\n# # 计算 log_loss\n# loss = log_loss(y_test, y_pred_prob, normalize=True)\n# print(f\"Log Loss: {loss}\")\n\n# 4. 模型评估 (解决多分类预测问题)\n\n# 获取预测的概率\ny_pred_prob = rf.predict_proba(x_test)\n\n# 使用 OneHotEncoder 对 y_test 进行 One-Hot 编码\nencoder = OneHotEncoder(sparse_output=False) # 确保返回的是密集矩阵\ny_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))\n\n# 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内\neps = 1e-15\ny_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)\n\n# 计算 log_loss\nloss = log_loss(y_test_one_hot, y_pred_prob, normalize=True)\nprint(f\"Log Loss: {loss}\")\n\n```\n![](/img/machinelearning/ott.png)","slug":"machinelearning/ensemblelearning","published":1,"updated":"2025-02-22T05:21:46.779Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jni000d3wah7wwg0zp1","content":"

Bagging

随机森林

\n

Random-Forest 就是Bagging + Decisiontree

\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestClassifier
# 1.获取数据集 - 加载 Titanic 数据集
titanic = sns.load_dataset('titanic')
missing_age_count = titanic['age'].isna().sum()
# print(f"缺失的 age 数量: {missing_age_count}")
# 2. 数据基本处理
# 2.1 确认特征值、目标值
X = titanic[['pclass','age','sex']]
y = titanic['survived']
# 2.2 缺失值处理
X.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改
# 2.3 划分数据集
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)
# 3. 特征工程(字典特征提取)
X_train = X_train.to_dict(orient="records")
X_test= X_test.to_dict(orient="records")
transfer = DictVectorizer()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# 4. 机器学习 随机森林
rf = RandomForestClassifier()
gc = GridSearchCV(estimator=rf ,param_grid={"n_estimators":[100,120,300],"max_depth":[3,7,11]},cv=3)
gc.fit(X_train,y_train)
y_pred = gc.predict(X_test)
print(f"模型的测试集的预测值:{y_pred}")
ret = gc.score(X_test,y_test)
print(f"最佳模型在测试集上的评分:{ret}")
print(f"最佳模型的参数:{gc.best_estimator_}")
print(f"最佳模型在训练集上的评分:{gc.best_score_}")
print(X_test.toarray())
\n

\n

ott案例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.preprocessing import OneHotEncoder
# 1. 获取数据集
data = pd.read_csv('./data/train.csv')
# 查看目标值分类
import seaborn as sns
sns.countplot(data=data, x='target', hue='target', palette="Set2", legend=False) # 使用 hue='target' 替代 palette
plt.show()

# 2. 数据集的基本处理
# 2.1 确定特征值、目标值
x = data.drop(["id", "target"], axis=1)
y = data['target']

# 2.2 使用随机欠采样进行平衡
undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=0)
x_resampled, y_resampled = undersampler.fit_resample(x, y)

# 查看欠采样后的类别分布
# print(f"欠采样后训练集中的类别分布:\\n{y_train_resampled.value_counts()}")

# 2.3. 将标签转换为数字
le = LabelEncoder()
y_resampled = le.fit_transform(y_resampled)

# 2.4. 划分训练集和测试集
x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.2)

# 3. 机器学习
rf = RandomForestClassifier(oob_score = True)
rf.fit(x_train,y_train)
y_pred = rf.predict(x_test)
print(f"预测值:{y_pred}")
print(f"评分:{rf.score(x_test,y_test)}")

# # 4. 模型评估 (解决二分类预测问题)
# import numpy as np
# from sklearn.metrics import log_loss
# # 假设 y_pred_prob 是通过 predict_proba 得到的预测概率
# # 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内
# eps = 1e-15 # 设置一个小的eps值,避免极端值
# y_pred_prob = rf.predict_proba(x_test)
# y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)

# # 计算 log_loss
# loss = log_loss(y_test, y_pred_prob, normalize=True)
# print(f"Log Loss: {loss}")

# 4. 模型评估 (解决多分类预测问题)

# 获取预测的概率
y_pred_prob = rf.predict_proba(x_test)

# 使用 OneHotEncoder 对 y_test 进行 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False) # 确保返回的是密集矩阵
y_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))

# 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内
eps = 1e-15
y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)

# 计算 log_loss
loss = log_loss(y_test_one_hot, y_pred_prob, normalize=True)
print(f"Log Loss: {loss}")

\n

\n","cover_type":"img","excerpt":"","more":"

Bagging

随机森林

\n

Random-Forest 就是Bagging + Decisiontree

\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestClassifier
# 1.获取数据集 - 加载 Titanic 数据集
titanic = sns.load_dataset('titanic')
missing_age_count = titanic['age'].isna().sum()
# print(f"缺失的 age 数量: {missing_age_count}")
# 2. 数据基本处理
# 2.1 确认特征值、目标值
X = titanic[['pclass','age','sex']]
y = titanic['survived']
# 2.2 缺失值处理
X.loc[:, 'age'] = X['age'].fillna(value=X['age'].mean()) # 使用 .loc 进行修改
# 2.3 划分数据集
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=22)
# 3. 特征工程(字典特征提取)
X_train = X_train.to_dict(orient="records")
X_test= X_test.to_dict(orient="records")
transfer = DictVectorizer()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# 4. 机器学习 随机森林
rf = RandomForestClassifier()
gc = GridSearchCV(estimator=rf ,param_grid={"n_estimators":[100,120,300],"max_depth":[3,7,11]},cv=3)
gc.fit(X_train,y_train)
y_pred = gc.predict(X_test)
print(f"模型的测试集的预测值:{y_pred}")
ret = gc.score(X_test,y_test)
print(f"最佳模型在测试集上的评分:{ret}")
print(f"最佳模型的参数:{gc.best_estimator_}")
print(f"最佳模型在训练集上的评分:{gc.best_score_}")
print(X_test.toarray())
\n

\n

ott案例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.preprocessing import OneHotEncoder
# 1. 获取数据集
data = pd.read_csv('./data/train.csv')
# 查看目标值分类
import seaborn as sns
sns.countplot(data=data, x='target', hue='target', palette="Set2", legend=False) # 使用 hue='target' 替代 palette
plt.show()

# 2. 数据集的基本处理
# 2.1 确定特征值、目标值
x = data.drop(["id", "target"], axis=1)
y = data['target']

# 2.2 使用随机欠采样进行平衡
undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=0)
x_resampled, y_resampled = undersampler.fit_resample(x, y)

# 查看欠采样后的类别分布
# print(f"欠采样后训练集中的类别分布:\\n{y_train_resampled.value_counts()}")

# 2.3. 将标签转换为数字
le = LabelEncoder()
y_resampled = le.fit_transform(y_resampled)

# 2.4. 划分训练集和测试集
x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.2)

# 3. 机器学习
rf = RandomForestClassifier(oob_score = True)
rf.fit(x_train,y_train)
y_pred = rf.predict(x_test)
print(f"预测值:{y_pred}")
print(f"评分:{rf.score(x_test,y_test)}")

# # 4. 模型评估 (解决二分类预测问题)
# import numpy as np
# from sklearn.metrics import log_loss
# # 假设 y_pred_prob 是通过 predict_proba 得到的预测概率
# # 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内
# eps = 1e-15 # 设置一个小的eps值,避免极端值
# y_pred_prob = rf.predict_proba(x_test)
# y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)

# # 计算 log_loss
# loss = log_loss(y_test, y_pred_prob, normalize=True)
# print(f"Log Loss: {loss}")

# 4. 模型评估 (解决多分类预测问题)

# 获取预测的概率
y_pred_prob = rf.predict_proba(x_test)

# 使用 OneHotEncoder 对 y_test 进行 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False) # 确保返回的是密集矩阵
y_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))

# 对预测概率进行裁剪,将其限制在 [eps, 1-eps] 范围内
eps = 1e-15
y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)

# 计算 log_loss
loss = log_loss(y_test_one_hot, y_pred_prob, normalize=True)
print(f"Log Loss: {loss}")

\n

\n"},{"title":"k近邻算法(K-Nearest Neighbors)KNN","abbrlink":29139,"mathjax":true,"date":"2025-01-13T09:20:59.000Z","_content":"## **k近邻算法(K-Nearest Neighbors)KNN**\n将当前样本的类别归类于距离最近的**k**个样本的类别\n\n#### **距离公式(2维)**\n\n- 欧式距离\n$$\nd = \\sqrt{(x_1-y_1)^2 + (x_2 - y_2)^2}\n$$\n- 曼哈顿距离\n$$\nd = |x_1 - x_2| + |y_1 - y_2|\n$$\n- 切比雪夫距离\n$$\nd = \\max\\left(|x_1 - x_2|, |y_1 - y_2|\\right)\n$$\n#### k值选择问题\n\n| k值 | 影响 |\n| --- | ------------------ |\n| 越大 | 模型过拟合,准确率波动较大 |\n| 越小 | 模型欠拟合,准确率趋于稳定但可能较低 |\n### 特征预处理\n> 通过一些转换函数将特征数据转换成更加适合算法模型的特征数据过程 \n- 归一化\n 将数据变换到指定区间(默认是\\[0,1\\])\n $$ x' = \\frac{x- x_{\\text {min}}}{x_{\\text{max}} - x_{\\text{min}}} $$\n 若需要缩放到任意区间 \\(\\[a, b\\]\\),公式为: $$ x' = a + \\frac{(x - x_{\\text{min}}) \\cdot (b - a)}{x_{\\text{max}} - x_{\\text{min}}} $$\n 其中:\\( \\[a, b\\] \\):目标区间的范围\n 归一化受到数据集的异常值的影响,需要进行标准化处理(更加合理)\n ``` python\n\t from sklearn.preprocessing import MinMaxScaler # 归一化\n\t```\n- 标准化\n 将数据调整为均值为 0,标准差为 1 的标准正态分布\n $$ z = \\frac{x - \\mu}{\\sigma} $$\n \\( z \\):标准化后的值 \\( x \\):原始数据值 \\( $\\mu$ \\):数据的均值 \\( $\\sigma$\\):数据的标准差\n \n ``` python\n \t from sklearn.preprocessing import StandardScaler # 标准化\n ```\n\n### KNN代码实现\n```python\nimport seaborn as sns\nimport matplotlib.pyplot as plt \nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_iris\nfrom sklearn.preprocessing import MinMaxScaler,StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import accuracy_score\n\n# 1 数据集获取\niris = load_iris()\n# print(iris.feature_names)\niris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])\niris_data['target'] = iris.target\n\ndef iris_plot(data,col1,col2):\n sns.lmplot(x=col1,y=col2,data=data,hue=\"target\",fit_reg=False)\n plt.show()\n# 2 数据集可视化\n# iris_plot(iris_data, 'Sepal_Width', 'Petal_Length')\n\n# 3 数据集的划分\nX_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2,random_state=44)\n# print(\"训练集的特征值:\\n\",X_train)\n# print(\"训练集的目标值:\\n\",y_train)\n# print(\"测试集的特征值:\\n\",X_test)\n# print(\"测试集的特征值:\\n\",y_test)\n\n# 4 归一化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# print(\"归一化的,X_train:\\n\",X_train)\n# print(\"归一化的X_test:\\n\",X_test)\n\n# 5 机器学习 KNN\n# 5.1 实例化估计器\nestimator = KNeighborsClassifier(n_neighbors=9)\n# 5.2 进行训练\nestimator.fit(X_train,y_train)\n\n# 6 模型评估\ny_pred = estimator.predict(X_test)\nprint(\"预测值:\\n\",y_pre)\nprint(\"预测值与真实值是否相等:\\n\",y_pred==y_test)\naccuracy = accuracy_score(y_test, y_pred)\nprint(f\"\\nKNN 模型的准确率: {accuracy:.4f}\")\n```\n\n![](/img/machinelearning/knn-01.png)\n### 交叉验证与网格搜索\n```python\nimport seaborn as sns\nimport matplotlib.pyplot as plt \nimport pandas as pd\nfrom sklearn.model_selection import train_test_split,GridSearchCV\nfrom sklearn.datasets import load_iris\nfrom sklearn.preprocessing import MinMaxScaler,StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import accuracy_score\n\n# 1 数据集获取\niris = load_iris()\niris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])\niris_data['target'] = iris.target\n\n# 3 数据集的划分\nX_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2)\n\n# 4 归一化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n\n# 5 机器学习 KNN\n# 5.1 实例化估计器\n#\n#不指定 n_neighbors ,使用网格搜索进行循环训练\nestimator = KNeighborsClassifier()\n# 5.2 模型调优 -- 交叉验证,网格搜素\nestimator = GridSearchCV(estimator,param_grid={\"n_neighbors\":[1,3,5,7]},cv=5) # 5 折\n# 5.2 进行训练\nestimator.fit(X_train,y_train)\n \n# 6 模型评估\ny_pred = estimator.predict(X_test)\nprint(\"预测值:\\n\",y_pred)\nprint(\"预测值与真实值是否相等:\\n\",y_pred==y_test)\naccuracy = accuracy_score(y_test, y_pred)\nprint(f\"\\nKNN 模型的准确率: {accuracy:.4f}\")\n\n# 交叉验证的相关参数\nprint(f\"最好结果:{estimator.best_score_}\")\nprint(f\"最好模型:{estimator.best_estimator_}\")\nprint(f\"最好模型结果:{estimator.cv_results_}\")\n```\n![](/img/machinelearning/cros-valid.png)\n\n### 机器学习的基本步骤\n- 获取数据集\n- 数据集基本处理\n - 去重去空、填充等操作 \n - 确定特征值和目标值\n - 分割数据集\n- 特征工程(特征预处理 标准化等)\n- 机器学习\n- 模型评估\n\n### 数据分割的方法\n- 留出法\n 训练/测试集的划分要尽可能保持数据分布的一致性,避免因数据划分过程引入额外的偏差而对最终结果产生影响。\n 单次使用留出法得到的估计结果往往不够稳定可靠,在使用留出法时,一般要采用若干次随机划分、重复进行实验评估后取平均值作为留出法的评估结果。\n ``` python\n\tfrom sklearn.model_selection import KFold,StratifiedKFold\n\timport pandas as pd\n\tX = np.array([\n\t[1,2,3,4],\n\t[11,12,13,14],\n\t[21,22,23,24],\n\t[31,32,33,34],\n\t[41,42,43,44],\n\t[51,52,53,54],\n\t[61,62,63,64],\n\t[71,72,73,74]\n\t])\n\ty=np.array([1,1,0,0,1,1,0,0])\n\tfolder = KFold(n_splits=4)\n\tsfloder = StratifiedKFold(n_splits=4)\n\tprint(\"KFOLD:\")\n\tfor train,test in folder.split(X,y):\n\t print(f\"train:{train},test:{test}\")\n\tprint(\"SKFOLD:\")\n\tfor train,test in sfloder.split(X,y):\n print(f\"train:{train},test:{test}\")\n\t```\n\t![](/img/machinelearning/kfold-skfold.png)\n- 自助法\n- 交叉验证法","source":"_posts/machinelearning/knn.md","raw":"---\ntitle: k近邻算法(K-Nearest Neighbors)KNN\ntags: KNN\ncategories: machinelearning\nabbrlink: 29139\nmathjax: true\ndate: 2025-01-13 17:20:59\n---\n## **k近邻算法(K-Nearest Neighbors)KNN**\n将当前样本的类别归类于距离最近的**k**个样本的类别\n\n#### **距离公式(2维)**\n\n- 欧式距离\n$$\nd = \\sqrt{(x_1-y_1)^2 + (x_2 - y_2)^2}\n$$\n- 曼哈顿距离\n$$\nd = |x_1 - x_2| + |y_1 - y_2|\n$$\n- 切比雪夫距离\n$$\nd = \\max\\left(|x_1 - x_2|, |y_1 - y_2|\\right)\n$$\n#### k值选择问题\n\n| k值 | 影响 |\n| --- | ------------------ |\n| 越大 | 模型过拟合,准确率波动较大 |\n| 越小 | 模型欠拟合,准确率趋于稳定但可能较低 |\n### 特征预处理\n> 通过一些转换函数将特征数据转换成更加适合算法模型的特征数据过程 \n- 归一化\n 将数据变换到指定区间(默认是\\[0,1\\])\n $$ x' = \\frac{x- x_{\\text {min}}}{x_{\\text{max}} - x_{\\text{min}}} $$\n 若需要缩放到任意区间 \\(\\[a, b\\]\\),公式为: $$ x' = a + \\frac{(x - x_{\\text{min}}) \\cdot (b - a)}{x_{\\text{max}} - x_{\\text{min}}} $$\n 其中:\\( \\[a, b\\] \\):目标区间的范围\n 归一化受到数据集的异常值的影响,需要进行标准化处理(更加合理)\n ``` python\n\t from sklearn.preprocessing import MinMaxScaler # 归一化\n\t```\n- 标准化\n 将数据调整为均值为 0,标准差为 1 的标准正态分布\n $$ z = \\frac{x - \\mu}{\\sigma} $$\n \\( z \\):标准化后的值 \\( x \\):原始数据值 \\( $\\mu$ \\):数据的均值 \\( $\\sigma$\\):数据的标准差\n \n ``` python\n \t from sklearn.preprocessing import StandardScaler # 标准化\n ```\n\n### KNN代码实现\n```python\nimport seaborn as sns\nimport matplotlib.pyplot as plt \nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_iris\nfrom sklearn.preprocessing import MinMaxScaler,StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import accuracy_score\n\n# 1 数据集获取\niris = load_iris()\n# print(iris.feature_names)\niris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])\niris_data['target'] = iris.target\n\ndef iris_plot(data,col1,col2):\n sns.lmplot(x=col1,y=col2,data=data,hue=\"target\",fit_reg=False)\n plt.show()\n# 2 数据集可视化\n# iris_plot(iris_data, 'Sepal_Width', 'Petal_Length')\n\n# 3 数据集的划分\nX_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2,random_state=44)\n# print(\"训练集的特征值:\\n\",X_train)\n# print(\"训练集的目标值:\\n\",y_train)\n# print(\"测试集的特征值:\\n\",X_test)\n# print(\"测试集的特征值:\\n\",y_test)\n\n# 4 归一化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n# print(\"归一化的,X_train:\\n\",X_train)\n# print(\"归一化的X_test:\\n\",X_test)\n\n# 5 机器学习 KNN\n# 5.1 实例化估计器\nestimator = KNeighborsClassifier(n_neighbors=9)\n# 5.2 进行训练\nestimator.fit(X_train,y_train)\n\n# 6 模型评估\ny_pred = estimator.predict(X_test)\nprint(\"预测值:\\n\",y_pre)\nprint(\"预测值与真实值是否相等:\\n\",y_pred==y_test)\naccuracy = accuracy_score(y_test, y_pred)\nprint(f\"\\nKNN 模型的准确率: {accuracy:.4f}\")\n```\n\n![](/img/machinelearning/knn-01.png)\n### 交叉验证与网格搜索\n```python\nimport seaborn as sns\nimport matplotlib.pyplot as plt \nimport pandas as pd\nfrom sklearn.model_selection import train_test_split,GridSearchCV\nfrom sklearn.datasets import load_iris\nfrom sklearn.preprocessing import MinMaxScaler,StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import accuracy_score\n\n# 1 数据集获取\niris = load_iris()\niris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])\niris_data['target'] = iris.target\n\n# 3 数据集的划分\nX_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2)\n\n# 4 归一化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n\n# 5 机器学习 KNN\n# 5.1 实例化估计器\n#\n#不指定 n_neighbors ,使用网格搜索进行循环训练\nestimator = KNeighborsClassifier()\n# 5.2 模型调优 -- 交叉验证,网格搜素\nestimator = GridSearchCV(estimator,param_grid={\"n_neighbors\":[1,3,5,7]},cv=5) # 5 折\n# 5.2 进行训练\nestimator.fit(X_train,y_train)\n \n# 6 模型评估\ny_pred = estimator.predict(X_test)\nprint(\"预测值:\\n\",y_pred)\nprint(\"预测值与真实值是否相等:\\n\",y_pred==y_test)\naccuracy = accuracy_score(y_test, y_pred)\nprint(f\"\\nKNN 模型的准确率: {accuracy:.4f}\")\n\n# 交叉验证的相关参数\nprint(f\"最好结果:{estimator.best_score_}\")\nprint(f\"最好模型:{estimator.best_estimator_}\")\nprint(f\"最好模型结果:{estimator.cv_results_}\")\n```\n![](/img/machinelearning/cros-valid.png)\n\n### 机器学习的基本步骤\n- 获取数据集\n- 数据集基本处理\n - 去重去空、填充等操作 \n - 确定特征值和目标值\n - 分割数据集\n- 特征工程(特征预处理 标准化等)\n- 机器学习\n- 模型评估\n\n### 数据分割的方法\n- 留出法\n 训练/测试集的划分要尽可能保持数据分布的一致性,避免因数据划分过程引入额外的偏差而对最终结果产生影响。\n 单次使用留出法得到的估计结果往往不够稳定可靠,在使用留出法时,一般要采用若干次随机划分、重复进行实验评估后取平均值作为留出法的评估结果。\n ``` python\n\tfrom sklearn.model_selection import KFold,StratifiedKFold\n\timport pandas as pd\n\tX = np.array([\n\t[1,2,3,4],\n\t[11,12,13,14],\n\t[21,22,23,24],\n\t[31,32,33,34],\n\t[41,42,43,44],\n\t[51,52,53,54],\n\t[61,62,63,64],\n\t[71,72,73,74]\n\t])\n\ty=np.array([1,1,0,0,1,1,0,0])\n\tfolder = KFold(n_splits=4)\n\tsfloder = StratifiedKFold(n_splits=4)\n\tprint(\"KFOLD:\")\n\tfor train,test in folder.split(X,y):\n\t print(f\"train:{train},test:{test}\")\n\tprint(\"SKFOLD:\")\n\tfor train,test in sfloder.split(X,y):\n print(f\"train:{train},test:{test}\")\n\t```\n\t![](/img/machinelearning/kfold-skfold.png)\n- 自助法\n- 交叉验证法","slug":"machinelearning/knn","published":1,"updated":"2025-01-24T10:16:05.657Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnj000f3wah7exd6oi7","content":"

k近邻算法(K-Nearest Neighbors)KNN

将当前样本的类别归类于距离最近的k个样本的类别

\n

距离公式(2维)

\n

k值选择问题

\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
k值影响
越大模型过拟合,准确率波动较大
越小模型欠拟合,准确率趋于稳定但可能较低
\n

特征预处理

\n

通过一些转换函数将特征数据转换成更加适合算法模型的特征数据过程

\n
\n\n

KNN代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 1 数据集获取
iris = load_iris()
# print(iris.feature_names)
iris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
iris_data['target'] = iris.target

def iris_plot(data,col1,col2):
sns.lmplot(x=col1,y=col2,data=data,hue="target",fit_reg=False)
plt.show()
# 2 数据集可视化
# iris_plot(iris_data, 'Sepal_Width', 'Petal_Length')

# 3 数据集的划分
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2,random_state=44)
# print("训练集的特征值:\\n",X_train)
# print("训练集的目标值:\\n",y_train)
# print("测试集的特征值:\\n",X_test)
# print("测试集的特征值:\\n",y_test)

# 4 归一化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# print("归一化的,X_train:\\n",X_train)
# print("归一化的X_test:\\n",X_test)

# 5 机器学习 KNN
# 5.1 实例化估计器
estimator = KNeighborsClassifier(n_neighbors=9)
# 5.2 进行训练
estimator.fit(X_train,y_train)

# 6 模型评估
y_pred = estimator.predict(X_test)
print("预测值:\\n",y_pre)
print("预测值与真实值是否相等:\\n",y_pred==y_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\\nKNN 模型的准确率: {accuracy:.4f}")
\n\n

\n

交叉验证与网格搜索

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 1 数据集获取
iris = load_iris()
iris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
iris_data['target'] = iris.target

# 3 数据集的划分
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2)

# 4 归一化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 5 机器学习 KNN
# 5.1 实例化估计器
#
#不指定 <code> n_neighbors </code> ,使用网格搜索进行循环训练
estimator = KNeighborsClassifier()
# 5.2 模型调优 -- 交叉验证,网格搜素
estimator = GridSearchCV(estimator,param_grid={"n_neighbors":[1,3,5,7]},cv=5) # 5 折
# 5.2 进行训练
estimator.fit(X_train,y_train)

# 6 模型评估
y_pred = estimator.predict(X_test)
print("预测值:\\n",y_pred)
print("预测值与真实值是否相等:\\n",y_pred==y_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\\nKNN 模型的准确率: {accuracy:.4f}")

# 交叉验证的相关参数
print(f"最好结果:{estimator.best_score_}")
print(f"最好模型:{estimator.best_estimator_}")
print(f"最好模型结果:{estimator.cv_results_}")
\n

\n

机器学习的基本步骤

\n

数据分割的方法

\n","cover":false,"excerpt":"","more":"

k近邻算法(K-Nearest Neighbors)KNN

将当前样本的类别归类于距离最近的k个样本的类别

\n

距离公式(2维)

\n

k值选择问题

\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
k值影响
越大模型过拟合,准确率波动较大
越小模型欠拟合,准确率趋于稳定但可能较低
\n

特征预处理

\n

通过一些转换函数将特征数据转换成更加适合算法模型的特征数据过程

\n
\n\n

KNN代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 1 数据集获取
iris = load_iris()
# print(iris.feature_names)
iris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
iris_data['target'] = iris.target

def iris_plot(data,col1,col2):
sns.lmplot(x=col1,y=col2,data=data,hue="target",fit_reg=False)
plt.show()
# 2 数据集可视化
# iris_plot(iris_data, 'Sepal_Width', 'Petal_Length')

# 3 数据集的划分
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2,random_state=44)
# print("训练集的特征值:\\n",X_train)
# print("训练集的目标值:\\n",y_train)
# print("测试集的特征值:\\n",X_test)
# print("测试集的特征值:\\n",y_test)

# 4 归一化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# print("归一化的,X_train:\\n",X_train)
# print("归一化的X_test:\\n",X_test)

# 5 机器学习 KNN
# 5.1 实例化估计器
estimator = KNeighborsClassifier(n_neighbors=9)
# 5.2 进行训练
estimator.fit(X_train,y_train)

# 6 模型评估
y_pred = estimator.predict(X_test)
print("预测值:\\n",y_pre)
print("预测值与真实值是否相等:\\n",y_pred==y_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\\nKNN 模型的准确率: {accuracy:.4f}")
\n\n

\n

交叉验证与网格搜索

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 1 数据集获取
iris = load_iris()
iris_data = pd.DataFrame(iris.data,columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
iris_data['target'] = iris.target

# 3 数据集的划分
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.2)

# 4 归一化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 5 机器学习 KNN
# 5.1 实例化估计器
#
#不指定 <code> n_neighbors </code> ,使用网格搜索进行循环训练
estimator = KNeighborsClassifier()
# 5.2 模型调优 -- 交叉验证,网格搜素
estimator = GridSearchCV(estimator,param_grid={"n_neighbors":[1,3,5,7]},cv=5) # 5 折
# 5.2 进行训练
estimator.fit(X_train,y_train)

# 6 模型评估
y_pred = estimator.predict(X_test)
print("预测值:\\n",y_pred)
print("预测值与真实值是否相等:\\n",y_pred==y_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\\nKNN 模型的准确率: {accuracy:.4f}")

# 交叉验证的相关参数
print(f"最好结果:{estimator.best_score_}")
print(f"最好模型:{estimator.best_estimator_}")
print(f"最好模型结果:{estimator.cv_results_}")
\n

\n

机器学习的基本步骤

\n

数据分割的方法

\n"},{"title":"线性回归","mathjax":true,"abbrlink":52662,"date":"2025-01-19T08:46:51.000Z","_content":"\n### 线性回归简介\n>用于预测一个连续的目标变量(因变量),与一个或多个特征(自变量)之间存在线性关系。\n\n假设函数: \n$$y = w_1x_1 + w_2x_2 + \\cdot\\cdot\\cdot+w_nx_n$$\n- $y$ 是目标变量(因变量),即我们希望预测的值。\n- $x1​,x2​,…,xn$​ 是特征变量(自变量),即输入的值。\n### 损失函数\n\n为了找到最佳的线性模型,我们需要通过最小化损失函数来优化模型参数。在线性回归中,常用的损失函数是 **均方误差(MSE)**:\n$$J(\\theta) = \\frac{1}{2N} \\sum_{i=1}^{N} (y_i - f_\\theta(x_i))^2$$\n- N 是样本的数量。\n- $y_i$​ 是第 i 个样本的真实值。\n- $f_\\theta(x_i)$ 是模型预测的第 i 个样本的值。\n\n### 线性回归优化\n\n- 梯度下降法\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import SGDRegressor\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n\n # 4.机器学习- 梯度下降法\n estimater = SGDRegressor(max_iter=1000, eta0=0.01)\n estimater.fit(X_train, y_train)\n print(f\"SGD模型的偏置是:{estimater.intercept_}\")\n print(f\"SGD模型的系数是:{estimater.coef_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"SGD模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"SGD模型均方误差:{mse}\")\n ```\n\n- 正规方程\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import LinearRegression\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.fit_transform(X_test)\n\n # 4.机器学习- 线性回归\n estimater = LinearRegression()\n estimater.fit(X_train, y_train)\n print(f\"模型的偏置是:{estimater.intercept_}\")\n print(f\"模型的系数是:{estimater.coef_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"模型均方误差:{mse}\")\n ```\n\n- 岭回归\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import Ridge, RidgeCV\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n\n # 4.机器学习- 岭回归 使用了Ridge的alpha的搜索\n # estimater = Ridge(alpha=1.0)\n estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])\n estimater.fit(X_train, y_train)\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"Ridge模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型均方误差:{mse}\")\n ```\n\n这样每个代码块的缩进保持一致,便于阅读和理解。如果有其他优化需求,随时告诉我!\n\n\n![](/img/machinelearning/linear.png)\n\n![](/img/machinelearning/fitting.png)\n### 模型保存和加载\n```python\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.linear_model import Ridge, RidgeCV\nfrom sklearn.metrics import mean_squared_error\nimport joblib\n\ndef save_model():\n # 1. 获取数据集\n housing = fetch_california_housing()\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n # 4. 机器学习 - 岭回归 使用了Ridge的alpha的搜索\n estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])\n estimater.fit(X_train, y_train)\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n # 保存模型\n joblib.dump(estimater, 'ridge_model.pkl')\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型均方误差:{mse}\")\n\ndef load_model():\n # 1. 获取数据集\n housing = fetch_california_housing()\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n # 加载模型\n estimater = joblib.load('ridge_model.pkl')\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型预测值:{y_pred}\")\n print(f\"Ridge模型均方误差:{mse}\")\n\nprint(\"训练并保存模型:\")\nsave_model()\nprint(\"加载模型\")\nload_model()\n```","source":"_posts/machinelearning/linearreression.md","raw":"---\ntitle: 线性回归\ntags: linear-regression\ncategories: machinelearning\nmathjax: true\nabbrlink: 52662\ndate: 2025-01-19 16:46:51\n---\n\n### 线性回归简介\n>用于预测一个连续的目标变量(因变量),与一个或多个特征(自变量)之间存在线性关系。\n\n假设函数: \n$$y = w_1x_1 + w_2x_2 + \\cdot\\cdot\\cdot+w_nx_n$$\n- $y$ 是目标变量(因变量),即我们希望预测的值。\n- $x1​,x2​,…,xn$​ 是特征变量(自变量),即输入的值。\n### 损失函数\n\n为了找到最佳的线性模型,我们需要通过最小化损失函数来优化模型参数。在线性回归中,常用的损失函数是 **均方误差(MSE)**:\n$$J(\\theta) = \\frac{1}{2N} \\sum_{i=1}^{N} (y_i - f_\\theta(x_i))^2$$\n- N 是样本的数量。\n- $y_i$​ 是第 i 个样本的真实值。\n- $f_\\theta(x_i)$ 是模型预测的第 i 个样本的值。\n\n### 线性回归优化\n\n- 梯度下降法\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import SGDRegressor\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n\n # 4.机器学习- 梯度下降法\n estimater = SGDRegressor(max_iter=1000, eta0=0.01)\n estimater.fit(X_train, y_train)\n print(f\"SGD模型的偏置是:{estimater.intercept_}\")\n print(f\"SGD模型的系数是:{estimater.coef_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"SGD模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"SGD模型均方误差:{mse}\")\n ```\n\n- 正规方程\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import LinearRegression\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.fit_transform(X_test)\n\n # 4.机器学习- 线性回归\n estimater = LinearRegression()\n estimater.fit(X_train, y_train)\n print(f\"模型的偏置是:{estimater.intercept_}\")\n print(f\"模型的系数是:{estimater.coef_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"模型均方误差:{mse}\")\n ```\n\n- 岭回归\n ```python\n from sklearn.datasets import fetch_california_housing\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import Ridge, RidgeCV\n from sklearn.metrics import mean_squared_error\n\n # 1. 获取数据集\n housing = fetch_california_housing()\n\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n\n # 4.机器学习- 岭回归 使用了Ridge的alpha的搜索\n # estimater = Ridge(alpha=1.0)\n estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])\n estimater.fit(X_train, y_train)\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n print(f\"Ridge模型预测值:{y_pred}\")\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型均方误差:{mse}\")\n ```\n\n这样每个代码块的缩进保持一致,便于阅读和理解。如果有其他优化需求,随时告诉我!\n\n\n![](/img/machinelearning/linear.png)\n\n![](/img/machinelearning/fitting.png)\n### 模型保存和加载\n```python\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.linear_model import Ridge, RidgeCV\nfrom sklearn.metrics import mean_squared_error\nimport joblib\n\ndef save_model():\n # 1. 获取数据集\n housing = fetch_california_housing()\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n # 4. 机器学习 - 岭回归 使用了Ridge的alpha的搜索\n estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])\n estimater.fit(X_train, y_train)\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n # 保存模型\n joblib.dump(estimater, 'ridge_model.pkl')\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型均方误差:{mse}\")\n\ndef load_model():\n # 1. 获取数据集\n housing = fetch_california_housing()\n # 2. 数据集处理\n # 2.1 分割数据集\n X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)\n # 3. 特征工程\n # 3.1 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()\n # 加载模型\n estimater = joblib.load('ridge_model.pkl')\n print(f\"Ridge模型的偏置是:{estimater.intercept_}\")\n print(f\"Ridge模型的系数是:{estimater.coef_}\")\n # 查看最佳 alpha\n print(f\"最佳 alpha 值是:{estimater.alpha_}\")\n # 5. 模型评估\n y_pred = estimater.predict(X_test)\n mse = mean_squared_error(y_test, y_pred)\n print(f\"Ridge模型预测值:{y_pred}\")\n print(f\"Ridge模型均方误差:{mse}\")\n\nprint(\"训练并保存模型:\")\nsave_model()\nprint(\"加载模型\")\nload_model()\n```","slug":"machinelearning/linearreression","published":1,"updated":"2025-02-17T08:31:26.908Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnk000g3wah2r7mcppb","content":"

线性回归简介

\n

用于预测一个连续的目标变量(因变量),与一个或多个特征(自变量)之间存在线性关系。

\n
\n

假设函数:
$$y = w_1x_1 + w_2x_2 + \\cdot\\cdot\\cdot+w_nx_n$$

\n\n

损失函数

为了找到最佳的线性模型,我们需要通过最小化损失函数来优化模型参数。在线性回归中,常用的损失函数是 均方误差(MSE)
$$J(\\theta) = \\frac{1}{2N} \\sum_{i=1}^{N} (y_i - f_\\theta(x_i))^2$$

\n\n

线性回归优化

\n

这样每个代码块的缩进保持一致,便于阅读和理解。如果有其他优化需求,随时告诉我!

\n

\n

\n

模型保存和加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
import joblib

def save_model():
# 1. 获取数据集
housing = fetch_california_housing()
# 2. 数据集处理
# 2.1 分割数据集
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
# 3. 特征工程
# 3.1 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()
# 4. 机器学习 - 岭回归 使用了Ridge的alpha的搜索
estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
estimater.fit(X_train, y_train)
print(f"Ridge模型的偏置是:{estimater.intercept_}")
print(f"Ridge模型的系数是:{estimater.coef_}")
# 保存模型
joblib.dump(estimater, 'ridge_model.pkl')
# 查看最佳 alpha
print(f"最佳 alpha 值是:{estimater.alpha_}")
# 5. 模型评估
y_pred = estimater.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Ridge模型均方误差:{mse}")

def load_model():
# 1. 获取数据集
housing = fetch_california_housing()
# 2. 数据集处理
# 2.1 分割数据集
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
# 3. 特征工程
# 3.1 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()
# 加载模型
estimater = joblib.load('ridge_model.pkl')
print(f"Ridge模型的偏置是:{estimater.intercept_}")
print(f"Ridge模型的系数是:{estimater.coef_}")
# 查看最佳 alpha
print(f"最佳 alpha 值是:{estimater.alpha_}")
# 5. 模型评估
y_pred = estimater.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Ridge模型预测值:{y_pred}")
print(f"Ridge模型均方误差:{mse}")

print("训练并保存模型:")
save_model()
print("加载模型")
load_model()
","cover":false,"excerpt":"","more":"

线性回归简介

\n

用于预测一个连续的目标变量(因变量),与一个或多个特征(自变量)之间存在线性关系。

\n
\n

假设函数:
$$y = w_1x_1 + w_2x_2 + \\cdot\\cdot\\cdot+w_nx_n$$

\n\n

损失函数

为了找到最佳的线性模型,我们需要通过最小化损失函数来优化模型参数。在线性回归中,常用的损失函数是 均方误差(MSE)
$$J(\\theta) = \\frac{1}{2N} \\sum_{i=1}^{N} (y_i - f_\\theta(x_i))^2$$

\n\n

线性回归优化

\n

这样每个代码块的缩进保持一致,便于阅读和理解。如果有其他优化需求,随时告诉我!

\n

\n

\n

模型保存和加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
import joblib

def save_model():
# 1. 获取数据集
housing = fetch_california_housing()
# 2. 数据集处理
# 2.1 分割数据集
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
# 3. 特征工程
# 3.1 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()
# 4. 机器学习 - 岭回归 使用了Ridge的alpha的搜索
estimater = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
estimater.fit(X_train, y_train)
print(f"Ridge模型的偏置是:{estimater.intercept_}")
print(f"Ridge模型的系数是:{estimater.coef_}")
# 保存模型
joblib.dump(estimater, 'ridge_model.pkl')
# 查看最佳 alpha
print(f"最佳 alpha 值是:{estimater.alpha_}")
# 5. 模型评估
y_pred = estimater.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Ridge模型均方误差:{mse}")

def load_model():
# 1. 获取数据集
housing = fetch_california_housing()
# 2. 数据集处理
# 2.1 分割数据集
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.25)
# 3. 特征工程
# 3.1 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test) # 使用 transform() 而不是 fit_transform()
# 加载模型
estimater = joblib.load('ridge_model.pkl')
print(f"Ridge模型的偏置是:{estimater.intercept_}")
print(f"Ridge模型的系数是:{estimater.coef_}")
# 查看最佳 alpha
print(f"最佳 alpha 值是:{estimater.alpha_}")
# 5. 模型评估
y_pred = estimater.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Ridge模型预测值:{y_pred}")
print(f"Ridge模型均方误差:{mse}")

print("训练并保存模型:")
save_model()
print("加载模型")
load_model()
"},{"title":"逻辑回归","mathjax":true,"abbrlink":60504,"date":"2025-01-20T07:30:08.000Z","_content":"\n### logistic regression code\n\n```python\nimport pandas as pd\nimport numpy as np\nfrom sklearn.datasets import load_breast_cancer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.linear_model import LogisticRegression\n# 1. 加载乳腺癌数据集\ndata = load_breast_cancer()\n# 2.1 数据集基本处理\ndf = pd.DataFrame(data.data, columns=data.feature_names)\ndf['target'] = data.target\nfor i in df.columns:\n # 检查列是否有缺失值\n if np.any(pd.isnull(df[i])):\n print(f\"Filling missing values in column: {i}\")\n#2.2 确认特征值、目标值\nX = df.iloc[:,0:df.shape[1] - 1]\ny = df.loc[:,\"target\"]\n# 2.3 分割数据\nX_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)\n# 显示前几行数据\ndf.head(1)\n\n# 3. 特征工程 标准化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n\n# 4 机器学习 逻辑回归\nestimator = LogisticRegression()\nestimator.fit(X_train,y_train)\n\n# 5. 模型评估\nprint(f\"模型准确率:{estimator.score(X_test,y_test)}\")\nprint(f\"模型预测值为:\\n{estimator.predict(X_test)}\")\n```\n\n### 分类评估的参数\n- 准确率 \n 准确率是所有预测正确的样本占总样本的比例 \n $$Accuracy = \\frac{TP+TN}{TP+FN+FP+TN}$$\n\n- 精准率 \n 精准率(又称查准率)是指所有被预测为正类的样本中,真正为正类的比例 \n $$Precision = \\frac{TP}{TP+FP}$$\n\n- 召回率 \n 召回率(又称查全率)是指所有实际为正类的样本中,被正确预测为正类的比例 \n $$Recall = \\frac{TP}{TP+FN}$$\n\n- F1-score \n F1 值(F1 Score)是精准率和召回率的调和平均数,综合考虑了精准率和召回率的影响。 \n $$ F1 = 2 \\times \\frac{\\text{Precision} \\times \\text{Recall}}{\\text{Precision} + \\text{Recall}} $$\n\n- roc曲线 \n tpr、fpr来衡量不平衡的二分类问题\n\n```python\n import pandas as pd\n import numpy as np\n from sklearn.datasets import load_breast_cancer\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import LogisticRegression\n from sklearn.metrics import classification_report, roc_auc_score\n # 1. 加载乳腺癌数据集\n data = load_breast_cancer()\n # 2.1 数据集基本处理\n df = pd.DataFrame(data.data, columns=data.feature_names)\n df['target'] = data.target\n for i in df.columns:\n # 检查列是否有缺失值\n if np.any(pd.isnull(df[i])):\n print(f\"Filling missing values in column: {i}\")\n # 2.2 确认特征值、目标值\n X = df.iloc[:, 0:df.shape[1] - 1]\n y = df.loc[:, \"target\"]\n # 2.3 分割数据\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n # 显示前几行数据\n df.head(1)\n \n # 3. 特征工程 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test)\n \n # 4 机器学习 逻辑回归\n estimator = LogisticRegression()\n estimator.fit(X_train, y_train)\n \n # 5. 模型评估\n print(f\"模型准确率:{estimator.score(X_test, y_test)}\")\n y_pred = estimator.predict(X_test)\n print(f\"模型预测值为:\\n{y_pred}\")\n # 5.1 精确率、召回率\n ret = classification_report(y_test, y_pred, labels=[1, 0], target_names=[\"良性\", \"恶性\"])\n roc_score = roc_auc_score(y_test, y_pred)\n print(f\"准确率、召回率:{ret}\")\n print(f\"roc_score:{roc_score}\")\n ```\n\n### 类别不平衡的处理\n先准备类别不平衡的数据\n\n```python\nfrom imblearn.over_sampling import RandomOverSampler,SMOTE\nfrom imblearn.under_sampling import RandomUnderSampler\nfrom sklearn.datasets import make_classification\nimport matplotlib.pyplot as plt\nfrom collections import Counter\n\n# 1.准备类别不平衡的数据\nX, y = make_classification(\n n_samples=5000,\n n_features=2,\n n_informative=2,\n n_redundant=0,\n n_repeated=0,\n n_classes=3,\n n_clusters_per_class=1,\n weights=[0.01, 0.05, 0.94],\n random_state=0,\n)\ncounter = Counter(y)\nplt.scatter(X[:,0],X[:,1],c=y)\nplt.show()\n```\n\n - 过采样 \n 增加训练集的少数的类别的样本,使得正反例样本数据接近 \n - 随机过采样(RandomOverSampler)\n ```python\n ros = RandomOverSampler()\n X_resampled,y_resampled = ros.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/over_random_sampling.png)\n - `SMOTE`过采样(SMOTE)\n ```python\n smote = SMOTE()\n X_resampled,y_resampled = smote.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/over_smote_sampling.png)\n - 欠采样 \n 减少训练集的多数的类别的样本,使得正反例样本数据接近\n - 随机欠采样(RandomUnderSampler)\n ```python\n rus = RandomUnderSampler(random_state=0)\n X_resampled,y_resampled = rus.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/under_sampling.png)\n\n","source":"_posts/machinelearning/logisticregression.md","raw":"---\ntitle: 逻辑回归\ntags: logistic-regression\ncategories: machinelearning\nmathjax: true\nabbrlink: 60504\ndate: 2025-01-20 15:30:08\n---\n\n### logistic regression code\n\n```python\nimport pandas as pd\nimport numpy as np\nfrom sklearn.datasets import load_breast_cancer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.linear_model import LogisticRegression\n# 1. 加载乳腺癌数据集\ndata = load_breast_cancer()\n# 2.1 数据集基本处理\ndf = pd.DataFrame(data.data, columns=data.feature_names)\ndf['target'] = data.target\nfor i in df.columns:\n # 检查列是否有缺失值\n if np.any(pd.isnull(df[i])):\n print(f\"Filling missing values in column: {i}\")\n#2.2 确认特征值、目标值\nX = df.iloc[:,0:df.shape[1] - 1]\ny = df.loc[:,\"target\"]\n# 2.3 分割数据\nX_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)\n# 显示前几行数据\ndf.head(1)\n\n# 3. 特征工程 标准化\ntransfer = StandardScaler()\nX_train = transfer.fit_transform(X_train)\nX_test = transfer.transform(X_test)\n\n# 4 机器学习 逻辑回归\nestimator = LogisticRegression()\nestimator.fit(X_train,y_train)\n\n# 5. 模型评估\nprint(f\"模型准确率:{estimator.score(X_test,y_test)}\")\nprint(f\"模型预测值为:\\n{estimator.predict(X_test)}\")\n```\n\n### 分类评估的参数\n- 准确率 \n 准确率是所有预测正确的样本占总样本的比例 \n $$Accuracy = \\frac{TP+TN}{TP+FN+FP+TN}$$\n\n- 精准率 \n 精准率(又称查准率)是指所有被预测为正类的样本中,真正为正类的比例 \n $$Precision = \\frac{TP}{TP+FP}$$\n\n- 召回率 \n 召回率(又称查全率)是指所有实际为正类的样本中,被正确预测为正类的比例 \n $$Recall = \\frac{TP}{TP+FN}$$\n\n- F1-score \n F1 值(F1 Score)是精准率和召回率的调和平均数,综合考虑了精准率和召回率的影响。 \n $$ F1 = 2 \\times \\frac{\\text{Precision} \\times \\text{Recall}}{\\text{Precision} + \\text{Recall}} $$\n\n- roc曲线 \n tpr、fpr来衡量不平衡的二分类问题\n\n```python\n import pandas as pd\n import numpy as np\n from sklearn.datasets import load_breast_cancer\n from sklearn.model_selection import train_test_split\n from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model import LogisticRegression\n from sklearn.metrics import classification_report, roc_auc_score\n # 1. 加载乳腺癌数据集\n data = load_breast_cancer()\n # 2.1 数据集基本处理\n df = pd.DataFrame(data.data, columns=data.feature_names)\n df['target'] = data.target\n for i in df.columns:\n # 检查列是否有缺失值\n if np.any(pd.isnull(df[i])):\n print(f\"Filling missing values in column: {i}\")\n # 2.2 确认特征值、目标值\n X = df.iloc[:, 0:df.shape[1] - 1]\n y = df.loc[:, \"target\"]\n # 2.3 分割数据\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n # 显示前几行数据\n df.head(1)\n \n # 3. 特征工程 标准化\n transfer = StandardScaler()\n X_train = transfer.fit_transform(X_train)\n X_test = transfer.transform(X_test)\n \n # 4 机器学习 逻辑回归\n estimator = LogisticRegression()\n estimator.fit(X_train, y_train)\n \n # 5. 模型评估\n print(f\"模型准确率:{estimator.score(X_test, y_test)}\")\n y_pred = estimator.predict(X_test)\n print(f\"模型预测值为:\\n{y_pred}\")\n # 5.1 精确率、召回率\n ret = classification_report(y_test, y_pred, labels=[1, 0], target_names=[\"良性\", \"恶性\"])\n roc_score = roc_auc_score(y_test, y_pred)\n print(f\"准确率、召回率:{ret}\")\n print(f\"roc_score:{roc_score}\")\n ```\n\n### 类别不平衡的处理\n先准备类别不平衡的数据\n\n```python\nfrom imblearn.over_sampling import RandomOverSampler,SMOTE\nfrom imblearn.under_sampling import RandomUnderSampler\nfrom sklearn.datasets import make_classification\nimport matplotlib.pyplot as plt\nfrom collections import Counter\n\n# 1.准备类别不平衡的数据\nX, y = make_classification(\n n_samples=5000,\n n_features=2,\n n_informative=2,\n n_redundant=0,\n n_repeated=0,\n n_classes=3,\n n_clusters_per_class=1,\n weights=[0.01, 0.05, 0.94],\n random_state=0,\n)\ncounter = Counter(y)\nplt.scatter(X[:,0],X[:,1],c=y)\nplt.show()\n```\n\n - 过采样 \n 增加训练集的少数的类别的样本,使得正反例样本数据接近 \n - 随机过采样(RandomOverSampler)\n ```python\n ros = RandomOverSampler()\n X_resampled,y_resampled = ros.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/over_random_sampling.png)\n - `SMOTE`过采样(SMOTE)\n ```python\n smote = SMOTE()\n X_resampled,y_resampled = smote.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/over_smote_sampling.png)\n - 欠采样 \n 减少训练集的多数的类别的样本,使得正反例样本数据接近\n - 随机欠采样(RandomUnderSampler)\n ```python\n rus = RandomUnderSampler(random_state=0)\n X_resampled,y_resampled = rus.fit_resample(X,y)\n print(Counter(y_resampled))\n plt.scatter(X_resampled[:,0],X_resampled[:,1],c=y_resampled)\n plt.show()\n ```\n ![](/img/machinelearning/under_sampling.png)\n\n","slug":"machinelearning/logisticregression","published":1,"updated":"2025-01-24T10:16:05.659Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnk000j3wah4i4q6sze","content":"

logistic regression code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# 1. 加载乳腺癌数据集
data = load_breast_cancer()
# 2.1 数据集基本处理
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
for i in df.columns:
# 检查列是否有缺失值
if np.any(pd.isnull(df[i])):
print(f"Filling missing values in column: {i}")
#2.2 确认特征值、目标值
X = df.iloc[:,0:df.shape[1] - 1]
y = df.loc[:,"target"]
# 2.3 分割数据
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
# 显示前几行数据
df.head(1)

# 3. 特征工程 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 4 机器学习 逻辑回归
estimator = LogisticRegression()
estimator.fit(X_train,y_train)

# 5. 模型评估
print(f"模型准确率:{estimator.score(X_test,y_test)}")
print(f"模型预测值为:\\n{estimator.predict(X_test)}")
\n\n

分类评估的参数

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
# 1. 加载乳腺癌数据集
data = load_breast_cancer()
# 2.1 数据集基本处理
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
for i in df.columns:
# 检查列是否有缺失值
if np.any(pd.isnull(df[i])):
print(f"Filling missing values in column: {i}")
# 2.2 确认特征值、目标值
X = df.iloc[:, 0:df.shape[1] - 1]
y = df.loc[:, "target"]
# 2.3 分割数据
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# 显示前几行数据
df.head(1)

# 3. 特征工程 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 4 机器学习 逻辑回归
estimator = LogisticRegression()
estimator.fit(X_train, y_train)

# 5. 模型评估
print(f"模型准确率:{estimator.score(X_test, y_test)}")
y_pred = estimator.predict(X_test)
print(f"模型预测值为:\\n{y_pred}")
# 5.1 精确率、召回率
ret = classification_report(y_test, y_pred, labels=[1, 0], target_names=["良性", "恶性"])
roc_score = roc_auc_score(y_test, y_pred)
print(f"准确率、召回率:{ret}")
print(f"roc_score:{roc_score}")
\n\n

类别不平衡的处理

先准备类别不平衡的数据

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from imblearn.over_sampling import RandomOverSampler,SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from collections import Counter

# 1.准备类别不平衡的数据
X, y = make_classification(
n_samples=5000,
n_features=2,
n_informative=2,
n_redundant=0,
n_repeated=0,
n_classes=3,
n_clusters_per_class=1,
weights=[0.01, 0.05, 0.94],
random_state=0,
)
counter = Counter(y)
plt.scatter(X[:,0],X[:,1],c=y)
plt.show()
\n\n\n","cover":false,"excerpt":"","more":"

logistic regression code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# 1. 加载乳腺癌数据集
data = load_breast_cancer()
# 2.1 数据集基本处理
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
for i in df.columns:
# 检查列是否有缺失值
if np.any(pd.isnull(df[i])):
print(f"Filling missing values in column: {i}")
#2.2 确认特征值、目标值
X = df.iloc[:,0:df.shape[1] - 1]
y = df.loc[:,"target"]
# 2.3 分割数据
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
# 显示前几行数据
df.head(1)

# 3. 特征工程 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 4 机器学习 逻辑回归
estimator = LogisticRegression()
estimator.fit(X_train,y_train)

# 5. 模型评估
print(f"模型准确率:{estimator.score(X_test,y_test)}")
print(f"模型预测值为:\\n{estimator.predict(X_test)}")
\n\n

分类评估的参数

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
# 1. 加载乳腺癌数据集
data = load_breast_cancer()
# 2.1 数据集基本处理
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
for i in df.columns:
# 检查列是否有缺失值
if np.any(pd.isnull(df[i])):
print(f"Filling missing values in column: {i}")
# 2.2 确认特征值、目标值
X = df.iloc[:, 0:df.shape[1] - 1]
y = df.loc[:, "target"]
# 2.3 分割数据
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# 显示前几行数据
df.head(1)

# 3. 特征工程 标准化
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)

# 4 机器学习 逻辑回归
estimator = LogisticRegression()
estimator.fit(X_train, y_train)

# 5. 模型评估
print(f"模型准确率:{estimator.score(X_test, y_test)}")
y_pred = estimator.predict(X_test)
print(f"模型预测值为:\\n{y_pred}")
# 5.1 精确率、召回率
ret = classification_report(y_test, y_pred, labels=[1, 0], target_names=["良性", "恶性"])
roc_score = roc_auc_score(y_test, y_pred)
print(f"准确率、召回率:{ret}")
print(f"roc_score:{roc_score}")
\n\n

类别不平衡的处理

先准备类别不平衡的数据

\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from imblearn.over_sampling import RandomOverSampler,SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from collections import Counter

# 1.准备类别不平衡的数据
X, y = make_classification(
n_samples=5000,
n_features=2,
n_informative=2,
n_redundant=0,
n_repeated=0,
n_classes=3,
n_clusters_per_class=1,
weights=[0.01, 0.05, 0.94],
random_state=0,
)
counter = Counter(y)
plt.scatter(X[:,0],X[:,1],c=y)
plt.show()
\n\n\n"},{"title":"组件使用","abbrlink":33957,"date":"2024-08-05T06:07:01.000Z","_content":"\n### 组件自动导入\n```json\n\t\"easycom\":{\n\t \"autoscan\": true,\n\t \"custom\": {\n\t \"^tui-(.*)\": \"@/components/thorui/tui-$1/tui-$1.vue\" // 匹配components目录内的vue文件\n\t }\n\t}\n```\n\n### `tui-sticky 吸顶容器` \n\n> 包含 以下 `tui` 组件 :\n> - tui-sticky\n> - tui-list-view\n> - tui-list-cell\n> \n\n```html\n\n \n \n \n \n \n \n\n\n\n```\n","source":"_posts/frontend/uniapp/component1.md","raw":"---\ntitle: 组件使用\ntags: uniapp\nabbrlink: 33957\ndate: 2024-08-05 14:07:01\n---\n\n### 组件自动导入\n```json\n\t\"easycom\":{\n\t \"autoscan\": true,\n\t \"custom\": {\n\t \"^tui-(.*)\": \"@/components/thorui/tui-$1/tui-$1.vue\" // 匹配components目录内的vue文件\n\t }\n\t}\n```\n\n### `tui-sticky 吸顶容器` \n\n> 包含 以下 `tui` 组件 :\n> - tui-sticky\n> - tui-list-view\n> - tui-list-cell\n> \n\n```html\n\n \n \n \n \n \n \n\n\n\n```\n","slug":"frontend/uniapp/component1","published":1,"updated":"2024-08-09T12:21:50.042Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jno00103wahaue5bi4q","content":"

组件自动导入

1
2
3
4
5
6
"easycom":{
"autoscan": true,
"custom": {
"^tui-(.*)": "@/components/thorui/tui-$1/tui-$1.vue" // 匹配components目录内的vue文件
}
}
\n\n

tui-sticky 吸顶容器

\n

包含 以下 tui 组件 :

\n
    \n
  • tui-sticky
  • \n
  • tui-list-view
  • \n
  • tui-list-cell
  • \n
\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
<tui-sticky :scrollTop="scrollTop" stickyHeight="104rpx" container>
<!-- header start -->
<template v-slot:header>
<view class="sticky-item">
<view class="setting">设置</view>
</view>
</template>
<!-- header end -->
<!--内容 start-->
<template v-slot:content>
<tui-list-view class="content">
<tui-list-cell :arrow="false">
<switch class='switch' checked color="#FFCC33" />
</tui-list-cell>
</tui-list-view>
</template>
<!--内容 end-->
</tui-sticky>

<script setup>
import { ref } from 'vue'
import { onPageScroll } from '@dcloudio/uni-app'

// 定义 scrollTop 响应式变量
const scrollTop = ref(0)
// 监听页面滚动事件
onPageScroll((e) => {
scrollTop.value = e.scrollTop
})
</script>
\n","cover":false,"excerpt":"","more":"

组件自动导入

1
2
3
4
5
6
"easycom":{
"autoscan": true,
"custom": {
"^tui-(.*)": "@/components/thorui/tui-$1/tui-$1.vue" // 匹配components目录内的vue文件
}
}
\n\n

tui-sticky 吸顶容器

\n

包含 以下 tui 组件 :

\n
    \n
  • tui-sticky
  • \n
  • tui-list-view
  • \n
  • tui-list-cell
  • \n
\n
\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
<tui-sticky :scrollTop="scrollTop" stickyHeight="104rpx" container>
<!-- header start -->
<template v-slot:header>
<view class="sticky-item">
<view class="setting">设置</view>
</view>
</template>
<!-- header end -->
<!--内容 start-->
<template v-slot:content>
<tui-list-view class="content">
<tui-list-cell :arrow="false">
<switch class='switch' checked color="#FFCC33" />
</tui-list-cell>
</tui-list-view>
</template>
<!--内容 end-->
</tui-sticky>

<script setup>
import { ref } from 'vue'
import { onPageScroll } from '@dcloudio/uni-app'

// 定义 scrollTop 响应式变量
const scrollTop = ref(0)
// 监听页面滚动事件
onPageScroll((e) => {
scrollTop.value = e.scrollTop
})
</script>
\n"},{"title":"Hadoop集群搭建基础环境","top_img":"/img/site01.jpg","top_img_height":"800px","abbrlink":61253,"date":"2024-09-11T14:45:40.000Z","_content":"\n### 防火墙关闭\n```bash\n# 在 6 台主机执行\nsystemctl stop firewalld\nsystemctl disable firewalld\n```\n### 配置yum源\n- 下载 repo 文件:\n [Centos-7.repo](http://mirrors.aliyun.com/repo/Centos-7.repo)\n 并上传到`/tmp`,进入到`/tmp`\n- 备份并且替换系统的repo文件\n ``` bash\n \tcp Centos-7.repo /etc/yum.repos.d/ \n\tcd /etc/yum.repos.d/ \n\tmv CentOS-Base.repo CentOS-Base.repo.bak \n\tmv Centos-7.repo CentOS-Base.repo\n\t ```\n- 将`nn1`上的`CentOS-Base.repo`拷贝到其他主机\n ```bash\n scp /etc/yum.repos.d/CentOS-Base.repo root@nn2:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@nn3:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s1:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s2:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s3:/etc/yum.repos.d\n ```\n- 执行yum源更新命令\n ```bash\n\t yum clean all\n\t yum makecache \n\t yum update -y \n\t```\n- 安装常用软件\n ```bash\n yum install -y openssh-server vim gcc gcc-c++ glibc-headers bzip2-devel lzo-devel curl wget openssh-clients zlib-devel autoconf automake cmake libtool openssl-devel fuse-devel snappy-devel telnet unzip zip net-tools.x86_64 firewalld systemd ntp unrar bzip2\n ```\n### JDK安装\n>注意需要在六台机器依次执行\n- 上传到`/tmp`目录下,安装\n ```bash\n cd /tmp\n rpm -ivh jdk-8u144-linux-x64.rpm\n ```\n- 配置环境变量\n ```bash\n ln -s /usr/java/jdk1.8.0_144/ /usr/java/jdk1.8\n echo 'export JAVA_HOME=/usr/java/jdk1.8' >> /etc/profile.d/myEnv.sh \n echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile.d/myEnv.sh \n source /etc/profile \n java -version\n ```\n### 修改主机名和主机名映射\n\n```bash\nvim /etc/hostname\n```\n6台机器分别为nn1、nn2、nn3、s1、s2、s3\n\n```bash\nvim /etc/hosts\n```\n\n修改为\n```text\n192.168.1.30 nn1\n192.168.1.31 nn2\n192.168.1.32 nn3\n192.168.1.33 s1\n192.168.1.34 s2\n192.168.1.35 s3\n```\n### 创建hadoop用户\n```bash\n#创建hadoop用户 \nuseradd hadoop \n#给hadoop用户设置密码: 12345678 \npasswd hadoop\n```\n### 禁止非 wheel 组用户切换到root,配置免密切换root\n- 修改/etc/pam.d/su配置\n ```bash\n sed -i 's/#auth\\t\\trequired\\tpam_wheel.so/auth\\t\\trequired\\tpam_wheel.so/g' '/etc/pam.d/su' \n sed -i 's/#auth\\t\\tsufficient\\tpam_wheel.so/auth\\t\\tsufficient\\tpam_wheel.so/g' '/etc/pam.d/su'\n ```\n- 修改/etc/login.defs文件\n ```bash\n echo \"SU_WHEEL_ONLY yes\" >> /etc/login.defs\n ```\n- 添加用户到管理员,禁止普通用户su 到 root\n ```bash\n #把hadoop用户加到wheel组里\n gpasswd -a hadoop wheel\n #查看wheel组里是否有hadoop用户\n cat /etc/group | grep wheel\n ```\n### 给hadoop用户,配置SSH密钥\n#### 配置hadoop用户ssh免密码登录到hadoop\n- 仅在`nn1`执行这段脚本命令即可\n 但是 `su - hadoop ` ,` mkdir ~/.ssh` 需要在其他主机执行一下\n ```bash\n #切换到hadoop用户 \n su - hadoop\n #生成ssh公私钥 \n ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''\n ssh-copy-id nn1\n ssh-copy-id nn2\n ssh-copy-id nn3\n ssh-copy-id s1\n ssh-copy-id s2\n ssh-copy-id s3\n scp /home/hadoop/.ssh/id_rsa hadoop@nn2:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@nn3:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s1:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s2:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s3:/home/hadoop/.ssh\n ```\n#### 配置hadoop用户ssh免密码登录到root\n- 同上\n ```bash\n ssh-copy-id root@nn1\n ssh-copy-id root@ nn2\n ssh-copy-id root@nn3\n ssh-copy-id root@s1\n ssh-copy-id root@s2\n ssh-copy-id root@s3\n scp /home/hadoop/.ssh/id_rsa root@nn2:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@nn3:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s1:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s2:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s3:/root/.ssh\n ```\n### 脚本配置\n- **ips**\n ```bash\n vim /home/hadoop/bin/ips\n ```\n \n ```bash\n nn1 \n nn2\n nn3\n s1 \n s2 \n s3\n ```\n- **ssh_all.sh**\n ```bash\n vim /home/hadoop/bin/ssh_all.sh\n ```\n\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接ssh命令: ssh hadoop@nn1.hadoop ls\n cmd_=\"ssh hadoop@${ip} \\\"$*\\\" \"\n echo $cmd_\n # 通过eval命令 执行 拼接的ssh 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **ssh_root.sh**\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接ssh命令: ssh hadoop@nn1.hadoop ls\n cmd_=\"ssh hadoop@${ip} ~/bin/exe.sh \\\"$*\\\"\"\n echo $cmd_\n # 通过eval命令 执行 拼接的ssh 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **scp_all.sh**\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 源\n source_=$1\n # 目标\n target=$2\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接scp命令: scp 源 hadoop@nn1.hadoop:目标\n cmd_=\"scp -r ${source_} hadoop@${ip}:${target}\"\n echo $cmd_\n # 通过eval命令 执行 拼接的scp 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **exe.sh**\n ```bash\n #切换到root用户执行cmd命令\n cmd=$*\n su - << EOF\n $cmd\n EOF\n ```\n- 赋予执行权限\n ```bash\n chmod +x ssh_all.sh \n chmod +x scp_all.sh\n chmod +x ssh_root.sh\n chmod +x exe.sh\n ```\n- 分发到其他主机\n ```bash\n ./ssh_all.sh mkdir /home/hadoop/bin\n ./scp_all.sh /home/hadoop/bin/ips /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/exe.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/ssh_all.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/scp_all.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/ssh_root.sh /home/hadoop/bin/\n ```\n\n- 将 `/home/hadoop/bin`添加到hadoop用户的环境变量,需要切换到`hadoop`用户\n\n ```bash\n echo 'export PATH=$PATH:/home/hadoop/bin' >> ~/.bashrc && source ~/.bashrc\n scp_all.sh /home/hadoop/.bashrc /home/hadoop/\n ssh_all.sh source ~/.bashrc \n ```","source":"_posts/bigdata/hadoop/env.md","raw":"---\ntitle: Hadoop集群搭建基础环境\ntop_img: /img/site01.jpg\ntop_img_height: 800px\nabbrlink: 61253\ndate: 2024-09-011 22:45:40\n---\n\n### 防火墙关闭\n```bash\n# 在 6 台主机执行\nsystemctl stop firewalld\nsystemctl disable firewalld\n```\n### 配置yum源\n- 下载 repo 文件:\n [Centos-7.repo](http://mirrors.aliyun.com/repo/Centos-7.repo)\n 并上传到`/tmp`,进入到`/tmp`\n- 备份并且替换系统的repo文件\n ``` bash\n \tcp Centos-7.repo /etc/yum.repos.d/ \n\tcd /etc/yum.repos.d/ \n\tmv CentOS-Base.repo CentOS-Base.repo.bak \n\tmv Centos-7.repo CentOS-Base.repo\n\t ```\n- 将`nn1`上的`CentOS-Base.repo`拷贝到其他主机\n ```bash\n scp /etc/yum.repos.d/CentOS-Base.repo root@nn2:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@nn3:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s1:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s2:/etc/yum.repos.d\n scp /etc/yum.repos.d/CentOS-Base.repo root@s3:/etc/yum.repos.d\n ```\n- 执行yum源更新命令\n ```bash\n\t yum clean all\n\t yum makecache \n\t yum update -y \n\t```\n- 安装常用软件\n ```bash\n yum install -y openssh-server vim gcc gcc-c++ glibc-headers bzip2-devel lzo-devel curl wget openssh-clients zlib-devel autoconf automake cmake libtool openssl-devel fuse-devel snappy-devel telnet unzip zip net-tools.x86_64 firewalld systemd ntp unrar bzip2\n ```\n### JDK安装\n>注意需要在六台机器依次执行\n- 上传到`/tmp`目录下,安装\n ```bash\n cd /tmp\n rpm -ivh jdk-8u144-linux-x64.rpm\n ```\n- 配置环境变量\n ```bash\n ln -s /usr/java/jdk1.8.0_144/ /usr/java/jdk1.8\n echo 'export JAVA_HOME=/usr/java/jdk1.8' >> /etc/profile.d/myEnv.sh \n echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile.d/myEnv.sh \n source /etc/profile \n java -version\n ```\n### 修改主机名和主机名映射\n\n```bash\nvim /etc/hostname\n```\n6台机器分别为nn1、nn2、nn3、s1、s2、s3\n\n```bash\nvim /etc/hosts\n```\n\n修改为\n```text\n192.168.1.30 nn1\n192.168.1.31 nn2\n192.168.1.32 nn3\n192.168.1.33 s1\n192.168.1.34 s2\n192.168.1.35 s3\n```\n### 创建hadoop用户\n```bash\n#创建hadoop用户 \nuseradd hadoop \n#给hadoop用户设置密码: 12345678 \npasswd hadoop\n```\n### 禁止非 wheel 组用户切换到root,配置免密切换root\n- 修改/etc/pam.d/su配置\n ```bash\n sed -i 's/#auth\\t\\trequired\\tpam_wheel.so/auth\\t\\trequired\\tpam_wheel.so/g' '/etc/pam.d/su' \n sed -i 's/#auth\\t\\tsufficient\\tpam_wheel.so/auth\\t\\tsufficient\\tpam_wheel.so/g' '/etc/pam.d/su'\n ```\n- 修改/etc/login.defs文件\n ```bash\n echo \"SU_WHEEL_ONLY yes\" >> /etc/login.defs\n ```\n- 添加用户到管理员,禁止普通用户su 到 root\n ```bash\n #把hadoop用户加到wheel组里\n gpasswd -a hadoop wheel\n #查看wheel组里是否有hadoop用户\n cat /etc/group | grep wheel\n ```\n### 给hadoop用户,配置SSH密钥\n#### 配置hadoop用户ssh免密码登录到hadoop\n- 仅在`nn1`执行这段脚本命令即可\n 但是 `su - hadoop ` ,` mkdir ~/.ssh` 需要在其他主机执行一下\n ```bash\n #切换到hadoop用户 \n su - hadoop\n #生成ssh公私钥 \n ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''\n ssh-copy-id nn1\n ssh-copy-id nn2\n ssh-copy-id nn3\n ssh-copy-id s1\n ssh-copy-id s2\n ssh-copy-id s3\n scp /home/hadoop/.ssh/id_rsa hadoop@nn2:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@nn3:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s1:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s2:/home/hadoop/.ssh\n scp /home/hadoop/.ssh/id_rsa hadoop@s3:/home/hadoop/.ssh\n ```\n#### 配置hadoop用户ssh免密码登录到root\n- 同上\n ```bash\n ssh-copy-id root@nn1\n ssh-copy-id root@ nn2\n ssh-copy-id root@nn3\n ssh-copy-id root@s1\n ssh-copy-id root@s2\n ssh-copy-id root@s3\n scp /home/hadoop/.ssh/id_rsa root@nn2:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@nn3:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s1:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s2:/root/.ssh\n scp /home/hadoop/.ssh/id_rsa root@s3:/root/.ssh\n ```\n### 脚本配置\n- **ips**\n ```bash\n vim /home/hadoop/bin/ips\n ```\n \n ```bash\n nn1 \n nn2\n nn3\n s1 \n s2 \n s3\n ```\n- **ssh_all.sh**\n ```bash\n vim /home/hadoop/bin/ssh_all.sh\n ```\n\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接ssh命令: ssh hadoop@nn1.hadoop ls\n cmd_=\"ssh hadoop@${ip} \\\"$*\\\" \"\n echo $cmd_\n # 通过eval命令 执行 拼接的ssh 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **ssh_root.sh**\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接ssh命令: ssh hadoop@nn1.hadoop ls\n cmd_=\"ssh hadoop@${ip} ~/bin/exe.sh \\\"$*\\\"\"\n echo $cmd_\n # 通过eval命令 执行 拼接的ssh 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **scp_all.sh**\n ```bash\n #! /bin/bash\n # 进入到当前脚本所在目录\n cd `dirname $0`\n # 获取当前脚本所在目录\n dir_path=`pwd`\n #echo $dir_path\n # 读ips文件得到数组(里面是一堆主机名)\n ip_arr=(`cat $dir_path/ips`)\n # 源\n source_=$1\n # 目标\n target=$2\n # 遍历数组里的主机名\n for ip in ${ip_arr[*]}\n do\n # 拼接scp命令: scp 源 hadoop@nn1.hadoop:目标\n cmd_=\"scp -r ${source_} hadoop@${ip}:${target}\"\n echo $cmd_\n # 通过eval命令 执行 拼接的scp 命令\n if eval ${cmd_} ; then\n echo \"OK\"\n else\n echo \"FAIL\"\n fi\n done\n ```\n- **exe.sh**\n ```bash\n #切换到root用户执行cmd命令\n cmd=$*\n su - << EOF\n $cmd\n EOF\n ```\n- 赋予执行权限\n ```bash\n chmod +x ssh_all.sh \n chmod +x scp_all.sh\n chmod +x ssh_root.sh\n chmod +x exe.sh\n ```\n- 分发到其他主机\n ```bash\n ./ssh_all.sh mkdir /home/hadoop/bin\n ./scp_all.sh /home/hadoop/bin/ips /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/exe.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/ssh_all.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/scp_all.sh /home/hadoop/bin/\n ./scp_all.sh /home/hadoop/bin/ssh_root.sh /home/hadoop/bin/\n ```\n\n- 将 `/home/hadoop/bin`添加到hadoop用户的环境变量,需要切换到`hadoop`用户\n\n ```bash\n echo 'export PATH=$PATH:/home/hadoop/bin' >> ~/.bashrc && source ~/.bashrc\n scp_all.sh /home/hadoop/.bashrc /home/hadoop/\n ssh_all.sh source ~/.bashrc \n ```","slug":"bigdata/hadoop/env","published":1,"updated":"2024-09-11T14:45:28.095Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnp00113wahdb9hankh","content":"

防火墙关闭

1
2
3
# 在 6 台主机执行
systemctl stop firewalld
systemctl disable firewalld
\n

配置yum源

\n

JDK安装

\n

注意需要在六台机器依次执行

\n
\n\n

修改主机名和主机名映射

1
vim /etc/hostname
\n

6台机器分别为nn1、nn2、nn3、s1、s2、s3

\n
1
vim /etc/hosts
\n\n

修改为

\n
1
2
3
4
5
6
192.168.1.30 nn1
192.168.1.31 nn2
192.168.1.32 nn3
192.168.1.33 s1
192.168.1.34 s2
192.168.1.35 s3
\n

创建hadoop用户

1
2
3
4
#创建hadoop用户 
useradd hadoop
#给hadoop用户设置密码: 12345678
passwd hadoop
\n

禁止非 wheel 组用户切换到root,配置免密切换root

\n

给hadoop用户,配置SSH密钥

配置hadoop用户ssh免密码登录到hadoop

\n

配置hadoop用户ssh免密码登录到root

\n

脚本配置

\n","cover":false,"excerpt":"","more":"

防火墙关闭

1
2
3
# 在 6 台主机执行
systemctl stop firewalld
systemctl disable firewalld
\n

配置yum源

\n

JDK安装

\n

注意需要在六台机器依次执行

\n
\n\n

修改主机名和主机名映射

1
vim /etc/hostname
\n

6台机器分别为nn1、nn2、nn3、s1、s2、s3

\n
1
vim /etc/hosts
\n\n

修改为

\n
1
2
3
4
5
6
192.168.1.30 nn1
192.168.1.31 nn2
192.168.1.32 nn3
192.168.1.33 s1
192.168.1.34 s2
192.168.1.35 s3
\n

创建hadoop用户

1
2
3
4
#创建hadoop用户 
useradd hadoop
#给hadoop用户设置密码: 12345678
passwd hadoop
\n

禁止非 wheel 组用户切换到root,配置免密切换root

\n

给hadoop用户,配置SSH密钥

配置hadoop用户ssh免密码登录到hadoop

\n

配置hadoop用户ssh免密码登录到root

\n

脚本配置

\n"},{"title":"Hadoop集群HDFS配置","top_img":"/img/site01.jpg","top_img_height":"800px","abbrlink":61252,"date":"2024-09-11T14:45:40.000Z","_content":"\n### 上传`hadoop-3.1.4.tar.gz`到`/tmp`,解压\n>注意在六台机器均上传到`/tmp`\n```bash\n# 在6台机器执行\nsudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/\n# 分发到其他主机\nssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4\nssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop\n```\n### 配置环境变量\n```bash\necho 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh\necho 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh\necho 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh\n```\n\n```bash\n# 分发到nn2、nn3、s1、s2、s3\nscp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/\n# source 环境变量\nssh_root.sh source /etc/profile\n```\n>还需要创建 `/data`这个目录,由于nn1、nn2、nn3已经创建`/data`,其他三台需要创建一下\n```bash\n### 在s1、s2、s3执行\nsudo mkdir /data\nsudo chown -R hadoop:hadoop /data\n```\n\n### 修改core-site.xml\n```bash\nvim /usr/local/hadoop/etc/hadoop/core-site.xml \n```\n\n```xml\n\n\t\n\t fs.defaultFS\n\t hdfs://ns1\n\t 默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name\n\t\n\t\n\t\n\t hadoop.tmp.dir\n\t /data/tmp\n\t 数据存储目录\n\t\n\t\n\t\n\t hadoop.proxyuser.root.groups\n\t hadoop\n\t \n\t hdfs dfsadmin –refreshSuperUserGroupsConfiguration,\n\t yarn rmadmin –refreshSuperUserGroupsConfiguration\n\t 使用这两个命令不用重启就能刷新\n\t \n\t\n\t\n\t\n\t hadoop.proxyuser.root.hosts\n\t localhost\n\t 本地代理\n\t\n\t\n\t\n\t \n\t\tha.zookeeper.quorum \n\t\tnn1:2181,nn2:2181,nn3:2181 \n\t\tHA使用的zookeeper地址 \n\t\n\n```\n### 修改`hdfs-site.xml`\n```bash\nvim /usr/local/hadoop/etc/hadoop/hdfs-site.xml \n```\n\n```xml\n\n \n dfs.namenode.name.dir\n /data/namenode\n namenode本地文件存放地址\n \n \n \n dfs.nameservices\n ns1\n 提供服务的NS逻辑名称,与core-site.xml里的对应\n \n \n \n \n \n dfs.ha.namenodes.ns1\n nn1,nn2,nn3\n 列出该逻辑名称下的NameNode逻辑名称\n \n \n \n dfs.namenode.rpc-address.ns1.nn1\n nn1:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn1\n nn1:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.rpc-address.ns1.nn2\n nn2:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn2\n nn2:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.rpc-address.ns1.nn3\n nn3:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn3\n nn3:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.handler.count\n 77\n namenode的工作线程数\n \n\n \n \n dfs.namenode.shared.edits.dir\n qjournal://nn1:8485;nn2:8485;nn3:8485/ns1\n 指定用于HA存放edits的共享存储,通常是namenode的所在机器\n \n \n \n dfs.journalnode.edits.dir\n /data/journaldata/\n journaldata服务存放文件的地址\n \n \n \n ipc.client.connect.max.retries\n 10\n namenode和journalnode的链接重试次数10次\n \n \n \n ipc.client.connect.retry.interval\n 10000\n 重试的间隔时间10s\n \n\n \n \n dfs.ha.fencing.methods\n sshfence\n 指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述\n \n \n \n dfs.ha.fencing.ssh.private-key-files\n /home/hadoop/.ssh/id_rsa\n 杀死命令脚本的免密配置秘钥\n \n \n \n dfs.client.failover.proxy.provider.ns1\n org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\n 指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类\n \n \n \n dfs.client.failover.proxy.provider.auto-ha\n org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\n \n \n \n dfs.ha.automatic-failover.enabled\n true\n \n\t\n\t\n\t dfs.datanode.data.dir\n\t /data/datanode\n\t datanode本地文件存放地址\n\t\n\t\n\t dfs.replication\n\t 3\n\t 文件复本数\n\t\n\t\n\t dfs.namenode.datanode.registration.ip-hostname-check\n\t false\n\t\n\t\n\t dfs.client.use.datanode.hostname\n\t true\n\t\n\t\n\t dfs.datanode.use.datanode.hostname\n\t true\n\t\n\n```\n### 修改`hadoop-env.sh`\n```bash\nvim /usr/local/hadoop/etc/hadoop/hadoop-env.sh\n```\n\n```bash\n# 添加这两行\nsource /etc/profile \nexport HADOOP_HEAPSIZE_MAX=512\n```\n### 分发这些配置文件\n```bash\nscp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/\nscp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/\nscp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/\n```\n### 集群初始化\n- 需要先启动zookeeper集群\n ```bash\n ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start\n ```\n\n ```bash\n # 第一次启动先启动journalnode,便于3个namenode的元数据同步\n ssh_all_zookeeper.sh hadoop-daemon.sh start journalnode\n ```\n- `zkfc`搭建\n ```bash\n #在nn1节点执行 \n hdfs zkfc -formatZK\n #nn1 nn2 nn3启动zkfc \n hadoop-daemon.sh start zkfc\n ```\n- 初始化nn1的namenode,在nn1执行\n ```bash\n hdfs namenode -format \n hadoop-daemon.sh start namenode\n ```\n- 格式化第二台和第三台namenode,并且启动namenode,在nn2、nn3执行\n ```bash\n hdfs namenode -bootstrapStandby \n hadoop-daemon.sh start namenode\n ```\n- 修改**workers**\n ```bash\n vim /usr/local/hadoop/etc/hadoop/workers\n ```\n 修改为\n ```text\n s1\n s2\n s3\n ```\n 分发给其他机器\n ```bash\n scp_all.sh /usr/local/hadoop/etc/hadoop/workers /usr/local/hadoop/etc/hadoop\n ```\n- 启动datanode节点,在s1、s2、s3执行\n\n ```bash\n #启动各个节点的datanode\n hadoop-daemons.sh start datanode\n ```\n### 集群启动\n```bash\nstart-dfs.sh\n```","source":"_posts/bigdata/hadoop/hdfs.md","raw":"---\ntitle: Hadoop集群HDFS配置\ntop_img: /img/site01.jpg\ntop_img_height: 800px\nabbrlink: 61252\ndate: 2024-09-011 22:45:40\n---\n\n### 上传`hadoop-3.1.4.tar.gz`到`/tmp`,解压\n>注意在六台机器均上传到`/tmp`\n```bash\n# 在6台机器执行\nsudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/\n# 分发到其他主机\nssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4\nssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop\n```\n### 配置环境变量\n```bash\necho 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh\necho 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh\necho 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh\n```\n\n```bash\n# 分发到nn2、nn3、s1、s2、s3\nscp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/\n# source 环境变量\nssh_root.sh source /etc/profile\n```\n>还需要创建 `/data`这个目录,由于nn1、nn2、nn3已经创建`/data`,其他三台需要创建一下\n```bash\n### 在s1、s2、s3执行\nsudo mkdir /data\nsudo chown -R hadoop:hadoop /data\n```\n\n### 修改core-site.xml\n```bash\nvim /usr/local/hadoop/etc/hadoop/core-site.xml \n```\n\n```xml\n\n\t\n\t fs.defaultFS\n\t hdfs://ns1\n\t 默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name\n\t\n\t\n\t\n\t hadoop.tmp.dir\n\t /data/tmp\n\t 数据存储目录\n\t\n\t\n\t\n\t hadoop.proxyuser.root.groups\n\t hadoop\n\t \n\t hdfs dfsadmin –refreshSuperUserGroupsConfiguration,\n\t yarn rmadmin –refreshSuperUserGroupsConfiguration\n\t 使用这两个命令不用重启就能刷新\n\t \n\t\n\t\n\t\n\t hadoop.proxyuser.root.hosts\n\t localhost\n\t 本地代理\n\t\n\t\n\t\n\t \n\t\tha.zookeeper.quorum \n\t\tnn1:2181,nn2:2181,nn3:2181 \n\t\tHA使用的zookeeper地址 \n\t\n\n```\n### 修改`hdfs-site.xml`\n```bash\nvim /usr/local/hadoop/etc/hadoop/hdfs-site.xml \n```\n\n```xml\n\n \n dfs.namenode.name.dir\n /data/namenode\n namenode本地文件存放地址\n \n \n \n dfs.nameservices\n ns1\n 提供服务的NS逻辑名称,与core-site.xml里的对应\n \n \n \n \n \n dfs.ha.namenodes.ns1\n nn1,nn2,nn3\n 列出该逻辑名称下的NameNode逻辑名称\n \n \n \n dfs.namenode.rpc-address.ns1.nn1\n nn1:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn1\n nn1:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.rpc-address.ns1.nn2\n nn2:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn2\n nn2:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.rpc-address.ns1.nn3\n nn3:9000\n 指定NameNode的RPC位置\n \n \n \n dfs.namenode.http-address.ns1.nn3\n nn3:50070\n 指定NameNode的Web Server位置\n \n \n \n dfs.namenode.handler.count\n 77\n namenode的工作线程数\n \n\n \n \n dfs.namenode.shared.edits.dir\n qjournal://nn1:8485;nn2:8485;nn3:8485/ns1\n 指定用于HA存放edits的共享存储,通常是namenode的所在机器\n \n \n \n dfs.journalnode.edits.dir\n /data/journaldata/\n journaldata服务存放文件的地址\n \n \n \n ipc.client.connect.max.retries\n 10\n namenode和journalnode的链接重试次数10次\n \n \n \n ipc.client.connect.retry.interval\n 10000\n 重试的间隔时间10s\n \n\n \n \n dfs.ha.fencing.methods\n sshfence\n 指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述\n \n \n \n dfs.ha.fencing.ssh.private-key-files\n /home/hadoop/.ssh/id_rsa\n 杀死命令脚本的免密配置秘钥\n \n \n \n dfs.client.failover.proxy.provider.ns1\n org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\n 指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类\n \n \n \n dfs.client.failover.proxy.provider.auto-ha\n org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider\n \n \n \n dfs.ha.automatic-failover.enabled\n true\n \n\t\n\t\n\t dfs.datanode.data.dir\n\t /data/datanode\n\t datanode本地文件存放地址\n\t\n\t\n\t dfs.replication\n\t 3\n\t 文件复本数\n\t\n\t\n\t dfs.namenode.datanode.registration.ip-hostname-check\n\t false\n\t\n\t\n\t dfs.client.use.datanode.hostname\n\t true\n\t\n\t\n\t dfs.datanode.use.datanode.hostname\n\t true\n\t\n\n```\n### 修改`hadoop-env.sh`\n```bash\nvim /usr/local/hadoop/etc/hadoop/hadoop-env.sh\n```\n\n```bash\n# 添加这两行\nsource /etc/profile \nexport HADOOP_HEAPSIZE_MAX=512\n```\n### 分发这些配置文件\n```bash\nscp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/\nscp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/\nscp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/\n```\n### 集群初始化\n- 需要先启动zookeeper集群\n ```bash\n ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start\n ```\n\n ```bash\n # 第一次启动先启动journalnode,便于3个namenode的元数据同步\n ssh_all_zookeeper.sh hadoop-daemon.sh start journalnode\n ```\n- `zkfc`搭建\n ```bash\n #在nn1节点执行 \n hdfs zkfc -formatZK\n #nn1 nn2 nn3启动zkfc \n hadoop-daemon.sh start zkfc\n ```\n- 初始化nn1的namenode,在nn1执行\n ```bash\n hdfs namenode -format \n hadoop-daemon.sh start namenode\n ```\n- 格式化第二台和第三台namenode,并且启动namenode,在nn2、nn3执行\n ```bash\n hdfs namenode -bootstrapStandby \n hadoop-daemon.sh start namenode\n ```\n- 修改**workers**\n ```bash\n vim /usr/local/hadoop/etc/hadoop/workers\n ```\n 修改为\n ```text\n s1\n s2\n s3\n ```\n 分发给其他机器\n ```bash\n scp_all.sh /usr/local/hadoop/etc/hadoop/workers /usr/local/hadoop/etc/hadoop\n ```\n- 启动datanode节点,在s1、s2、s3执行\n\n ```bash\n #启动各个节点的datanode\n hadoop-daemons.sh start datanode\n ```\n### 集群启动\n```bash\nstart-dfs.sh\n```","slug":"bigdata/hadoop/hdfs","published":1,"updated":"2024-09-11T14:51:42.712Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnq00133wahetp950z2","content":"

上传hadoop-3.1.4.tar.gz/tmp,解压

\n

注意在六台机器均上传到/tmp

\n
\n
1
2
3
4
5
# 在6台机器执行
sudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/
# 分发到其他主机
ssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4
ssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop
\n

配置环境变量

1
2
3
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh
\n\n
1
2
3
4
# 分发到nn2、nn3、s1、s2、s3
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source 环境变量
ssh_root.sh source /etc/profile
\n
\n

还需要创建 /data这个目录,由于nn1、nn2、nn3已经创建/data,其他三台需要创建一下

\n
\n
1
2
3
### 在s1、s2、s3执行
sudo mkdir /data
sudo chown -R hadoop:hadoop /data
\n\n

修改core-site.xml

1
vim /usr/local/hadoop/etc/hadoop/core-site.xml 
\n\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
<configuration>
\t<property>
\t <name>fs.defaultFS</name>
\t <value>hdfs://ns1</value>
\t <description>默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
\t</property>
\t
\t<property>
\t <name>hadoop.tmp.dir</name>
\t <value>/data/tmp</value>
\t <description>数据存储目录</description>
\t</property>
\t
\t<property>
\t <name>hadoop.proxyuser.root.groups</name>
\t <value>hadoop</value>
\t <description>
\t hdfs dfsadmin –refreshSuperUserGroupsConfiguration,
\t yarn rmadmin –refreshSuperUserGroupsConfiguration
\t 使用这两个命令不用重启就能刷新
\t </description>
\t</property>
\t
\t<property>
\t <name>hadoop.proxyuser.root.hosts</name>
\t <value>localhost</value>
\t <description>本地代理</description>
\t</property>
\t
\t<!-- zkfc的配置 -->
\t<property>
\t\t<name>ha.zookeeper.quorum</name>
\t\t<value>nn1:2181,nn2:2181,nn3:2181</value>
\t\t<description>HA使用的zookeeper地址</description>
\t</property>
</configuration>
\n

修改hdfs-site.xml

1
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml 
\n\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>

<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称,与core-site.xml里的对应</description>
</property>

<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>

<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储,通常是namenode的所在机器</description>
</property>

<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>

<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>

<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>

<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述</description>
</property>

<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>

<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>

<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
\t<!-- datanode配置 -->
\t<property>
\t <name>dfs.datanode.data.dir</name>
\t <value>/data/datanode</value>
\t <description>datanode本地文件存放地址</description>
\t</property>
\t<property>
\t <name>dfs.replication</name>
\t <value>3</value>
\t <description>文件复本数</description>
\t</property>
\t<property>
\t <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
\t <value>false</value>
\t</property>
\t<property>
\t <name>dfs.client.use.datanode.hostname</name>
\t <value>true</value>
\t</property>
\t<property>
\t <name>dfs.datanode.use.datanode.hostname</name>
\t <value>true</value>
\t</property>
</configuration>
\n

修改hadoop-env.sh

1
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
\n\n
1
2
3
# 添加这两行
source /etc/profile
export HADOOP_HEAPSIZE_MAX=512
\n

分发这些配置文件

1
2
3
scp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/
\n

集群初始化

\n

集群启动

1
start-dfs.sh
","cover":false,"excerpt":"","more":"

上传hadoop-3.1.4.tar.gz/tmp,解压

\n

注意在六台机器均上传到/tmp

\n
\n
1
2
3
4
5
# 在6台机器执行
sudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/
# 分发到其他主机
ssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4
ssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop
\n

配置环境变量

1
2
3
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh
\n\n
1
2
3
4
# 分发到nn2、nn3、s1、s2、s3
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source 环境变量
ssh_root.sh source /etc/profile
\n
\n

还需要创建 /data这个目录,由于nn1、nn2、nn3已经创建/data,其他三台需要创建一下

\n
\n
1
2
3
### 在s1、s2、s3执行
sudo mkdir /data
sudo chown -R hadoop:hadoop /data
\n\n

修改core-site.xml

1
vim /usr/local/hadoop/etc/hadoop/core-site.xml 
\n\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
<configuration>
\t<property>
\t <name>fs.defaultFS</name>
\t <value>hdfs://ns1</value>
\t <description>默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
\t</property>
\t
\t<property>
\t <name>hadoop.tmp.dir</name>
\t <value>/data/tmp</value>
\t <description>数据存储目录</description>
\t</property>
\t
\t<property>
\t <name>hadoop.proxyuser.root.groups</name>
\t <value>hadoop</value>
\t <description>
\t hdfs dfsadmin –refreshSuperUserGroupsConfiguration,
\t yarn rmadmin –refreshSuperUserGroupsConfiguration
\t 使用这两个命令不用重启就能刷新
\t </description>
\t</property>
\t
\t<property>
\t <name>hadoop.proxyuser.root.hosts</name>
\t <value>localhost</value>
\t <description>本地代理</description>
\t</property>
\t
\t<!-- zkfc的配置 -->
\t<property>
\t\t<name>ha.zookeeper.quorum</name>
\t\t<value>nn1:2181,nn2:2181,nn3:2181</value>
\t\t<description>HA使用的zookeeper地址</description>
\t</property>
</configuration>
\n

修改hdfs-site.xml

1
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml 
\n\n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>

<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称,与core-site.xml里的对应</description>
</property>

<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>

<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>

<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>

<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储,通常是namenode的所在机器</description>
</property>

<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>

<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>

<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>

<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述</description>
</property>

<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>

<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>

<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
\t<!-- datanode配置 -->
\t<property>
\t <name>dfs.datanode.data.dir</name>
\t <value>/data/datanode</value>
\t <description>datanode本地文件存放地址</description>
\t</property>
\t<property>
\t <name>dfs.replication</name>
\t <value>3</value>
\t <description>文件复本数</description>
\t</property>
\t<property>
\t <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
\t <value>false</value>
\t</property>
\t<property>
\t <name>dfs.client.use.datanode.hostname</name>
\t <value>true</value>
\t</property>
\t<property>
\t <name>dfs.datanode.use.datanode.hostname</name>
\t <value>true</value>
\t</property>
</configuration>
\n

修改hadoop-env.sh

1
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
\n\n
1
2
3
# 添加这两行
source /etc/profile
export HADOOP_HEAPSIZE_MAX=512
\n

分发这些配置文件

1
2
3
scp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/
\n

集群初始化

\n

集群启动

1
start-dfs.sh
"},{"title":"uniapp 开发","abbrlink":58817,"date":"2024-08-05T06:07:01.000Z","_content":"- [uniapp component](../component1)","source":"_posts/frontend/uniapp/uniapp.md","raw":"---\ntitle: uniapp 开发\ntags: uniapp\nabbrlink: 58817\ndate: 2024-08-05 14:07:01\n---\n- [uniapp component](../component1)","slug":"frontend/uniapp/uniapp","published":1,"updated":"2024-08-09T12:21:50.039Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnq00143wahef4aabqf","content":"\n","cover":false,"excerpt":"","more":"\n"},{"title":"Hadoop集群Zookeeper配置","top_img":"/img/site01.jpg","top_img_height":"800px","abbrlink":61251,"date":"2024-09-11T14:45:40.000Z","_content":"\n### Zookeeper脚本配置\n- 拷贝ips\n ```bash\n cp ips ips_zookeeper\n ```\n 修改为\n ```bash\n nn1\n nn2\n nn3\n ```\n- 拷贝三个脚本\n ```bash\n cp scp_all.sh scp_all_zookeeper.sh \n cp ssh_all.sh ssh_all_zookeeper.sh \n cp ssh_root.sh ssh_root_zookeeper.sh\n ```\n- 修改脚本\n ```shell\n vim scp_all_zookeeper.sh \n vim ssh_all_zookeeper.sh \n vim ssh_root_zookeeper.sh \n ```\n\n 将三个脚本中的ips改为ips_zookeeper\n### Zookeeper安装\n- 上传到`/tmp`目录下,解压\n ```bash\n sudo tar -zxvf /tmp/zookeeper-3.4.8.tar.gz -C /usr/local/\n scp -r /usr/local/zookeeper-3.4.8/ root@nn2:/usr/local/\n scp -r /usr/local/zookeeper-3.4.8/ root@nn3:/usr/local/\n ssh_root_zookeeper.sh chown -R hadoop:hadoop /usr/local/zookeeper-3.4.8\n ssh_root_zookeeper.s ln -s /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper\n ```\n### Zookeeper配置\n - zoo.cfg配置\n ```bash\n cd /usr/local/zookeeper/conf/\n cp zoo_sample.cfg zoo.cfg\n ```\n 然后`vim zoo.cfg`,修改如下:\n ```properties\n # 修改dataDir\n dataDir=/data/zookeeper\n # 添加一下内容\n server.1=nn1:2888:3888 \n server.2=nn2:2888:3888 \n server.3=nn3:2888:3888\n ```\n 分发给nn2、nn3\n `scp_all_zookeeper.sh /usr/local/zookeeper/conf/zoo.cfg /usr/local/zookeeper/conf/`\n\n- `zkEnv.sh`配置\n `vim /usr/local/zookeeper/bin/zkEnv.sh`\n ![000001.png](..%2F..%2F..%2Fimg%2F000001.png)\n 分发到nn2、nn3\n```bash\nscp_all_zookeeper.sh /usr/local/zookeeper/bin/zkEnv.sh /usr/local/zookeeper/bin/\n```\n- 创建zookeeper数据目录\n```bash\nssh_root_zookeeper.sh mkdir -p /data/zookeeper\nssh_root_zookeeper.sh chown -R hadoop:hadoop /data\n```\n- 创建myid文件\n ```bash\n ssh nn1 'echo \"1\" > /data/zookeeper/myid'\n ssh nn2 'echo \"2\" > /data/zookeeper/myid'\n ssh nn3 'echo \"3\" > /data/zookeeper/myid'\n ```\n- 配置Zookeeper环境变量\n ```bash\n # 在其他所有主机也执行\n sudo chown -R hadoop:hadoop /etc/profile.d/myEnv.sh\n ```\n\n ```bash\n echo 'export ZOOKEEPER_HOME=/usr/local/zookeeper' >> /etc/profile.d/myEnv.sh\n echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile.d/myEnv.sh\n ```\n\n ```bash\n # 分发到nn2、nn3\n scp_all_zookeeper.sh /etc/profile.d/myEnv.sh /etc/profile.d/\n # source 环境变量\n ssh_all_zookeeper.sh source /etc/profile\n ```\n### Zookeeper的命令\n```bash\n#启动zk服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start\n#查看每个机器ZK运行的状态\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status\n#整体停止服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop \n#重启zk服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart\n```\n\n```shell\n#启动zookeeper客户端,并连接zookeeper集群\n/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181\n# 可以简化为:\nzkCli.sh\n```\n\n","source":"_posts/bigdata/hadoop/zookeper.md","raw":"---\ntitle: Hadoop集群Zookeeper配置\ntop_img: /img/site01.jpg\ntop_img_height: 800px\nabbrlink: 61251\ndate: 2024-09-011 22:45:40\n---\n\n### Zookeeper脚本配置\n- 拷贝ips\n ```bash\n cp ips ips_zookeeper\n ```\n 修改为\n ```bash\n nn1\n nn2\n nn3\n ```\n- 拷贝三个脚本\n ```bash\n cp scp_all.sh scp_all_zookeeper.sh \n cp ssh_all.sh ssh_all_zookeeper.sh \n cp ssh_root.sh ssh_root_zookeeper.sh\n ```\n- 修改脚本\n ```shell\n vim scp_all_zookeeper.sh \n vim ssh_all_zookeeper.sh \n vim ssh_root_zookeeper.sh \n ```\n\n 将三个脚本中的ips改为ips_zookeeper\n### Zookeeper安装\n- 上传到`/tmp`目录下,解压\n ```bash\n sudo tar -zxvf /tmp/zookeeper-3.4.8.tar.gz -C /usr/local/\n scp -r /usr/local/zookeeper-3.4.8/ root@nn2:/usr/local/\n scp -r /usr/local/zookeeper-3.4.8/ root@nn3:/usr/local/\n ssh_root_zookeeper.sh chown -R hadoop:hadoop /usr/local/zookeeper-3.4.8\n ssh_root_zookeeper.s ln -s /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper\n ```\n### Zookeeper配置\n - zoo.cfg配置\n ```bash\n cd /usr/local/zookeeper/conf/\n cp zoo_sample.cfg zoo.cfg\n ```\n 然后`vim zoo.cfg`,修改如下:\n ```properties\n # 修改dataDir\n dataDir=/data/zookeeper\n # 添加一下内容\n server.1=nn1:2888:3888 \n server.2=nn2:2888:3888 \n server.3=nn3:2888:3888\n ```\n 分发给nn2、nn3\n `scp_all_zookeeper.sh /usr/local/zookeeper/conf/zoo.cfg /usr/local/zookeeper/conf/`\n\n- `zkEnv.sh`配置\n `vim /usr/local/zookeeper/bin/zkEnv.sh`\n ![000001.png](..%2F..%2F..%2Fimg%2F000001.png)\n 分发到nn2、nn3\n```bash\nscp_all_zookeeper.sh /usr/local/zookeeper/bin/zkEnv.sh /usr/local/zookeeper/bin/\n```\n- 创建zookeeper数据目录\n```bash\nssh_root_zookeeper.sh mkdir -p /data/zookeeper\nssh_root_zookeeper.sh chown -R hadoop:hadoop /data\n```\n- 创建myid文件\n ```bash\n ssh nn1 'echo \"1\" > /data/zookeeper/myid'\n ssh nn2 'echo \"2\" > /data/zookeeper/myid'\n ssh nn3 'echo \"3\" > /data/zookeeper/myid'\n ```\n- 配置Zookeeper环境变量\n ```bash\n # 在其他所有主机也执行\n sudo chown -R hadoop:hadoop /etc/profile.d/myEnv.sh\n ```\n\n ```bash\n echo 'export ZOOKEEPER_HOME=/usr/local/zookeeper' >> /etc/profile.d/myEnv.sh\n echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile.d/myEnv.sh\n ```\n\n ```bash\n # 分发到nn2、nn3\n scp_all_zookeeper.sh /etc/profile.d/myEnv.sh /etc/profile.d/\n # source 环境变量\n ssh_all_zookeeper.sh source /etc/profile\n ```\n### Zookeeper的命令\n```bash\n#启动zk服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start\n#查看每个机器ZK运行的状态\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status\n#整体停止服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop \n#重启zk服务\nssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart\n```\n\n```shell\n#启动zookeeper客户端,并连接zookeeper集群\n/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181\n# 可以简化为:\nzkCli.sh\n```\n\n","slug":"bigdata/hadoop/zookeper","published":1,"updated":"2024-09-11T14:51:42.706Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnq00153wahgjyk8zy3","content":"

Zookeeper脚本配置

\n

Zookeeper安装

\n

Zookeeper配置

\n

Zookeeper的命令

1
2
3
4
5
6
7
8
#启动zk服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
#查看每个机器ZK运行的状态
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status
#整体停止服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
#重启zk服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart
\n\n
1
2
3
4
#启动zookeeper客户端,并连接zookeeper集群
/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181
# 可以简化为:
zkCli.sh
\n\n","cover":false,"excerpt":"","more":"

Zookeeper脚本配置

\n

Zookeeper安装

\n

Zookeeper配置

\n

Zookeeper的命令

1
2
3
4
5
6
7
8
#启动zk服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
#查看每个机器ZK运行的状态
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status
#整体停止服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
#重启zk服务
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart
\n\n
1
2
3
4
#启动zookeeper客户端,并连接zookeeper集群
/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181
# 可以简化为:
zkCli.sh
\n\n"},{"title":"无法访问外网?需要订阅代理服务?","abbrlink":14011,"date":"2024-08-07T02:06:08.000Z","_content":"\n{% note info %}\n**由于中国大陆的GFW(防火墙)限制,无法访问外网网络,因此需要访问像GitHub、YouTube这样的\n的网站将被屏蔽拦截,接下来我将给出一种使用`VPN`服务的可行的方案来保证服务的可靠性。**\n{% endnote %}\n\n### 介绍\n> 根据测试,许多提供服务的提供商所在的网站需要使用`外部网络`才能打开,仅有少部分的网站(**比较贵**)可以直接使用\n> 国内网络环境打开直接购买订阅服务。\n>\n\n那么你现在可以有两个选择:\n1. **方案一**:使用无需`外部网络`便能开通订阅服务的VPN,费用高,如果你选择此方案,那么你可自行搜索解决,此处仅仅讨论方案二。\n2. **方案二**:如果使用此方案,详见下方。\n\n\n### 解决方案\n> 采用**方案二**方式\n> \n> 这是一些订阅服务推广的链接: https://9.234456.xyz/abc.html?t=638586217737356738 (此链接打开无需使用VPN,但进入对应的机场页面却仍无法打开)\n> \n> 此教程中我们使用的机场是 \n> 1. `一元机场`: https://xn--4gq62f52gdss.com/\n> 2. `赔钱机场`:https://xn--mes358aby2apfg.com/\n\n### 机场选择的建议:\n\n- `一元机场`\n ![](/img/yiyuan.png)\n 可以看到\n - `12元/年`,每月50GB的可用额度,下个月重置流量额度\n - `15元/季`,即为`60元/年`,每月有4000GB的可用额度,下个月重置流量额度\n - `7元/月`,即为`84元/年`,每个月8000GB的可用额度,下个月重置流量额度\n 根据我个人的使用情况,大多数情况下我每月的流量使用未超过50GB,如果没有频繁的流量使用,\n 建议选择`12元/年`,否则可以选择`15元/季`,这意味着每月将有4000GB的可用额度\n\n- `赔钱机场`\n\n ![](/img/peiqian.png)\n 可以看到\n - `18元/年`,每月100GB的可用额度,允许最多10个设备同时在线,下个月重置流量额度\n - `34.99元/年`,每月有500GB的可用额度,允许最多15个设备同时在线,下个月重置流量额度\n - `68.99元/年`,每个月1000GB的可用额度,允许最多20个设备同时在线,下个月重置流量额度\n - 其余可以自行查看\n\n `赔钱机场`的订阅共有5种方案(按周期付费),这里我仅显示自己正在使用的,个人认为十分优惠:\n - `34.99元/年`,每月500GB的可用额度,根据我观察和使用,这个订阅方案比`一元机场`的性价比更高,且流量使用额度也不用担心\n \n### 如何订阅?\n{% note success %}\n由于需要外部网络才能完成订阅服务的购买,你可以向我的邮箱`15202078626@163.com`发送你的订阅计划方案,\n扫描付款二维码,我将为你开通订阅(您只需要付款对应的订阅金额即可)\n{% endnote %}\n\n\n### 完成订阅后如何使用?\n> 你可以在 `Windows`、`Mac`、`Android`等平台使用此服务\n> 使用订阅的对应链接: https://flowus.cn/shenjian/22f76d4f-e7b3-4b8a-8a89-561566f6eb60\n\n\n ","source":"_posts/net/jichang/jichang.md","raw":"---\ntitle: 无法访问外网?需要订阅代理服务?\ntags: 网络代理\nabbrlink: 14011\ndate: 2024-08-07 10:06:08\n---\n\n{% note info %}\n**由于中国大陆的GFW(防火墙)限制,无法访问外网网络,因此需要访问像GitHub、YouTube这样的\n的网站将被屏蔽拦截,接下来我将给出一种使用`VPN`服务的可行的方案来保证服务的可靠性。**\n{% endnote %}\n\n### 介绍\n> 根据测试,许多提供服务的提供商所在的网站需要使用`外部网络`才能打开,仅有少部分的网站(**比较贵**)可以直接使用\n> 国内网络环境打开直接购买订阅服务。\n>\n\n那么你现在可以有两个选择:\n1. **方案一**:使用无需`外部网络`便能开通订阅服务的VPN,费用高,如果你选择此方案,那么你可自行搜索解决,此处仅仅讨论方案二。\n2. **方案二**:如果使用此方案,详见下方。\n\n\n### 解决方案\n> 采用**方案二**方式\n> \n> 这是一些订阅服务推广的链接: https://9.234456.xyz/abc.html?t=638586217737356738 (此链接打开无需使用VPN,但进入对应的机场页面却仍无法打开)\n> \n> 此教程中我们使用的机场是 \n> 1. `一元机场`: https://xn--4gq62f52gdss.com/\n> 2. `赔钱机场`:https://xn--mes358aby2apfg.com/\n\n### 机场选择的建议:\n\n- `一元机场`\n ![](/img/yiyuan.png)\n 可以看到\n - `12元/年`,每月50GB的可用额度,下个月重置流量额度\n - `15元/季`,即为`60元/年`,每月有4000GB的可用额度,下个月重置流量额度\n - `7元/月`,即为`84元/年`,每个月8000GB的可用额度,下个月重置流量额度\n 根据我个人的使用情况,大多数情况下我每月的流量使用未超过50GB,如果没有频繁的流量使用,\n 建议选择`12元/年`,否则可以选择`15元/季`,这意味着每月将有4000GB的可用额度\n\n- `赔钱机场`\n\n ![](/img/peiqian.png)\n 可以看到\n - `18元/年`,每月100GB的可用额度,允许最多10个设备同时在线,下个月重置流量额度\n - `34.99元/年`,每月有500GB的可用额度,允许最多15个设备同时在线,下个月重置流量额度\n - `68.99元/年`,每个月1000GB的可用额度,允许最多20个设备同时在线,下个月重置流量额度\n - 其余可以自行查看\n\n `赔钱机场`的订阅共有5种方案(按周期付费),这里我仅显示自己正在使用的,个人认为十分优惠:\n - `34.99元/年`,每月500GB的可用额度,根据我观察和使用,这个订阅方案比`一元机场`的性价比更高,且流量使用额度也不用担心\n \n### 如何订阅?\n{% note success %}\n由于需要外部网络才能完成订阅服务的购买,你可以向我的邮箱`15202078626@163.com`发送你的订阅计划方案,\n扫描付款二维码,我将为你开通订阅(您只需要付款对应的订阅金额即可)\n{% endnote %}\n\n\n### 完成订阅后如何使用?\n> 你可以在 `Windows`、`Mac`、`Android`等平台使用此服务\n> 使用订阅的对应链接: https://flowus.cn/shenjian/22f76d4f-e7b3-4b8a-8a89-561566f6eb60\n\n\n ","slug":"net/jichang/jichang","published":1,"updated":"2025-10-16T03:18:44.708Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnr00173wah6pwef0gc","content":"

由于中国大陆的GFW(防火墙)限制,无法访问外网网络,因此需要访问像GitHub、YouTube这样的
的网站将被屏蔽拦截,接下来我将给出一种使用VPN服务的可行的方案来保证服务的可靠性。

\n
\n\n

介绍

\n

根据测试,许多提供服务的提供商所在的网站需要使用外部网络才能打开,仅有少部分的网站(比较贵)可以直接使用
国内网络环境打开直接购买订阅服务。

\n
\n

那么你现在可以有两个选择:

\n
    \n
  1. 方案一:使用无需外部网络便能开通订阅服务的VPN,费用高,如果你选择此方案,那么你可自行搜索解决,此处仅仅讨论方案二。
  2. \n
  3. 方案二:如果使用此方案,详见下方。
  4. \n
\n

解决方案

\n

采用方案二方式

\n

这是一些订阅服务推广的链接: https://9.234456.xyz/abc.html?t=638586217737356738 (此链接打开无需使用VPN,但进入对应的机场页面却仍无法打开)

\n

此教程中我们使用的机场是

\n
    \n
  1. 一元机场: https://xn--4gq62f52gdss.com/
  2. \n
  3. 赔钱机场https://xn--mes358aby2apfg.com/
  4. \n
\n
\n

机场选择的建议:

\n

如何订阅?

由于需要外部网络才能完成订阅服务的购买,你可以向我的邮箱15202078626@163.com发送你的订阅计划方案,
扫描付款二维码,我将为你开通订阅(您只需要付款对应的订阅金额即可)

\n
\n\n\n

完成订阅后如何使用?

\n

你可以在 WindowsMacAndroid等平台使用此服务
使用订阅的对应链接: https://flowus.cn/shenjian/22f76d4f-e7b3-4b8a-8a89-561566f6eb60

\n
\n","cover":false,"excerpt":"","more":"

由于中国大陆的GFW(防火墙)限制,无法访问外网网络,因此需要访问像GitHub、YouTube这样的
的网站将被屏蔽拦截,接下来我将给出一种使用VPN服务的可行的方案来保证服务的可靠性。

\n
\n\n

介绍

\n

根据测试,许多提供服务的提供商所在的网站需要使用外部网络才能打开,仅有少部分的网站(比较贵)可以直接使用
国内网络环境打开直接购买订阅服务。

\n
\n

那么你现在可以有两个选择:

\n
    \n
  1. 方案一:使用无需外部网络便能开通订阅服务的VPN,费用高,如果你选择此方案,那么你可自行搜索解决,此处仅仅讨论方案二。
  2. \n
  3. 方案二:如果使用此方案,详见下方。
  4. \n
\n

解决方案

\n

采用方案二方式

\n

这是一些订阅服务推广的链接: https://9.234456.xyz/abc.html?t=638586217737356738 (此链接打开无需使用VPN,但进入对应的机场页面却仍无法打开)

\n

此教程中我们使用的机场是

\n
    \n
  1. 一元机场: https://xn--4gq62f52gdss.com/
  2. \n
  3. 赔钱机场https://xn--mes358aby2apfg.com/
  4. \n
\n
\n

机场选择的建议:

\n

如何订阅?

由于需要外部网络才能完成订阅服务的购买,你可以向我的邮箱15202078626@163.com发送你的订阅计划方案,
扫描付款二维码,我将为你开通订阅(您只需要付款对应的订阅金额即可)

\n
\n\n\n

完成订阅后如何使用?

\n

你可以在 WindowsMacAndroid等平台使用此服务
使用订阅的对应链接: https://flowus.cn/shenjian/22f76d4f-e7b3-4b8a-8a89-561566f6eb60

\n
\n"},{"title":"郑伯克段于鄢","abbrlink":58638,"date":"2024-08-09T12:00:13.000Z","_content":"原文如下:\n\n      初,郑武公娶于申【申国】,曰武姜【武为武公谥号,姜为其宗族之性】。生庄公及共叔段【共表示其曾出逃到共,叔为老三,段为名】。庄公寤生【出生时头先出,难产】,惊姜氏,故名曰“寤生”, 遂恶之,爱【喜爱】共叔段,欲立【立为储君】之,亟(qì)【多次】请于武公,及庄公即位,为之【共叔段】请制【一个叫制的封地,虎牢关所在】。公曰:“制,岩邑【险要的城邑】也,虢叔死焉,佗【通“他”,其他】邑唯命(是听)。”请京,使居之,谓之“京城大叔”。","source":"_posts/ancient/guwenguanzhi/1.md","raw":"---\ntitle: 郑伯克段于鄢\ntags: 古文观止\ncategories:\n - 古文观止\nabbrlink: 58638\ndate: 2024-08-09 20:00:13\n---\n原文如下:\n\n      初,郑武公娶于申【申国】,曰武姜【武为武公谥号,姜为其宗族之性】。生庄公及共叔段【共表示其曾出逃到共,叔为老三,段为名】。庄公寤生【出生时头先出,难产】,惊姜氏,故名曰“寤生”, 遂恶之,爱【喜爱】共叔段,欲立【立为储君】之,亟(qì)【多次】请于武公,及庄公即位,为之【共叔段】请制【一个叫制的封地,虎牢关所在】。公曰:“制,岩邑【险要的城邑】也,虢叔死焉,佗【通“他”,其他】邑唯命(是听)。”请京,使居之,谓之“京城大叔”。","slug":"ancient/guwenguanzhi/1","published":1,"updated":"2024-08-10T02:31:03.678Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jnr00193wah5y5z0hai","content":"

原文如下:

\n

      初,郑武公娶于申【申国】,曰武姜【武为武公谥号,姜为其宗族之性】。生庄公及共叔段【共表示其曾出逃到共,叔为老三,段为名】。庄公寤生【出生时头先出,难产】,惊姜氏,故名曰“寤生”, 遂恶之,爱【喜爱】共叔段,欲立【立为储君】之,亟(qì)【多次】请于武公,及庄公即位,为之【共叔段】请制【一个叫制的封地,虎牢关所在】。公曰:“制,岩邑【险要的城邑】也,虢叔死焉,佗【通“他”,其他】邑唯命(是听)。”请京,使居之,谓之“京城大叔”。

\n","cover":false,"excerpt":"","more":"

原文如下:

\n

      初,郑武公娶于申【申国】,曰武姜【武为武公谥号,姜为其宗族之性】。生庄公及共叔段【共表示其曾出逃到共,叔为老三,段为名】。庄公寤生【出生时头先出,难产】,惊姜氏,故名曰“寤生”, 遂恶之,爱【喜爱】共叔段,欲立【立为储君】之,亟(qì)【多次】请于武公,及庄公即位,为之【共叔段】请制【一个叫制的封地,虎牢关所在】。公曰:“制,岩邑【险要的城邑】也,虢叔死焉,佗【通“他”,其他】邑唯命(是听)。”请京,使居之,谓之“京城大叔”。

\n"},{"title":"Docker被墙,如何继续使用?","top_img":"/img/site01.jpg","top_img_height":"800px","abbrlink":47807,"date":"2024-08-01T01:10:40.000Z","_content":"\n## Docker Download\n> 自从docker官方仓库在中国大陆被墙后,docker的部署方式也发生了改变。\n> 解决docker安装问题:https://github.com/shenjianZ/docker_installer\n\n1. 安装docker \n ```shell\n sudo curl -fsSL https://gitee.com/tech-shrimp/docker_installer/releases/download/latest/linux.sh| bash -s docker --mirror Aliyun\n ```\n \n2. 启动docker\n ```shell\n sudo systemctl start docker\n ```\n \n3. 设置开机自启\n ```shell\n sudo systemctl enable docker\n ```\n \n4. Docker pull images\n > 将image下载到阿里云镜像仓库中\n > 解决docker pull 镜像问题:https://github.com/shenjianZ/docker_image_pusher\n \n 1. **登录阿里云镜像服务** https://cr.console.aliyun.com/,\n\n 启用个人实例,创建一个命名空间(`ALIYUN_NAME_SPACE`)\n\n 2. 在**访问凭证** 可以看到账号 用户名(`ALIYUN_REGISTRY_USER`)\n\n 密码(`ALIYUN_REGISTRY_PASSWORD`)\n\n 仓库地址(`ALIYUN_REGISTRY`)\n\n 3. **启动Action**\n 进入您自己的项目,点击`Action`,启用`Github Action`功能\n \n 4. **配置环境变量**\n 进入Settings->Secret and variables->Actions->New Repository secret\n 将上一步的四个值\n `ALIYUN_NAME_SPACE`,\n \n ` ALIYUN_REGISTRY_USER`,\n \n `ALIYUN_REGISTRY_PASSWORD`,\n \n `ALIYUN_REGISTRY`\n 配置成环境变量\n \n 5. **添加镜像**\n 打开`images.txt`文件,添加你想要的镜像 可以加tag\n \n 6. 使用镜像\n 回到阿里云,镜像仓库,点击任意镜像,可查看镜像状态。(可以改成公开,拉取镜像免登录)","source":"_posts/frontend/deploy/deploy.md","raw":"---\ntitle: Docker被墙,如何继续使用?\ntop_img: /img/site01.jpg\ntop_img_height: 800px\nabbrlink: 47807\ndate: 2024-08-01 09:10:40\ntags:\n---\n\n## Docker Download\n> 自从docker官方仓库在中国大陆被墙后,docker的部署方式也发生了改变。\n> 解决docker安装问题:https://github.com/shenjianZ/docker_installer\n\n1. 安装docker \n ```shell\n sudo curl -fsSL https://gitee.com/tech-shrimp/docker_installer/releases/download/latest/linux.sh| bash -s docker --mirror Aliyun\n ```\n \n2. 启动docker\n ```shell\n sudo systemctl start docker\n ```\n \n3. 设置开机自启\n ```shell\n sudo systemctl enable docker\n ```\n \n4. Docker pull images\n > 将image下载到阿里云镜像仓库中\n > 解决docker pull 镜像问题:https://github.com/shenjianZ/docker_image_pusher\n \n 1. **登录阿里云镜像服务** https://cr.console.aliyun.com/,\n\n 启用个人实例,创建一个命名空间(`ALIYUN_NAME_SPACE`)\n\n 2. 在**访问凭证** 可以看到账号 用户名(`ALIYUN_REGISTRY_USER`)\n\n 密码(`ALIYUN_REGISTRY_PASSWORD`)\n\n 仓库地址(`ALIYUN_REGISTRY`)\n\n 3. **启动Action**\n 进入您自己的项目,点击`Action`,启用`Github Action`功能\n \n 4. **配置环境变量**\n 进入Settings->Secret and variables->Actions->New Repository secret\n 将上一步的四个值\n `ALIYUN_NAME_SPACE`,\n \n ` ALIYUN_REGISTRY_USER`,\n \n `ALIYUN_REGISTRY_PASSWORD`,\n \n `ALIYUN_REGISTRY`\n 配置成环境变量\n \n 5. **添加镜像**\n 打开`images.txt`文件,添加你想要的镜像 可以加tag\n \n 6. 使用镜像\n 回到阿里云,镜像仓库,点击任意镜像,可查看镜像状态。(可以改成公开,拉取镜像免登录)","slug":"frontend/deploy/deploy","published":1,"updated":"2024-08-09T12:21:50.045Z","comments":1,"layout":"post","photos":[],"_id":"cmgsv0jns001b3wah7t2y19hx","content":"

Docker Download

\n

自从docker官方仓库在中国大陆被墙后,docker的部署方式也发生了改变。
解决docker安装问题:https://github.com/shenjianZ/docker_installer

\n
\n
    \n
  1. 安装docker

    \n
    1
    sudo curl -fsSL https://gitee.com/tech-shrimp/docker_installer/releases/download/latest/linux.sh| bash -s docker --mirror Aliyun
    \n
  2. \n
  3. 启动docker

    \n
    1
    sudo systemctl start docker
    \n
  4. \n
  5. 设置开机自启

    \n
    1
    sudo systemctl enable docker
    \n
  6. \n
  7. Docker pull images

    \n
    \n

    将image下载到阿里云镜像仓库中
    解决docker pull 镜像问题:https://github.com/shenjianZ/docker_image_pusher

    \n
    \n
      \n
    1. 登录阿里云镜像服务 https://cr.console.aliyun.com/,

      \n

      启用个人实例,创建一个命名空间(ALIYUN_NAME_SPACE

      \n
    2. \n
    3. 访问凭证 可以看到账号 用户名(ALIYUN_REGISTRY_USER)

      \n

      密码(ALIYUN_REGISTRY_PASSWORD)

      \n

      仓库地址(ALIYUN_REGISTRY

      \n
    4. \n
    5. 启动Action
      进入您自己的项目,点击Action,启用Github Action功能

      \n
    6. \n
    7. 配置环境变量
      进入Settings->Secret and variables->Actions->New Repository secret
      将上一步的四个值
      ALIYUN_NAME_SPACE,

      \n

      ALIYUN_REGISTRY_USER

      \n

      ALIYUN_REGISTRY_PASSWORD

      \n

      ALIYUN_REGISTRY
      配置成环境变量

      \n
    8. \n
    9. 添加镜像
      打开images.txt文件,添加你想要的镜像 可以加tag

      \n
    10. \n
    11. 使用镜像
      回到阿里云,镜像仓库,点击任意镜像,可查看镜像状态。(可以改成公开,拉取镜像免登录)

      \n
    12. \n
    \n
  8. \n
\n","cover":false,"excerpt":"","more":"

Docker Download

\n

自从docker官方仓库在中国大陆被墙后,docker的部署方式也发生了改变。
解决docker安装问题:https://github.com/shenjianZ/docker_installer

\n
\n
    \n
  1. 安装docker

    \n
    1
    sudo curl -fsSL https://gitee.com/tech-shrimp/docker_installer/releases/download/latest/linux.sh| bash -s docker --mirror Aliyun
    \n
  2. \n
  3. 启动docker

    \n
    1
    sudo systemctl start docker
    \n
  4. \n
  5. 设置开机自启

    \n
    1
    sudo systemctl enable docker
    \n
  6. \n
  7. Docker pull images

    \n
    \n

    将image下载到阿里云镜像仓库中
    解决docker pull 镜像问题:https://github.com/shenjianZ/docker_image_pusher

    \n
    \n
      \n
    1. 登录阿里云镜像服务 https://cr.console.aliyun.com/,

      \n

      启用个人实例,创建一个命名空间(ALIYUN_NAME_SPACE

      \n
    2. \n
    3. 访问凭证 可以看到账号 用户名(ALIYUN_REGISTRY_USER)

      \n

      密码(ALIYUN_REGISTRY_PASSWORD)

      \n

      仓库地址(ALIYUN_REGISTRY

      \n
    4. \n
    5. 启动Action
      进入您自己的项目,点击Action,启用Github Action功能

      \n
    6. \n
    7. 配置环境变量
      进入Settings->Secret and variables->Actions->New Repository secret
      将上一步的四个值
      ALIYUN_NAME_SPACE,

      \n

      ALIYUN_REGISTRY_USER

      \n

      ALIYUN_REGISTRY_PASSWORD

      \n

      ALIYUN_REGISTRY
      配置成环境变量

      \n
    8. \n
    9. 添加镜像
      打开images.txt文件,添加你想要的镜像 可以加tag

      \n
    10. \n
    11. 使用镜像
      回到阿里云,镜像仓库,点击任意镜像,可查看镜像状态。(可以改成公开,拉取镜像免登录)

      \n
    12. \n
    \n
  8. \n
\n"}],"PostAsset":[],"PostCategory":[{"post_id":"cmgsv0jnk000g3wah2r7mcppb","category_id":"cmgsv0jnj000e3wahedhm0vqs","_id":"cmgsv0jnm000m3wahd881dlbe"},{"post_id":"cmgsv0jnf000b3wahhxw69j7q","category_id":"cmgsv0jnj000e3wahedhm0vqs","_id":"cmgsv0jnn000p3wahh6yka5b6"},{"post_id":"cmgsv0jnk000j3wah4i4q6sze","category_id":"cmgsv0jnj000e3wahedhm0vqs","_id":"cmgsv0jnn000r3wah6usv5wwn"},{"post_id":"cmgsv0jni000d3wah7wwg0zp1","category_id":"cmgsv0jnj000e3wahedhm0vqs","_id":"cmgsv0jnn000t3wah2eufgco3"},{"post_id":"cmgsv0jnj000f3wah7exd6oi7","category_id":"cmgsv0jnj000e3wahedhm0vqs","_id":"cmgsv0jnn000v3wah5k8s24wg"},{"post_id":"cmgsv0jnr00193wah5y5z0hai","category_id":"cmgsv0jns001c3wahdvavch4e","_id":"cmgsv0jnt001f3wahazr38gm9"}],"PostTag":[{"post_id":"cmgsv0jne00093wahhwfe0rze","tag_id":"cmgsv0jng000c3wah04uo0ly0","_id":"cmgsv0jnk000i3wahaakoh3vr"},{"post_id":"cmgsv0jnf000b3wahhxw69j7q","tag_id":"cmgsv0jnk000h3wahf4t8fikb","_id":"cmgsv0jnm000o3wahc24s0n3o"},{"post_id":"cmgsv0jni000d3wah7wwg0zp1","tag_id":"cmgsv0jnm000l3wahdjzb9rco","_id":"cmgsv0jnn000s3wahf0769lak"},{"post_id":"cmgsv0jnj000f3wah7exd6oi7","tag_id":"cmgsv0jnn000q3wah26zl0nas","_id":"cmgsv0jnn000w3wah9nz27iee"},{"post_id":"cmgsv0jnk000g3wah2r7mcppb","tag_id":"cmgsv0jnn000u3wah5mnw75qh","_id":"cmgsv0jnn000y3wahgn1ve8t2"},{"post_id":"cmgsv0jnk000j3wah4i4q6sze","tag_id":"cmgsv0jnn000x3wah68a82gkb","_id":"cmgsv0jnn000z3wahev4o94c8"},{"post_id":"cmgsv0jnq00143wahef4aabqf","tag_id":"cmgsv0jnp00123wah6qikd29b","_id":"cmgsv0jnr00163wah3wmv2ih3"},{"post_id":"cmgsv0jno00103wahaue5bi4q","tag_id":"cmgsv0jnp00123wah6qikd29b","_id":"cmgsv0jnr00183wah4e0tf2gc"},{"post_id":"cmgsv0jnr00173wah6pwef0gc","tag_id":"cmgsv0jnr001a3wah3ikcey8g","_id":"cmgsv0jnt001e3wah0rkrex0d"},{"post_id":"cmgsv0jnr00193wah5y5z0hai","tag_id":"cmgsv0jns001d3wah27su13dc","_id":"cmgsv0jnt001g3wahazx4d7xb"}],"Tag":[{"name":"C C++","_id":"cmgsv0jng000c3wah04uo0ly0"},{"name":"decisiontree","_id":"cmgsv0jnk000h3wahf4t8fikb"},{"name":"ensemble-learning","_id":"cmgsv0jnm000l3wahdjzb9rco"},{"name":"KNN","_id":"cmgsv0jnn000q3wah26zl0nas"},{"name":"linear-regression","_id":"cmgsv0jnn000u3wah5mnw75qh"},{"name":"logistic-regression","_id":"cmgsv0jnn000x3wah68a82gkb"},{"name":"uniapp","_id":"cmgsv0jnp00123wah6qikd29b"},{"name":"网络代理","_id":"cmgsv0jnr001a3wah3ikcey8g"},{"name":"古文观止","_id":"cmgsv0jns001d3wah27su13dc"}]}}