use git in obsidian

2024-09-18 12:50:40 +08:00
commit 7407f9ebec
30 changed files with 50841 additions and 0 deletions

11
.obsidian/app.json vendored Normal file

@@ -0,0 +1,11 @@
{
"showLineNumber": true,
"pdfExportSettings": {
"includeName": true,
"pageSize": "A4",
"landscape": false,
"margin": "0",
"downscalePercent": 100
},
"tabSize": 2
}

5
.obsidian/appearance.json vendored Normal file

@@ -0,0 +1,5 @@
{
"cssTheme": "Vanilla AMOLED",
"accentColor": "",
"theme": "obsidian"
}

3
.obsidian/community-plugins.json vendored Normal file

@@ -0,0 +1,3 @@
[
"obsidian-git"
]

30
.obsidian/core-plugins-migration.json vendored Normal file

@@ -0,0 +1,30 @@
{
"file-explorer": true,
"global-search": true,
"switcher": true,
"graph": true,
"backlink": true,
"canvas": true,
"outgoing-link": true,
"tag-pane": true,
"properties": false,
"page-preview": true,
"daily-notes": true,
"templates": true,
"note-composer": true,
"command-palette": true,
"slash-command": false,
"editor-status": true,
"bookmarks": true,
"markdown-importer": false,
"zk-prefixer": false,
"random-note": false,
"outline": true,
"word-count": true,
"slides": false,
"audio-recorder": false,
"workspaces": false,
"file-recovery": true,
"publish": false,
"sync": false
}

20
.obsidian/core-plugins.json vendored Normal file

@@ -0,0 +1,20 @@
[
"file-explorer",
"global-search",
"switcher",
"graph",
"backlink",
"canvas",
"outgoing-link",
"tag-pane",
"page-preview",
"daily-notes",
"templates",
"note-composer",
"command-palette",
"editor-status",
"bookmarks",
"outline",
"word-count",
"file-recovery"
]

22
.obsidian/graph.json vendored Normal file

@@ -0,0 +1,22 @@
{
"collapse-filter": false,
"search": "",
"showTags": false,
"showAttachments": false,
"hideUnresolved": false,
"showOrphans": true,
"collapse-color-groups": false,
"colorGroups": [],
"collapse-display": false,
"showArrow": false,
"textFadeMultiplier": 0,
"nodeSizeMultiplier": 1,
"lineSizeMultiplier": 1,
"collapse-forces": true,
"centerStrength": 0.518713248970312,
"repelStrength": 10,
"linkStrength": 1,
"linkDistance": 250,
"scale": 1.000000000000001,
"close": true
}


@@ -0,0 +1,56 @@
{
"commitMessage": "vault backup: {{date}}",
"commitDateFormat": "YYYY-MM-DD HH:mm:ss",
"autoSaveInterval": 0,
"autoPushInterval": 0,
"autoPullInterval": 0,
"autoPullOnBoot": false,
"disablePush": false,
"pullBeforePush": true,
"disablePopups": false,
"disablePopupsForNoChanges": false,
"listChangedFilesInMessageBody": false,
"showStatusBar": true,
"updateSubmodules": false,
"syncMethod": "merge",
"customMessageOnAutoBackup": false,
"autoBackupAfterFileChange": false,
"treeStructure": false,
"refreshSourceControl": true,
"basePath": "",
"differentIntervalCommitAndPush": false,
"changedFilesInStatusBar": false,
"showedMobileNotice": true,
"refreshSourceControlTimer": 7000,
"showBranchStatusBar": true,
"setLastSaveToLastCommit": false,
"submoduleRecurseCheckout": false,
"gitDir": "",
"showFileMenu": true,
"authorInHistoryView": "hide",
"dateInHistoryView": false,
"lineAuthor": {
"show": false,
"followMovement": "inactive",
"authorDisplay": "initials",
"showCommitHash": false,
"dateTimeFormatOptions": "date",
"dateTimeFormatCustomString": "YYYY-MM-DD HH:mm",
"dateTimeTimezone": "viewer-local",
"coloringMaxAge": "1y",
"colorNew": {
"r": 255,
"g": 150,
"b": 150
},
"colorOld": {
"r": 120,
"g": 160,
"b": 255
},
"textColorCss": "var(--text-muted)",
"ignoreWhitespace": false,
"gutterSpacingFallbackLength": 5
},
"autoCommitMessage": "vault backup: {{date}}"
}
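The settings above only control how the plugin commits and pushes; the plugin expects the vault to already be a Git repository with a remote it can push to. A minimal sketch of that one-time setup (the vault path and remote URL are placeholders):

```bash
cd /path/to/vault                                              # the folder that contains .obsidian/
git init
git remote add origin git@github.com:<user>/<vault-repo>.git  # placeholder remote
git add .
git commit -m "vault backup: initial"                          # mirrors the plugin's commit-message template
git push -u origin master
```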

44951
.obsidian/plugins/obsidian-git/main.js vendored Normal file

File diff suppressed because one or more lines are too long


@@ -0,0 +1,10 @@
{
"author": "Vinzent",
"authorUrl": "https://github.com/Vinzent03",
"id": "obsidian-git",
"name": "Git",
"description": "Integrate Git version control with automatic backup and other advanced features.",
"isDesktopOnly": false,
"fundingUrl": "https://ko-fi.com/vinzent",
"version": "2.26.0"
}


@@ -0,0 +1,562 @@
@keyframes loading {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
.workspace-leaf-content[data-type="git-view"] .button-border {
border: 2px solid var(--interactive-accent);
border-radius: var(--radius-s);
}
.workspace-leaf-content[data-type="git-view"] .view-content {
padding: 0;
}
.workspace-leaf-content[data-type="git-history-view"] .view-content {
padding: 0;
}
.loading > svg {
animation: 2s linear infinite loading;
transform-origin: 50% 50%;
display: inline-block;
}
.obsidian-git-center {
margin: auto;
text-align: center;
width: 50%;
}
.obsidian-git-textarea {
display: block;
margin-left: auto;
margin-right: auto;
}
.obsidian-git-center-button {
display: block;
margin: 20px auto;
}
.tooltip.mod-left {
overflow-wrap: break-word;
}
.tooltip.mod-right {
overflow-wrap: break-word;
}
.git-tools {
display: flex;
margin-left: auto;
}
.git-tools .type {
padding-left: var(--size-2-1);
display: flex;
align-items: center;
justify-content: center;
width: 11px;
}
.git-tools .type[data-type="M"] {
color: orange;
}
.git-tools .type[data-type="D"] {
color: red;
}
.git-tools .buttons {
display: flex;
}
.git-tools .buttons > * {
padding: 0 0;
height: auto;
}
.is-active .git-tools .buttons > * {
color: var(--nav-item-color-active);
}
.git-author {
color: var(--text-accent);
}
.git-date {
color: var(--text-accent);
}
.git-ref {
color: var(--text-accent);
}
.workspace-leaf-content[data-type="diff-view"] .d2h-d-none {
display: none;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-wrapper {
text-align: left;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-header {
background-color: var(--background-primary);
border-bottom: 1px solid var(--interactive-accent);
font-family: var(--font-monospace);
height: 35px;
padding: 5px 10px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-header,
.workspace-leaf-content[data-type="diff-view"] .d2h-file-stats {
display: -webkit-box;
display: -ms-flexbox;
display: flex;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-stats {
font-size: 14px;
margin-left: auto;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-lines-added {
border: 1px solid #b4e2b4;
border-radius: 5px 0 0 5px;
color: #399839;
padding: 2px;
text-align: right;
vertical-align: middle;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-lines-deleted {
border: 1px solid #e9aeae;
border-radius: 0 5px 5px 0;
color: #c33;
margin-left: 1px;
padding: 2px;
text-align: left;
vertical-align: middle;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-name-wrapper {
-webkit-box-align: center;
-ms-flex-align: center;
align-items: center;
display: -webkit-box;
display: -ms-flexbox;
display: flex;
font-size: 15px;
width: 100%;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-name {
overflow-x: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-wrapper {
border: 1px solid var(--background-modifier-border);
border-radius: 3px;
margin-bottom: 1em;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-collapse {
-webkit-box-pack: end;
-ms-flex-pack: end;
-webkit-box-align: center;
-ms-flex-align: center;
align-items: center;
border: 1px solid var(--background-modifier-border);
border-radius: 3px;
cursor: pointer;
display: none;
font-size: 12px;
justify-content: flex-end;
padding: 4px 8px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-collapse.d2h-selected {
background-color: #c8e1ff;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-collapse-input {
margin: 0 4px 0 0;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-diff-table {
border-collapse: collapse;
font-family: Menlo, Consolas, monospace;
font-size: 13px;
width: 100%;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-files-diff {
width: 100%;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-diff {
overflow-y: hidden;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-side-diff {
display: inline-block;
margin-bottom: -8px;
margin-right: -4px;
overflow-x: scroll;
overflow-y: hidden;
width: 50%;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line {
padding: 0 8em;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-line {
display: inline-block;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
white-space: nowrap;
width: 100%;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-line {
padding: 0 4.5em;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line-ctn {
word-wrap: normal;
background: none;
display: inline-block;
padding: 0;
-webkit-user-select: text;
-moz-user-select: text;
-ms-user-select: text;
user-select: text;
vertical-align: middle;
white-space: pre;
width: 100%;
}
.theme-light .workspace-leaf-content[data-type="diff-view"] .d2h-code-line del,
.theme-light
.workspace-leaf-content[data-type="diff-view"]
.d2h-code-side-line
del {
background-color: #ffb6ba;
}
.theme-dark .workspace-leaf-content[data-type="diff-view"] .d2h-code-line del,
.theme-dark
.workspace-leaf-content[data-type="diff-view"]
.d2h-code-side-line
del {
background-color: #8d232881;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line del,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line ins,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-line del,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-line ins {
border-radius: 0.2em;
display: inline-block;
margin-top: -1px;
text-decoration: none;
vertical-align: middle;
}
.theme-light .workspace-leaf-content[data-type="diff-view"] .d2h-code-line ins,
.theme-light
.workspace-leaf-content[data-type="diff-view"]
.d2h-code-side-line
ins {
background-color: #97f295;
text-align: left;
}
.theme-dark .workspace-leaf-content[data-type="diff-view"] .d2h-code-line ins,
.theme-dark
.workspace-leaf-content[data-type="diff-view"]
.d2h-code-side-line
ins {
background-color: #1d921996;
text-align: left;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line-prefix {
word-wrap: normal;
background: none;
display: inline;
padding: 0;
white-space: pre;
}
.workspace-leaf-content[data-type="diff-view"] .line-num1 {
float: left;
}
.workspace-leaf-content[data-type="diff-view"] .line-num1,
.workspace-leaf-content[data-type="diff-view"] .line-num2 {
-webkit-box-sizing: border-box;
box-sizing: border-box;
overflow: hidden;
padding: 0 0.5em;
text-overflow: ellipsis;
width: 3.5em;
}
.workspace-leaf-content[data-type="diff-view"] .line-num2 {
float: right;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-linenumber {
background-color: var(--background-primary);
border: solid var(--background-modifier-border);
border-width: 0 1px;
-webkit-box-sizing: border-box;
box-sizing: border-box;
color: var(--text-muted);
cursor: pointer;
display: inline-block;
position: absolute;
text-align: right;
width: 7.5em;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-linenumber:after {
content: "\200b";
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-linenumber {
background-color: var(--background-primary);
border: solid var(--background-modifier-border);
border-width: 0 1px;
-webkit-box-sizing: border-box;
box-sizing: border-box;
color: var(--text-muted);
cursor: pointer;
display: inline-block;
overflow: hidden;
padding: 0 0.5em;
position: absolute;
text-align: right;
text-overflow: ellipsis;
width: 4em;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-diff-tbody tr {
position: relative;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-linenumber:after {
content: "\200b";
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-emptyplaceholder,
.workspace-leaf-content[data-type="diff-view"] .d2h-emptyplaceholder {
background-color: var(--background-primary);
border-color: var(--background-modifier-border);
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-line-prefix,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-linenumber,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-linenumber,
.workspace-leaf-content[data-type="diff-view"] .d2h-emptyplaceholder {
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-code-linenumber,
.workspace-leaf-content[data-type="diff-view"] .d2h-code-side-linenumber {
direction: rtl;
}
.theme-light .workspace-leaf-content[data-type="diff-view"] .d2h-del {
background-color: #fee8e9;
border-color: #e9aeae;
}
.theme-light .workspace-leaf-content[data-type="diff-view"] .d2h-ins {
background-color: #dfd;
border-color: #b4e2b4;
}
.theme-dark .workspace-leaf-content[data-type="diff-view"] .d2h-del {
background-color: #521b1d83;
border-color: #691d1d73;
}
.theme-dark .workspace-leaf-content[data-type="diff-view"] .d2h-ins {
background-color: rgba(30, 71, 30, 0.5);
border-color: #13501381;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-info {
background-color: var(--background-primary);
border-color: var(--background-modifier-border);
color: var(--text-normal);
}
.theme-light
.workspace-leaf-content[data-type="diff-view"]
.d2h-file-diff
.d2h-del.d2h-change {
background-color: #fdf2d0;
}
.theme-dark
.workspace-leaf-content[data-type="diff-view"]
.d2h-file-diff
.d2h-del.d2h-change {
background-color: #55492480;
}
.theme-light
.workspace-leaf-content[data-type="diff-view"]
.d2h-file-diff
.d2h-ins.d2h-change {
background-color: #ded;
}
.theme-dark
.workspace-leaf-content[data-type="diff-view"]
.d2h-file-diff
.d2h-ins.d2h-change {
background-color: rgba(37, 78, 37, 0.418);
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list-wrapper {
margin-bottom: 10px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list-wrapper a {
color: #3572b0;
text-decoration: none;
}
.workspace-leaf-content[data-type="diff-view"]
.d2h-file-list-wrapper
a:visited {
color: #3572b0;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list-header {
text-align: left;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list-title {
font-weight: 700;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list-line {
display: -webkit-box;
display: -ms-flexbox;
display: flex;
text-align: left;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list {
display: block;
list-style: none;
margin: 0;
padding: 0;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list > li {
border-bottom: 1px solid var(--background-modifier-border);
margin: 0;
padding: 5px 10px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-list > li:last-child {
border-bottom: none;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-file-switch {
cursor: pointer;
display: none;
font-size: 10px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-icon {
fill: currentColor;
margin-right: 10px;
vertical-align: middle;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-deleted {
color: #c33;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-added {
color: #399839;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-changed {
color: #d0b44c;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-moved {
color: #3572b0;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-tag {
background-color: var(--background-primary);
display: -webkit-box;
display: -ms-flexbox;
display: flex;
font-size: 10px;
margin-left: 5px;
padding: 0 2px;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-deleted-tag {
border: 2px solid #c33;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-added-tag {
border: 1px solid #399839;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-changed-tag {
border: 1px solid #d0b44c;
}
.workspace-leaf-content[data-type="diff-view"] .d2h-moved-tag {
border: 1px solid #3572b0;
}
/* ====================== Line Authoring Information ====================== */
.cm-gutterElement.obs-git-blame-gutter {
/* Add background color to spacing inbetween and around the gutter for better aesthetics */
border-width: 0px 2px 0.2px 2px;
border-style: solid;
border-color: var(--background-secondary);
background-color: var(--background-secondary);
}
.cm-gutterElement.obs-git-blame-gutter > div,
.line-author-settings-preview {
/* delegate text color to settings */
color: var(--obs-git-gutter-text);
font-family: monospace;
height: 100%; /* ensure, that age-based background color occupies entire parent */
text-align: right;
padding: 0px 6px 0px 6px;
white-space: pre; /* Keep spaces and do not collapse them. */
}
@media (max-width: 800px) {
/* hide git blame gutter not to superpose text */
.cm-gutterElement.obs-git-blame-gutter {
display: none;
}
}


@@ -0,0 +1,7 @@
{
"name": "GitHub Theme",
"version": "1.1.5",
"minAppVersion": "1.0.0",
"author": "@krios2146",
"authorUrl": "https://github.com/krios2146"
}

1220
.obsidian/themes/GitHub Theme/theme.css vendored Normal file

File diff suppressed because it is too large


@@ -0,0 +1,8 @@
{
"name": "Minimal",
"version": "7.7.18",
"minAppVersion": "1.6.1",
"author": "@kepano",
"authorUrl": "https://twitter.com/kepano",
"fundingUrl": "https://www.buymeacoffee.com/kepano"
}

2224
.obsidian/themes/Minimal/theme.css vendored Normal file

File diff suppressed because one or more lines are too long


@@ -0,0 +1,7 @@
{
"name": "Vanilla AMOLED",
"version": "1.0.0",
"minAppVersion": "1.0.0",
"author": "Sakura Akeno Isayeki",
"authorUrl": "https//github.com/SakuraIsayeki"
}


@@ -0,0 +1,25 @@
.theme-dark {
--color-base-00: #000;
--color-base-05: #080808;
--color-base-10: #111;
--color-base-20: #1a1a1a;
--color-base-25: #222;
--color-base-30: #333;
--color-base-35: #363636;
--color-base-40: #4a4a4a;
--color-base-50: #666;
--color-base-60: #999;
--color-base-70: #bbb;
--color-base-100: #fff;
--file-line-width:1050px;
--background-primary: var(--color-base-00);
--background-secondary: var(--color-base-05);
--titlebar-background: var(--color-base-00);
--titlebar-background-focused: var(--color-base-10);
}
/*
No, seriously, that's as simple as it gets.
- Sakura
*/

186
.obsidian/workspace.json vendored Normal file

@@ -0,0 +1,186 @@
{
"main": {
"id": "823c29674882146a",
"type": "split",
"children": [
{
"id": "61c75dd30ac10b94",
"type": "tabs",
"children": [
{
"id": "9bac209b0fd7c3e1",
"type": "leaf",
"state": {
"type": "markdown",
"state": {
"file": "Hadoop/Zookeeper.md",
"mode": "source",
"source": false
}
}
}
]
}
],
"direction": "vertical"
},
"left": {
"id": "853742ecfb28b64f",
"type": "split",
"children": [
{
"id": "2ef5522b43727d44",
"type": "tabs",
"children": [
{
"id": "c615f431a27fc4e9",
"type": "leaf",
"state": {
"type": "file-explorer",
"state": {
"sortOrder": "byCreatedTimeReverse"
}
}
},
{
"id": "411916e226937ef8",
"type": "leaf",
"state": {
"type": "search",
"state": {
"query": "",
"matchingCase": false,
"explainSearch": false,
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical"
}
}
},
{
"id": "8365acddff8cdb36",
"type": "leaf",
"state": {
"type": "bookmarks",
"state": {}
}
}
]
}
],
"direction": "horizontal",
"width": 200
},
"right": {
"id": "168ca6f162c4ff97",
"type": "split",
"children": [
{
"id": "aa923cd483bdd7db",
"type": "tabs",
"children": [
{
"id": "c2317fc1134b9c42",
"type": "leaf",
"state": {
"type": "backlink",
"state": {
"file": "Hadoop/Zookeeper.md",
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical",
"showSearch": false,
"searchQuery": "",
"backlinkCollapsed": false,
"unlinkedCollapsed": true
}
}
},
{
"id": "9529fbdf92d4654f",
"type": "leaf",
"state": {
"type": "outgoing-link",
"state": {
"file": "Hadoop/Zookeeper.md",
"linksCollapsed": false,
"unlinkedCollapsed": true
}
}
},
{
"id": "9d34002acfbf1c58",
"type": "leaf",
"state": {
"type": "tag",
"state": {
"sortOrder": "frequency",
"useHierarchy": true
}
}
},
{
"id": "78996eb0be188c37",
"type": "leaf",
"state": {
"type": "outline",
"state": {
"file": "Hadoop/Zookeeper.md"
}
}
},
{
"id": "a6359902bf66f6d5",
"type": "leaf",
"state": {
"type": "git-view",
"state": {}
}
}
],
"currentTab": 4
}
],
"direction": "horizontal",
"width": 375.5
},
"left-ribbon": {
"hiddenItems": {
"switcher:打开快速切换": false,
"graph:查看关系图谱": false,
"canvas:新建白板": false,
"daily-notes:打开/创建今天的日记": false,
"templates:插入模板": false,
"command-palette:打开命令面板": false,
"obsidian-git:Open Git source control": false
}
},
"active": "c615f431a27fc4e9",
"lastOpenFiles": [
"2024-09-04.md",
"欢迎.md",
"Spark搭建/01.md",
"Hadoop/HDFS搭建.md",
"HaiNiuHadoop搭建/1.环境安装.md",
"HaiNiuHadoop搭建/2.ZooKeeper配置.md",
"HaiNiuHadoop搭建/未命名.md",
"HaiNiuHadoop搭建/4.Yarn配置.md",
"HaiNiuHadoop搭建/3.HDFS搭建.md",
"Spark搭建/images/workers.png",
"HaiNiuHadoop搭建/images/000001.png",
"Spark搭建/images",
"Spark搭建",
"HaiNiuHadoop搭建/Yarn配置.md",
"HaiNiuHadoop搭建/images/Pasted image 20240910224258.png",
"Hadoop/脚本.md",
"Hadoop/Zookeeper.md",
"Pasted image 20240910224258.png",
"HaiNiuHadoop搭建/images",
"HaiNiuHadoop搭建/新建文件夹",
"HaiNiuHadoop搭建",
"Hadoop",
"mysql.md",
"打开.md",
"未命名.canvas"
]
}

0
2024-09-04.md Normal file

194
Hadoop/HDFS搭建.md Normal file

@@ -0,0 +1,194 @@
#### Related commands
- `hadoop-daemon.sh start journalnode` or `hdfs --daemon start journalnode`
- `hadoop-daemon.sh start namenode` or `hdfs --daemon start namenode`
- `hdfs namenode -format` — format the first NameNode
- `hdfs namenode -bootstrapStandby` — format the second and third NameNodes
- `hdfs zkfc -formatZK` — format the ZKFC state in ZooKeeper; run on nn1 only
- `hdfs --daemon start zkfc` — run on nn1, nn2 and nn3
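One possible ordering of these commands for a first-time start, matching the initialization steps later in these notes:
```bash
# on nn1, nn2, nn3: start the journalnodes first so the namenodes can share edits
hdfs --daemon start journalnode
# on nn1 only: initialise the ZKFC state in ZooKeeper
hdfs zkfc -formatZK
# on nn1, nn2, nn3: start the failover controllers
hdfs --daemon start zkfc
# on nn1 only: format and start the first namenode
hdfs namenode -format
hdfs --daemon start namenode
# on nn2 and nn3: copy the metadata from nn1, then start
hdfs namenode -bootstrapStandby
hdfs --daemon start namenode
```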
>`core-site.xml`
```xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
<description>默认文件服务的协议和NS逻辑名称和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/tmp</value>
<description>数据存储目录</description>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>hadoop</value>
<description>
hdfs dfsadmin refreshSuperUserGroupsConfiguration,
yarn rmadmin refreshSuperUserGroupsConfiguration
使用这两个命令不用重启就能刷新
</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>localhost</value>
<description>本地代理</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>HA使用的zookeeper地址</description>
</property>
</configuration>
```
>`hdfs-site.xml`
```xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称与core-site.xml里的对应</description>
</property>
<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储通常是namenode的所在机器</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法缺省是ssh可设为shell稍后详述</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- datanode配置 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/datanode</value>
<description>datanode本地文件存放地址</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>文件复本数</description>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
</configuration>
```

32
Hadoop/Zookeeper.md Normal file

@@ -0,0 +1,32 @@
> **Leader election**
ZooKeeper is not a file system in the traditional sense. It is a distributed coordination service, mainly used in distributed systems for data consistency, distributed locking and cluster configuration management.
- Election by majority (quorum)
> Linux foreground and background jobs
- `&` runs the command in the background
```bash
./f.sh &
```
`nohup` keeps the job running after the session ends and writes its output to a log file instead of the console
```bash
nohup ./f.sh &
```
`2>&1` redirects stdout and stderr to `/tmp/log.txt`
```bash
nohup ./f.sh >> /tmp/log.txt 2>&1 &
```
`/dev/null` discards the output
```bash
nohup ./f.sh >> /dev/null 2>&1 &
```
Inspecting a running program's `stdin`, `stdout` and `stderr`:
first use `jps` to find the ZooKeeper process ID,
then `cd /proc/<pid>/fd`.
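For example (the PID below is hypothetical; take the real one from `jps`):
```bash
jps                                                        # e.g. "12345 QuorumPeerMain" is the ZooKeeper server
ls -l /proc/12345/fd/0 /proc/12345/fd/1 /proc/12345/fd/2   # shows where stdin/stdout/stderr point
```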
> Related commands
- `ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start`
- `ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status`
- `ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop`
- `zkCli.sh`

27
Hadoop/脚本.md Normal file

@@ -0,0 +1,27 @@
### Cluster start script
> You can also simply use `start-dfs.sh`; ZooKeeper must be started first.
```bash
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
ssh_all_zookeeper.sh hdfs --daemon start journalnode
ssh_all_zookeeper.sh hdfs --daemon start namenode
ssh_all_zookeeper.sh hdfs --daemon start zkfc
ssh hadoop@s1 hdfs --daemon start datanode
ssh hadoop@s2 hdfs --daemon start datanode
ssh hadoop@s3 hdfs --daemon start datanode
```
### Cluster stop script
> You can also simply use `stop-dfs.sh`.
```bash
ssh_all_zookeeper.sh hdfs --daemon stop namenode
ssh hadoop@s1 hdfs --daemon stop datanode
ssh hadoop@s2 hdfs --daemon stop datanode
ssh hadoop@s3 hdfs --daemon stop datanode
ssh_all_zookeeper.sh hdfs --daemon stop journalnode
ssh_all_zookeeper.sh hdfs --daemon stop zkfc
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
```
Cluster setup notes
- The `/data` directory must be created on all six machines, and its owner and group changed to `hadoop` (see the sketch below).
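A minimal sketch of that step, assuming the `ssh_root.sh` helper from the environment-setup notes is on the PATH:
```bash
# create /data on every host as root and hand it over to the hadoop user
ssh_root.sh mkdir -p /data
ssh_root.sh chown -R hadoop:hadoop /data
```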


@@ -0,0 +1,253 @@
### Disable the firewall
```bash
# run on all 6 hosts
sudo systemctl stop firewalld
sudo systemctl disable firewalld
```
### Configure the yum repository
- Download the repo file:
[Centos-7.repo](http://mirrors.aliyun.com/repo/Centos-7.repo)
Upload it to `/tmp`, then cd into `/tmp`.
- Back up and replace the system repo file
``` bash
cp Centos-7.repo /etc/yum.repos.d/
cd /etc/yum.repos.d/
mv CentOS-Base.repo CentOS-Base.repo.bak
mv Centos-7.repo CentOS-Base.repo
```
- Copy `CentOS-Base.repo` from `nn1` to the other hosts
```bash
scp /etc/yum.repos.d/CentOS-Base.repo root@nn2:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@nn3:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s1:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s2:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s3:/etc/yum.repos.d
```
- Refresh the yum cache
```bash
yum clean all
yum makecache
yum update -y
```
- Install common packages
```bash
yum install -y openssh-server vim gcc gcc-c++ glibc-headers bzip2-devel lzo-devel curl wget openssh-clients zlib-devel autoconf automake cmake libtool openssl-devel fuse-devel snappy-devel telnet unzip zip net-tools.x86_64 firewalld systemd ntp unrar bzip2
```
### Install the JDK
> Note: run this on each of the six machines.
- Upload the RPM to `/tmp` and install it
```bash
cd /tmp
rpm -ivh jdk-8u144-linux-x64.rpm
```
- Configure environment variables
```bash
ln -s /usr/java/jdk1.8.0_144/ /usr/java/jdk1.8
echo 'export JAVA_HOME=/usr/java/jdk1.8' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile.d/myEnv.sh
source /etc/profile
java -version
```
### Set the hostnames and host mappings
```bash
vim /etc/hostname
```
The six machines are nn1, nn2, nn3, s1, s2 and s3.
```bash
vim /etc/hosts
```
Change it to
```text
192.168.1.30 nn1
192.168.1.31 nn2
192.168.1.32 nn3
192.168.1.33 s1
192.168.1.34 s2
192.168.1.35 s3
```
### Create the hadoop user
```bash
# create the hadoop user
useradd hadoop
# set the hadoop user's password: 12345678
passwd hadoop
```
### Restrict su to the wheel group and allow passwordless su to root
- Edit the /etc/pam.d/su configuration
```bash
sed -i 's/#auth\t\trequired\tpam_wheel.so/auth\t\trequired\tpam_wheel.so/g' '/etc/pam.d/su'
sed -i 's/#auth\t\tsufficient\tpam_wheel.so/auth\t\tsufficient\tpam_wheel.so/g' '/etc/pam.d/su'
```
- Edit the /etc/login.defs file
```bash
echo "SU_WHEEL_ONLY yes" >> /etc/login.defs
```
- Add the user to wheel; ordinary users can then no longer su to root
```bash
# add the hadoop user to the wheel group
gpasswd -a hadoop wheel
# check whether the wheel group now contains the hadoop user
cat /etc/group | grep wheel
```
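If the `sufficient ... trust` variant of the pam_wheel line was enabled by the sed above, members of wheel can reach root without a password; a quick check (assumes the stock CentOS 7 `/etc/pam.d/su` layout):
```bash
su - hadoop    # become the hadoop user
su -           # should drop to root without prompting for a password
whoami         # expect: root
```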
### Set up SSH keys for the hadoop user
#### Passwordless SSH from the hadoop user to hadoop
- Running this block of commands on `nn1` is enough;
however, `su - hadoop` and `mkdir ~/.ssh` still need to be run on the other hosts first.
```bash
# switch to the hadoop user
su - hadoop
# generate the SSH key pair
ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
ssh-copy-id nn1
ssh-copy-id nn2
ssh-copy-id nn3
ssh-copy-id s1
ssh-copy-id s2
ssh-copy-id s3
scp /home/hadoop/.ssh/id_rsa hadoop@nn2:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@nn3:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s1:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s2:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s3:/home/hadoop/.ssh
```
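A quick loop to confirm that passwordless login works from nn1 (hostnames as defined in `/etc/hosts` above):
```bash
for h in nn1 nn2 nn3 s1 s2 s3; do
  ssh hadoop@"$h" hostname   # should print each hostname without a password prompt
done
```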
#### Passwordless SSH from the hadoop user to root
```bash
ssh-copy-id root@nn1
ssh-copy-id root@nn2
ssh-copy-id root@nn3
ssh-copy-id root@s1
ssh-copy-id root@s2
ssh-copy-id root@s3
scp /home/hadoop/.ssh/id_rsa root@nn2:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@nn3:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s1:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s2:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s3:/root/.ssh
```
### Helper scripts
- **ips**
```bash
vim /home/hadoop/bin/ips
```
```bash
nn1
nn2
nn3
s1
s2
s3
```
- **ssh_all.sh**
```bash
vim /home/hadoop/bin/ssh_all.sh
```
```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# record that directory
dir_path=`pwd`
#echo $dir_path
# read the ips file into an array of hostnames
ip_arr=(`cat $dir_path/ips`)
# loop over the hostnames
for ip in ${ip_arr[*]}
do
# build the ssh command, e.g. ssh hadoop@nn1.hadoop ls
cmd_="ssh hadoop@${ip} \"$*\" "
echo $cmd_
# run the assembled ssh command with eval
if eval ${cmd_} ; then
echo "OK"
else
echo "FAIL"
fi
done
```
- **ssh_root.sh**
```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# record that directory
dir_path=`pwd`
#echo $dir_path
# read the ips file into an array of hostnames
ip_arr=(`cat $dir_path/ips`)
# loop over the hostnames
for ip in ${ip_arr[*]}
do
# build the ssh command, e.g. ssh hadoop@nn1.hadoop ~/bin/exe.sh ls
cmd_="ssh hadoop@${ip} ~/bin/exe.sh \"$*\""
echo $cmd_
# run the assembled ssh command with eval
if eval ${cmd_} ; then
echo "OK"
else
echo "FAIL"
fi
done
```
- **scp_all.sh**
```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# record that directory
dir_path=`pwd`
#echo $dir_path
# read the ips file into an array of hostnames
ip_arr=(`cat $dir_path/ips`)
# source path
source_=$1
# destination path
target=$2
# loop over the hostnames
for ip in ${ip_arr[*]}
do
# build the scp command: scp <source> hadoop@nn1.hadoop:<destination>
cmd_="scp -r ${source_} hadoop@${ip}:${target}"
echo $cmd_
# run the assembled scp command with eval
if eval ${cmd_} ; then
echo "OK"
else
echo "FAIL"
fi
done
```
- **exe.sh** (a usage sketch for these scripts follows this list)
```bash
#!/bin/bash
# run the given command as root via su (relies on the passwordless su configured above)
cmd=$*
su - << EOF
$cmd
EOF
```
- Make the scripts executable
```bash
chmod +x ssh_all.sh
chmod +x scp_all.sh
chmod +x ssh_root.sh
chmod +x exe.sh
```
- Distribute them to the other hosts
```bash
./ssh_all.sh mkdir /home/hadoop/bin
./scp_all.sh /home/hadoop/bin/ips /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/exe.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/ssh_all.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/scp_all.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/ssh_root.sh /home/hadoop/bin/
```
- Add `/home/hadoop/bin` to the hadoop user's PATH (switch to the `hadoop` user first)
```bash
echo 'export PATH=$PATH:/home/hadoop/bin' >> ~/.bashrc && source ~/.bashrc
scp_all.sh /home/hadoop/.bashrc /home/hadoop/
source ~/.bashrc # run on every host
```
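With the scripts distributed and on the PATH, typical calls look like this (the commands and paths are only examples):
```bash
ssh_all.sh hostname              # run a command as hadoop on every host listed in ips
ssh_root.sh mkdir -p /data       # run a command as root on every host (via exe.sh)
scp_all.sh /tmp/some.conf /tmp/  # copy a local file to the same path on every host
```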


@@ -0,0 +1,106 @@
### ZooKeeper helper scripts
- Copy ips
```bash
cp ips ips_zookeeper
```
Change it to
```bash
nn1
nn2
nn3
```
- Copy the three scripts
```bash
cp scp_all.sh scp_all_zookeeper.sh
cp ssh_all.sh ssh_all_zookeeper.sh
cp ssh_root.sh ssh_root_zookeeper.sh
```
- Edit the scripts
```shell
vim scp_all_zookeeper.sh
vim ssh_all_zookeeper.sh
vim ssh_root_zookeeper.sh
```
In all three scripts, change `ips` to `ips_zookeeper`.
### Install ZooKeeper
- Upload the tarball to `/tmp` and extract it
```bash
sudo tar -zxvf /tmp/zookeeper-3.4.8.tar.gz -C /usr/local/
scp -r /usr/local/zookeeper-3.4.8/ root@nn2:/usr/local/
scp -r /usr/local/zookeeper-3.4.8/ root@nn3:/usr/local/
ssh_root_zookeeper.sh chown -R hadoop:hadoop /usr/local/zookeeper-3.4.8
ssh_root_zookeeper.sh ln -s /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper
```
### Configure ZooKeeper
- zoo.cfg
```bash
cd /usr/local/zookeeper/conf/
cp zoo_sample.cfg zoo.cfg
```
Then `vim zoo.cfg` and change the following:
```properties
# change dataDir
dataDir=/data/zookeeper
# add the following
server.1=nn1:2888:3888
server.2=nn2:2888:3888
server.3=nn3:2888:3888
```
Distribute it to nn2 and nn3:
`scp_all_zookeeper.sh /usr/local/zookeeper/conf/zoo.cfg /usr/local/zookeeper/conf/`
- Configure `zkEnv.sh`
`vim /usr/local/zookeeper/bin/zkEnv.sh`
![[images/000001.png]]
Distribute it to nn2 and nn3
```bash
scp_all_zookeeper.sh /usr/local/zookeeper/bin/zkEnv.sh /usr/local/zookeeper/bin/
```
- Create the ZooKeeper data directories
```bash
ssh_root_zookeeper.sh mkdir -p /data/zookeeper
ssh_root_zookeeper.sh chown -R hadoop:hadoop /data
```
- Create the myid files
```bash
ssh nn1 'echo "1" > /data/zookeeper/myid'
ssh nn2 'echo "2" > /data/zookeeper/myid'
ssh nn3 'echo "3" > /data/zookeeper/myid'
```
- Configure the ZooKeeper environment variables
```bash
# run on all the other hosts as well
sudo chown -R hadoop:hadoop /etc/profile.d/myEnv.sh
```
```bash
echo 'export ZOOKEEPER_HOME=/usr/local/zookeeper' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile.d/myEnv.sh
```
```bash
# distribute to nn2 and nn3
scp_all_zookeeper.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source the environment variables
ssh_all_zookeeper.sh source /etc/profile
```
### ZooKeeper commands
```bash
# start the ZK service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
# check the ZK status on every machine
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status
# stop the whole service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
# restart the ZK service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart
```
```shell
# start the ZooKeeper client and connect to the cluster
/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181
# or simply:
zkCli.sh
```
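To confirm the ensemble is healthy, the four-letter-word commands can also be queried on the client port (assumes `nc` is installed):
```bash
echo ruok | nc nn1 2181               # a healthy server answers "imok"
echo stat | nc nn1 2181 | grep Mode   # shows whether nn1 is the leader or a follower
```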


@@ -0,0 +1,286 @@
### Upload `hadoop-3.1.4.tar.gz` to `/tmp` and extract it
> Note: upload it to `/tmp` on all six machines.
```bash
# run on all 6 machines
sudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/
# set ownership and create the symlink on every host
ssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4
ssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop
```
### Configure environment variables
```bash
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh
```
```bash
# distribute to nn2, nn3, s1, s2, s3
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source the environment variables
ssh_root.sh source /etc/profile
```
> The `/data` directory is also required; nn1, nn2 and nn3 already have it, so create it on the other three machines.
```bash
# run on s1, s2, s3
sudo mkdir /data
sudo chown -R hadoop:hadoop /data
```
### Edit core-site.xml
```bash
vim /usr/local/hadoop/etc/hadoop/core-site.xml
```
```xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
<description>默认文件服务的协议和NS逻辑名称和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/tmp</value>
<description>数据存储目录</description>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>hadoop</value>
<description>
hdfs dfsadmin refreshSuperUserGroupsConfiguration,
yarn rmadmin refreshSuperUserGroupsConfiguration
使用这两个命令不用重启就能刷新
</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>localhost</value>
<description>本地代理</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>HA使用的zookeeper地址</description>
</property>
</configuration>
```
### Edit `hdfs-site.xml`
```bash
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
```
```xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称与core-site.xml里的对应</description>
</property>
<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储通常是namenode的所在机器</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法缺省是ssh可设为shell稍后详述</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- datanode配置 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/datanode</value>
<description>datanode本地文件存放地址</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>文件复本数</description>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
</configuration>
```
### Edit `hadoop-env.sh`
```bash
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
```
```bash
# add these two lines
source /etc/profile
export HADOOP_HEAPSIZE_MAX=512
```
### Distribute the config files
```bash
scp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/
```
### Cluster initialization
- Start the ZooKeeper cluster first
```bash
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
```
```bash
# on the first start, bring up the journalnodes first so the 3 namenodes can sync metadata
ssh_all_zookeeper.sh hadoop-daemon.sh start journalnode
```
- Set up `zkfc`
```bash
# run on nn1
hdfs zkfc -formatZK
# start zkfc on nn1, nn2 and nn3
hadoop-daemon.sh start zkfc
```
- Initialize the NameNode on nn1 (run on nn1)
```bash
hdfs namenode -format
hadoop-daemon.sh start namenode
```
- Format and start the second and third NameNodes (run on nn2 and nn3)
```bash
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
```
- Edit **workers**
```bash
vim /usr/local/hadoop/etc/hadoop/workers
```
Change it to
```text
s1
s2
s3
```
Distribute it to the other machines
```bash
scp_all.sh /usr/local/hadoop/etc/hadoop/workers /usr/local/hadoop/etc/hadoop
```
- Start the DataNodes on s1, s2 and s3
```bash
# starts the datanode on every worker; run this from just one machine (nn1 or any other)
hadoop-daemons.sh start datanode
```
### Start the HDFS cluster
```bash
start-dfs.sh
```
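Once `start-dfs.sh` returns, the HA state can be checked from any node with the standard hdfs CLI (ns1/nn1–nn3 as configured above):
```bash
hdfs haadmin -getServiceState nn1   # exactly one of nn1/nn2/nn3 should report "active"
hdfs haadmin -getServiceState nn2
hdfs haadmin -getServiceState nn3
hdfs dfsadmin -report | grep "Live datanodes"   # expect 3 (s1, s2, s3)
```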


@@ -0,0 +1,482 @@
- `yarn-env.sh`
```bash
# add the following:
source /etc/profile
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx256m
YARN_HEAPSIZE=256
export YARN_RESOURCEMANAGER_HEAPSIZE=256
```
```bash
# distribute yarn-env.sh to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/yarn-env.sh /usr/local/hadoop/etc/hadoop/
```
- `yarn-site.xml`
```xml
<configuration>
<!-- RM1 configs start -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>nn1:8032</value>
<description>ResourceManager 对客户端暴露的地址。客户端通过该地址向RM提交应用程序杀死应用程序等</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>nn1</value>
<description>ResourceManager主机名</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>nn1:8030</value>
<description>ResourceManager 对ApplicationMaster暴露的访问地址。ApplicationMaster通过该地址向RM申请资源、释放资源等。</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>nn1:8089</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>nn1:8088</value>
<description>ResourceManager对外web ui地址。用户可通过该地址在浏览器中查看集群各类信息。</description>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>nn1:8031</value>
<description>ResourceManager 对NodeManager暴露的地址.。NodeManager通过该地址向RM汇报心跳领取任务等。</description>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>nn1:8033</value>
<description>ResourceManager 对管理员暴露的访问地址。管理员通过该地址向RM发送管理命令等</description>
</property>
<!-- RM1 configs end -->
<!-- RM2 configs start -->
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>nn2:8032</value>
<description>ResourceManager 对客户端暴露的地址。客户端通过该地址向RM提交应用程序杀死应用程序等</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>nn2</value>
<description>ResourceManager主机名</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>nn2:8030</value>
<description>ResourceManager 对ApplicationMaster暴露的访问地址。ApplicationMaster通过该地址向RM申请资源、释放资源等。</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>nn2:8089</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>nn2:8088</value>
<description>ResourceManager对外web ui地址。用户可通过该地址在浏览器中查看集群各类信息。</description>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>nn2:8031</value>
<description>ResourceManager 对NodeManager暴露的地址.。NodeManager通过该地址向RM汇报心跳领取任务等。</description>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>nn2:8033</value>
<description>ResourceManager 对管理员暴露的访问地址。管理员通过该地址向RM发送管理命令等</description>
</property>
<!-- RM2 configs end -->
<!-- RM3 configs start -->
<property>
<name>yarn.resourcemanager.address.rm3</name>
<value>nn3:8032</value>
<description>ResourceManager 对客户端暴露的地址。客户端通过该地址向RM提交应用程序杀死应用程序等</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm3</name>
<value>nn3</value>
<description>ResourceManager主机名</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm3</name>
<value>nn3:8030</value>
<description>ResourceManager 对ApplicationMaster暴露的访问地址。ApplicationMaster通过该地址向RM申请资源、释放资源等。</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm3</name>
<value>nn3:8089</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm3</name>
<value>nn3:8088</value>
<description>ResourceManager对外web ui地址。用户可通过该地址在浏览器中查看集群各类信息。</description>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm3</name>
<value>nn3:8031</value>
<description>ResourceManager 对NodeManager暴露的地址.。NodeManager通过该地址向RM汇报心跳领取任务等。</description>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm3</name>
<value>nn3:8033</value>
<description>ResourceManager 对管理员暴露的访问地址。管理员通过该地址向RM发送管理命令等</description>
</property>
<!-- yarn ha start -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
<description>是否开启yarn ha</description>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
<description>ha状态切换为自动切换</description>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2,rm3</value>
<description>RMs的逻辑id列表</description>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>ha状态的存储地址</description>
</property>
<!-- yarn ha end -->
<!-- 元数据存储共享 start -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>pseudo-yarn-rm-cluster</value>
<description>集群的Id</description>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
<description>默认值为false也就是说resourcemanager挂了相应的正在运行的任务在rm恢复后不能重新启动</description>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
<description>配置RM状态信息存储方式3有两种一种是FileSystemRMStateStore,另一种是MemoryRMStateStore还有一种目前较为主流的是zkstore</description>
</property>
<property>
<name>yarn.resourcemanager.zk.state-store.address</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>当使用ZK存储时指定在ZK上的存储地址。</description>
</property>
<!-- 元数据存储共享 end-->
<!-- nodeManager基础配置 start-->
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/yarn/local</value>
<description>中间结果存放位置存放执行Container所需的数据如可执行程序或jar包配置文件等和运行过程中产生的临时数据</description>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data/yarn/logs</value>
<description>Container运行日志存放地址可配置多个目录</description>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:9103</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>NodeManager上运行的附属服务。需配置成mapreduce_shuffle才可运行MapReduce程序</description>
</property>
<property>
<name>yarn.nodemanager.webapp.address</name>
<value>0.0.0.0:8042</value>
</property>
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>0.0.0.0:8040</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>23080</value>
</property>
<!-- nodeManager基础配置 end-->
<!-- nodeMananger资源限定 start -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
<description>单个任务可申请的最小虚拟CPU个数</description>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>3</value>
<description>单个任务可申请的最大虚拟CPU个数此参数对应yarn.nodemanager.resource.cpu-vcores建议最大为一个物理CPU的数量</description>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>1536</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>1024</value>
<description>单个任务可申请的最多物理内存量</description>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>3</value>
<description>该节点上YARN可使用的虚拟CPU个数一个物理CPU对应3个虚拟CPU</description>
</property>
<!-- 关闭内存检测 start -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
<description>虚拟内存检测默认是True</description>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
<description>物理内存检测默认是True</description>
</property>
<!-- 关闭内存检测 end -->
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*</value>
</property>
<!-- nodeMananger资源限定 end-->
<!-- 容量调度器的优先级
0 - 5 , 0是默认值 5 优先级最高
start
-->
<property>
<name>yarn.cluster.max-application-priority</name>
<value>5</value>
</property>
<!-- 容量调度器的优先级
0 - 5 , 0是默认值 5 优先级最高
end
-->
<!--在yarn-site.xml中配置执行的任务文件应该上传到/user的用户目录下 start -->
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
<property>
<name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
<value>5000</value>
</property>
<!-- end -->
<!-- 日志聚集功能开启 start-->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>是否启用日志聚集功能</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/app-logs</value>
<description>当应用程序运行结束后日志被转移到的HDFS目录启用日志聚集功能时有效</description>
</property>
<!--目录相关 end -->
<!-- 其它 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>1209600</value>
<description>nodemanager上所有Container的运行日志在HDFS中的保存时间保留半个月</description>
</property>
<!-- 日志聚集功能开启end -->
</configuration>
```
```bash
# distribute yarn-site.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/yarn-site.xml /usr/local/hadoop/etc/hadoop/
```
`mapred-site.xml`
```xml
<configuration>
<!--运行模式 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>运行模式</description>
</property>
<!--运行模式 -->
<!--资源限定 -->
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>1024</value>
<description>MR ApplicationMaster yarn申请的内存量</description>
</property>
<property>
<name>yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx768m</value>
<description>jvm使用内存</description>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1024</value>
<description>每个Map Task yarn申请内存</description>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>1024</value>
<description>每个Reduce Task yarn申请内存</description>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.cpu-vcores</name>
<value>1</value>
<description>MR ApplicationMaster占用的虚拟CPU个数此参数对应yarn.nodemanager.resource.cpu-vcores建议最大为一个物理CPU的数量</description>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx768m</value>
<description>reduce jvm实际内存</description>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx768m</value>
<description>map jvm实际内存</description>
</property>
<property>
<name>mapreduce.map.cpu.vcores</name>
<value>1</value>
<description>每个map Task需要的虚拟cpu数</description>
</property>
<property>
<name>mapreduce.reduce.cpu.vcores</name>
<value>1</value>
<description>每个Reduce Task需要的虚拟cpu数</description>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop,/usr/local/hadoop/share/hadoop/common/*,/usr/local/hadoop/share/hadoop/common/lib/*,/usr/local/hadoop/share/hadoop/hdfs/*,/usr/local/hadoop/share/hadoop/hdfs/lib/*,/usr/local/hadoop/share/hadoop/mapreduce/*,/usr/local/hadoop/share/hadoop/mapreduce/lib/*,/usr/local/hadoop/share/hadoop/yarn/*,/usr/local/hadoop/share/hadoop/yarn/lib/*,/usr/local/hadoop/lib/*,/usr/local/hbase/lib/*</value>
<description>运行mr程序所使用的虚拟机运行时的classpath</description>
</property>
<!-- 在nn1 开启历史服务器 start-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>nn1:10020</value>
<description>MapReduce JobHistory Server地址</description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>nn1:19888</value>
<description>MapReduce JobHistory Server Web UI地址</description>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/data/mapred/tmp</value>
<description>MapReduce作业产生的日志存放位置</description>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/data/mapred/done</value>
<description>MR JobHistory Server管理的日志的存放位置</description>
</property>
<property>
<name>mapreduce.job.userlog.retain.hours</name>
<value>48</value>
</property>
<!-- 在nn1 开启历史服务器 end-->
<!-- map阶段开启输出压缩便于reduce阶段从磁盘拉取数据 start-->
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
<description>map是否开启输出压缩</description>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>org.apache.hadoop.io.compress.BZip2Codec</value>
<description>map输出默认的算法</description>
</property>
<!-- map阶段开启输出压缩 end-->
<!--
这里是开启reduce输出压缩如果开启了那么输出结果无法直接看到需要进行解压缩
如果需要,取消注释即可
<property>
<name>mapreduce.output.fileoutputformat.compress</name>
<value>true</value>
<description>reduce是否开启输出压缩</description>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress.codec</name>
<value>org.apache.hadoop.io.compress.BZip2Codec</value>
<description>reduce输出默认的算法</description>
</property>
-->
</configuration>
```
```bash
# distribute mapred-site.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/mapred-site.xml /usr/local/hadoop/etc/hadoop/
```
**Configure capacity-scheduler.xml**
```xml
<!-- yarn使用容量调度器来管理集群的资源
配置root队列下两个子队列 hainiu占比80 default占比20
start
-->
<configuration>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>hainiu,default</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.hainiu.capacity</name>
<value>80</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>20</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.hainiu.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
<value>100</value>
</property>
<!-- yarn使用容量调度器来管理集群的资源
配置root队列下两个子队列 hainiu占比80 default占比20
end
-->
</configuration>
```
```shell
# distribute capacity-scheduler.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/capacity-scheduler.xml /usr/local/hadoop/etc/hadoop/
```
```shell
# start yarn; if it is already running, stop it first with stop-yarn.sh
start-yarn.sh
# start the history server (run on nn1)
mapred --daemon start historyserver
```
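A quick post-start check with the standard yarn CLI (rm1–rm3 and the hainiu queue as configured above):
```bash
yarn rmadmin -getAllServiceState   # one ResourceManager should be "active", the others "standby"
yarn node -list                    # s1, s2 and s3 should register as running NodeManagers
yarn queue -status hainiu          # the capacity-scheduler queue defined above
```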

Binary file not shown (new image, 152 KiB)


@@ -0,0 +1,33 @@
```xml
<configuration>
<!--运行模式 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--jobhistory properties-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>
```

76
Spark搭建/01.md Normal file

@@ -0,0 +1,76 @@
### Install Spark
- Upload `spark-3.3.4-bin-hadoop3-scala2.13.tgz` to `/tmp` on every machine
```bash
# extract
ssh_root.sh tar -zxf /tmp/spark-3.3.4-bin-hadoop3-scala2.13.tgz -C /usr/local
```
- Change the owner of the Spark directory to hadoop
```bash
ssh_root.sh chown -R hadoop:hadoop /usr/local/spark-3.3.4-bin-hadoop3-scala2.13/
```
- Add a symlink
```bash
ssh_root.sh ln -s /usr/local/spark-3.3.4-bin-hadoop3-scala2.13 /usr/local/spark
```
### Configure Spark
- `spark-env.sh` and `workers`
```bash
# rename the templates first
cd /usr/local/spark/conf
mv spark-env.sh.template spark-env.sh
mv workers.template workers
```
```bash
# edit spark-env.sh
vim spark-env.sh
```
```bash
# append at the end of the file:
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=1G
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=nn1:2181,nn2:2181,nn3:2181 -Dspark.deploy.zookeeper.dir=/spark3"
```
```bash
# edit workers
vim workers
```
Edit it as follows:
![[./images/workers.png]]
```bash
# distribute to the other hosts
scp_all.sh /usr/local/spark/conf/spark-env.sh /usr/local/spark/conf/
scp_all.sh /usr/local/spark/conf/workers /usr/local/spark/conf/
```
- Environment variables
```bash
# configure in /etc/profile.d/myEnv.sh
echo 'export SPARK_HOME=/usr/local/spark' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$SPARK_HOME/bin' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$SPARK_HOME/sbin' >> /etc/profile.d/myEnv.sh
# distribute to the other hosts
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d
# run on each of the 5 hosts
source /etc/profile
```
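Before submitting anything, the standalone masters and workers need to be running; a minimal sketch, assuming masters on nn1 and nn2 (matching the ZooKeeper recovery settings and the `spark-submit` master URL below):
```bash
# on nn1 and on nn2: start a master; ZooKeeper elects one of them as active
/usr/local/spark/sbin/start-master.sh
# on either master node: start the workers listed in conf/workers
/usr/local/spark/sbin/start-workers.sh
```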
Run a job
```bash
spark-submit --master spark://nn1:7077,nn2:7077 \
--executor-cores 2 \
--executor-memory 1G \
--total-executor-cores 6 \
--class org.apache.spark.examples.SparkPi \
/usr/local/spark/examples/jars/spark-examples_2.13-3.3.4.jar \
10000
```
```bash
#!/bin/bash
ssh_all_zookeeper.sh ${ZOOKEEPER_HOME}/bin/zkServer.sh start
${HADOOP_HOME}/sbin/start-all.sh
```

Binary file not shown (new image, 116 KiB)

5
欢迎.md Normal file

@@ -0,0 +1,5 @@
This is your new *vault*.
Write some notes, [[创建链接]], or try the [Importer](https://help.obsidian.md/Plugins/Importer) plugin!
When you are ready, delete this note and make the vault your own.