This commit is contained in:
2024-10-11 11:12:32 +08:00
commit 8b4a30b940
30 changed files with 1005115 additions and 0 deletions

38
.gitignore vendored Normal file
View File

@@ -0,0 +1,38 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### IntelliJ IDEA ###
.idea/modules.xml
.idea/jarRepositories.xml
.idea/compiler.xml
.idea/libraries/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

8
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

7
.idea/codeStyles/Project.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<ScalaCodeStyleSettings>
<option name="MULTILINE_STRING_CLOSING_QUOTES_ON_NEW_LINE" value="true" />
</ScalaCodeStyleSettings>
</code_scheme>
</component>

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

11
.idea/encodings.xml generated Normal file
View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/resources" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/Spark/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/Spark/src/main/resources" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
</component>
</project>

14
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

6
.idea/scala_compiler.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ScalaCompilerConfiguration">
<profile name="Maven 1" modules="Spark" />
</component>
</project>

124
.idea/uiDesigner.xml generated Normal file
View File

@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Palette2">
<group name="Swing">
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
</item>
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
</item>
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
</item>
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
<initial-values>
<property name="text" value="Button" />
</initial-values>
</item>
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="RadioButton" />
</initial-values>
</item>
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="CheckBox" />
</initial-values>
</item>
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
<initial-values>
<property name="text" value="Label" />
</initial-values>
</item>
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
</item>
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
</item>
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
<preferred-size width="-1" height="20" />
</default-constraints>
</item>
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
</item>
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
</item>
</group>
</component>
</project>

6
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>HaiNiuProjects</artifactId>
<groupId>com.aisi</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>MapReduceTest</artifactId>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
</project>

95
MapReduceTest/pom.xml Normal file
View File

@@ -0,0 +1,95 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.aisi</groupId>
<artifactId>HaiNiuProjects</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>MapReduceTest</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.32</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedArtifactPrimary>true</shadedArtifactPrimary>
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
<relocators>
<relocator>
<pattern>org.apache.commons</pattern>
<shadedPattern>shade.org.apache.commons</shadedPattern>
</relocator>
</relocators>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,45 @@
package com.aisi.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the word-count MapReduce job: wires the mapper, reducer and
 * I/O paths together, then submits the job and blocks until it completes.
 *
 * Usage: WordCountDriver &lt;input-path&gt; &lt;output-path&gt;
 * Exits with status 0 on success, 1 on failure.
 */
public class WordCountDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Job configuration (cluster settings are picked up from the classpath).
        Configuration conf = new Configuration();
        // conf.set("fs.defaultFS", "hdfs://localhost:9000/");
        Job job = Job.getInstance(conf, "wordcount");
        // Jar containing the job classes — required when running on a cluster.
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Reducer (final) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Mapper output key/value types (must match the reducer's input types).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Input file(s) to process, e.g. "hdfs://ns1/word/words.txt".
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output directory for results, e.g. "hdfs://ns1/word/result".
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit, wait for completion, and map success/failure to the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

View File

@@ -0,0 +1,27 @@
package com.aisi.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for word count: splits each input line on single spaces and emits
 * (word, 1) for every token.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused output objects: Hadoop serializes the values on context.write(),
    // so reusing one instance avoids allocating two objects per token.
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * @param key   byte offset of the line within the input split
     * @param value the text of one input line
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        System.out.println("map invoke...");
        String[] words = value.toString().split(" ");
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}

View File

@@ -0,0 +1,25 @@
package com.aisi.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for word count: receives (word, [1, 1, ...]) and emits the total
 * occurrence count for each word, e.g. (hello, [1, 1]) -> (hello, 2).
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused output value — serialized on write(), so one instance suffices.
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        System.out.println("reduce invoke...");
        // Sum the per-occurrence counts for this word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}

View File

@@ -0,0 +1,36 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
<description>默认文件服务的协议和NS逻辑名称和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/tmp</value>
<description>数据存储目录</description>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>hadoop</value>
<description>
hdfs dfsadmin refreshSuperUserGroupsConfiguration,
yarn rmadmin refreshSuperUserGroupsConfiguration
使用这两个命令不用重启就能刷新
</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>localhost</value>
<description>本地代理</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>HA使用的zookeeper地址</description>
</property>
</configuration>

View File

@@ -0,0 +1,140 @@
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称与core-site.xml里的对应</description>
</property>
<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储通常是namenode的所在机器</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法缺省是ssh可设为shell稍后详述</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- datanode配置 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/datanode</value>
<description>datanode本地文件存放地址</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>文件复本数</description>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
</configuration>

4
Spark/data/a.txt Normal file
View File

@@ -0,0 +1,4 @@
1 zhangsan 20 male
2 lisi 30 female
3 wangwu 35 male
4 zhaosi 40 female

3883
Spark/data/movies.txt Normal file

File diff suppressed because it is too large Load Diff

1000209
Spark/data/ratings.txt Normal file

File diff suppressed because it is too large Load Diff

13
Spark/data/word.txt Normal file
View File

@@ -0,0 +1,13 @@
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop
shenjianZ poop yuqing yuqin
yuqing yuqin shenjianZ poop

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>HaiNiuProjects</artifactId>
<groupId>com.aisi</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>Spark</artifactId>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<filters>
<filter>
<artifact>*:commons-beanutils-core</artifact>
<excludes>
<exclude>META-INF/services/**</exclude>
</excludes>
</filter>
</filters>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
</project>

98
Spark/pom.xml Normal file
View File

@@ -0,0 +1,98 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.aisi</groupId>
<artifactId>HaiNiuProjects</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>Spark</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.30</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId> <!-- 根据你的 Scala 版本选择合适的版本 -->
<version>3.1.2</version> <!-- 根据你的 Spark 版本选择合适的版本 -->
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Scala Maven Plugin -->
<!-- <plugin>-->
<!-- <groupId>net.alchim31.maven</groupId>-->
<!-- <artifactId>scala-maven-plugin</artifactId>-->
<!-- <version>4.5.3</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <goals>-->
<!-- <goal>compile</goal>-->
<!-- <goal>testCompile</goal>-->
<!-- </goals>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<!-- Maven Shade Plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<filters>
<filter>
<artifact>*:commons-beanutils-core</artifact>
<excludes>
<exclude>META-INF/services/**</exclude>
</excludes>
</filter>
</filters>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,17 @@
package com.aisi;
//TIP 要<b>运行</b>代码,请按 <shortcut actionId="Run"/> 或
// 点击装订区域中的 <icon src="AllIcons.Actions.Execute"/> 图标。
/** IDE-generated demo entry point: prints a greeting and the numbers 1..5. */
public class Main {
    public static void main(String[] args) {
        System.out.printf("Hello and welcome!");
        int counter = 1;
        while (counter <= 5) {
            System.out.println("i = " + counter);
            counter++;
        }
    }
}

View File

@@ -0,0 +1,27 @@
package com.aisi.spark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Local word-count example: reads a text file, counts occurrences of each
 * space-separated word, and writes the (word, count) pairs as text files.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    val config = new SparkConf()
    config.setMaster("local")
    config.setAppName("SH_wordcount")
    // Disable HDFS permission checks for this local run.
    config.set("spark.hadoop.dfs.permissions", "false")
    val sc = new SparkContext(config)
    // Windows-style absolute path to the input file.
    val lines = sc.textFile("D:/JetBrainsToolProject/IntelJ IDEA/HaiNiuProjects/Spark/data/word.txt")
    val counts = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      // reduceByKey combines counts map-side before the shuffle; the previous
      // groupBy(_._1).mapValues(_.size) shuffled every single record while
      // producing the same (word, count) pairs.
      .reduceByKey(_ + _)
    // Persist results to the local output directory.
    counts.saveAsTextFile("D:/JetBrainsToolProject/IntelJ IDEA/HaiNiuProjects/Spark/data/res")
    // Release Spark resources.
    sc.stop()
  }
}

View File

@@ -0,0 +1,20 @@
package com.aisi.spark
import org.apache.spark.SparkContext
/**
 * Word count intended for spark-submit: master and app name come from the
 * submit command; input and output paths come from the program arguments.
 */
object WordCountForCluster {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext()
    // args(0) = input path, args(1) = output path.
    val Array(input, output) = args
    val counts = sc.textFile(input)
      .flatMap(_.split(" "))
      .map((_, 1))
      // reduceByKey aggregates map-side before the shuffle; the previous
      // groupBy(_._1).mapValues(_.size) shuffled every record while producing
      // the same (word, count) pairs.
      .reduceByKey(_ + _)
    counts.saveAsTextFile(output)
    // Release Spark resources.
    sc.stop()
  }
}

View File

@@ -0,0 +1,6 @@
log4j.rootLogger=info,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c %M(): %m%n

View File

@@ -0,0 +1,19 @@
package com.aisi.spark
import org.apache.hive.jdbc.HiveDriver
import java.sql.DriverManager
/**
 * Connects to HiveServer2 over JDBC and prints the row count of table `stu`.
 * All JDBC resources are closed in finally blocks — previously only the
 * connection was closed, and not on exception paths.
 */
object TestBeeline {
  def main(args: Array[String]): Unit = {
    // Reference the driver class so it registers itself with DriverManager.
    classOf[HiveDriver]
    val connection = DriverManager.getConnection("jdbc:hive2://nn1:20000", "hadoop", null)
    try {
      val statement = connection.prepareStatement("SELECT count(1) as cnt from stu;")
      try {
        val resultSet = statement.executeQuery()
        try {
          while (resultSet.next()) {
            val cnt = resultSet.getLong("cnt")
            println("stu表的总条数为" + cnt)
          }
        } finally resultSet.close()
      } finally statement.close()
    } finally connection.close()
  }
}

View File

@@ -0,0 +1,75 @@
package com.aisi.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * For each user, finds the movie genre they rated most often, by joining
 * movies.txt (movieId,title,genres) with ratings.txt (userId,movieId,score).
 */
object TestMovieWithSql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("movie")
    conf.setMaster("local[*]")
    // Fixed key: the setting that controls SQL shuffle parallelism is
    // "spark.sql.shuffle.partitions"; the old "spark.shuffle.partitions"
    // key is not a recognized Spark configuration and was silently ignored.
    conf.set("spark.sql.shuffle.partitions", "20")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)
    import sqlSc.implicits._
    // movies.txt row: movieId,title,genre1|genre2|... — emit one row per genre.
    val df: DataFrame = sc.textFile("Spark/data/movies.txt")
      .flatMap(t => {
        val line = t.split(",")
        val movieId = line(0)
        val movieTypes = line.last
        // The title itself may contain commas (e.g. "American President, The"),
        // so re-join the middle fields with "," — not " " — to restore it.
        val movieName = line.drop(1).dropRight(1).mkString(",")
        movieTypes.split("\\|").map(movieType => (movieId, movieName, movieType))
      }).toDF("movieId", "movieName", "movieType")
    // df.limit(10).show()
    // ratings.txt row: userId,movieId,score
    val df1 = sc.textFile("Spark/data/ratings.txt")
      .map(t => {
        val line = t.split(",")
        val userId = line(0)
        val movieId = line(1)
        val score = line(2).toDouble
        (userId, movieId, score)
      }).toDF("userId", "movieId", "score")
    df1.limit(10).show()
    import org.apache.spark.sql.functions._
    // row_number() numbers the rows 1..n within each userId partition
    // (Window.partitionBy("userId")), ordered by the per-genre rating count
    // descending — so rn = 1 is each user's most-rated genre.
    df.join(df1, "movieId").groupBy("userId", "movieType")
      .agg(count("userId").as("cnt"))
      .withColumn("rn", row_number().over(Window.partitionBy("userId").orderBy($"cnt".desc)))
      .where("rn = 1")
      .show()
    // Sample output:
    // +------+---------+---+---+
    // |userId|movieType|cnt| rn|
    // +------+---------+---+---+
    // | 1090| Drama| 47| 1|
    // | 1159| Sci-Fi|102| 1|
    // | 1436| Drama| 33| 1|
    // | 1512| Comedy| 19| 1|
    // | 1572| Thriller| 16| 1|
    // | 2069| Comedy| 30| 1|
    // | 2088| Sci-Fi|201| 1|
    // | 2136| Comedy| 83| 1|
    // | 2162| Comedy|105| 1|
    // | 2294| Drama| 35| 1|
    // | 2904| Drama| 34| 1|
    // | 296| Horror| 42| 1|
    // | 3210| Drama| 97| 1|
    // | 3414| Action|181| 1|
    // | 3606| Drama| 33| 1|
    // | 3959| Comedy| 11| 1|
    // | 4032| Action|113| 1|
    // | 467| Romance| 34| 1|
    // | 4821| Drama|100| 1|
    // | 4937| Drama|136| 1|
    // +------+---------+---+---+
    // Release Spark resources.
    sc.stop()
  }
}
//case class movieRecord(var movieId:String,var movieName:String, var movieTypes:String)

View File

@@ -0,0 +1,45 @@
package com.aisi.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._ // agg、count函数导入
import org.apache.spark.sql.expressions.Window // 导入 Window
/**
 * Spark SQL demo over a small whitespace-delimited file: loads (id, name,
 * age, gender) records into a DataFrame and shows the oldest person per
 * gender using a window function.
 */
object TestSparkSql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("test sql")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)
    // Bring implicit conversions (.toDF, $"col") into scope.
    import sqlSc.implicits._
    // a.txt row: "id name age gender", space-separated.
    val rdd: RDD[(Int, String, Int, String)] = sc.textFile("Spark/data/a.txt")
      .map(t => {
        val line = t.split(" ")
        val id = line(0).toInt
        val name = line(1)
        val age = line(2).toInt
        val gender = line(3)
        (id, name, age, gender)
      })
    val df: DataFrame = rdd.toDF("id", "name", "age", "gender")
    df.show()        // print the rows
    df.printSchema() // print the column names and types
    // Earlier experiments, kept for reference:
    // df.where("age > 20").groupBy("gender").sum("age").show()
    // df.orderBy($"age".desc).show()
    // val result = df.groupBy("gender")
    //   .agg(
    //     count("id").as("count_id"), // count of ids per gender
    //     sum("age").as("sum_age")    // summed ages per gender
    //   )
    //   .orderBy($"sum_age".desc)     // sort by sum_age descending
    // result.show()
    // Oldest person per gender: rank rows within each gender by age
    // descending and keep rank 1.
    df.withColumn("rn", row_number().over(Window.partitionBy("gender").orderBy($"age".desc)))
      .where("rn = 1")
      .show()
    // Release Spark resources.
    sc.stop()
  }
}

32
pom.xml Normal file
View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.aisi</groupId>
<artifactId>HaiNiuProjects</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>MapReduceTest</module>
<module>Spark</module>
</modules>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>3.1.4</hadoop.version>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>