commit
This commit is contained in:
38
.gitignore
vendored
Normal file
38
.gitignore
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
target/
|
||||
!.mvn/wrapper/maven-wrapper.jar
|
||||
!**/src/main/**/target/
|
||||
!**/src/test/**/target/
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea/modules.xml
|
||||
.idea/jarRepositories.xml
|
||||
.idea/compiler.xml
|
||||
.idea/libraries/
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
|
||||
### Eclipse ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
build/
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
||||
|
||||
### Mac OS ###
|
||||
.DS_Store
|
||||
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
7
.idea/codeStyles/Project.xml
generated
Normal file
7
.idea/codeStyles/Project.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<component name="ProjectCodeStyleConfiguration">
|
||||
<code_scheme name="Project" version="173">
|
||||
<ScalaCodeStyleSettings>
|
||||
<option name="MULTILINE_STRING_CLOSING_QUOTES_ON_NEW_LINE" value="true" />
|
||||
</ScalaCodeStyleSettings>
|
||||
</code_scheme>
|
||||
</component>
|
||||
5
.idea/codeStyles/codeStyleConfig.xml
generated
Normal file
5
.idea/codeStyles/codeStyleConfig.xml
generated
Normal file
@@ -0,0 +1,5 @@
|
||||
<component name="ProjectCodeStyleConfiguration">
|
||||
<state>
|
||||
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
|
||||
</state>
|
||||
</component>
|
||||
11
.idea/encodings.xml
generated
Normal file
11
.idea/encodings.xml
generated
Normal file
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding">
|
||||
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/resources" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/Spark/src/main/java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/Spark/src/main/resources" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
|
||||
</component>
|
||||
</project>
|
||||
14
.idea/misc.xml
generated
Normal file
14
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ExternalStorageConfigurationManager" enabled="true" />
|
||||
<component name="MavenProjectsManager">
|
||||
<option name="originalFiles">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/pom.xml" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/scala_compiler.xml
generated
Normal file
6
.idea/scala_compiler.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ScalaCompilerConfiguration">
|
||||
<profile name="Maven 1" modules="Spark" />
|
||||
</component>
|
||||
</project>
|
||||
124
.idea/uiDesigner.xml
generated
Normal file
124
.idea/uiDesigner.xml
generated
Normal file
@@ -0,0 +1,124 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Palette2">
|
||||
<group name="Swing">
|
||||
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
||||
</item>
|
||||
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
||||
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
||||
<initial-values>
|
||||
<property name="text" value="Button" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="RadioButton" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="CheckBox" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="Label" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
||||
<preferred-size width="-1" height="20" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
||||
</item>
|
||||
</group>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
39
MapReduceTest/dependency-reduced-pom.xml
Normal file
39
MapReduceTest/dependency-reduced-pom.xml
Normal file
@@ -0,0 +1,39 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<parent>
|
||||
<artifactId>HaiNiuProjects</artifactId>
|
||||
<groupId>com.aisi</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>MapReduceTest</artifactId>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
|
||||
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
</properties>
|
||||
</project>
|
||||
95
MapReduceTest/pom.xml
Normal file
95
MapReduceTest/pom.xml
Normal file
@@ -0,0 +1,95 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.aisi</groupId>
|
||||
<artifactId>HaiNiuProjects</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>MapReduceTest</artifactId>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<version>3.1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-simple</artifactId>
|
||||
<version>1.7.32</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<createDependencyReducedPom>false</createDependencyReducedPom>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<shadedArtifactAttached>true</shadedArtifactAttached>
|
||||
<shadedArtifactPrimary>true</shadedArtifactPrimary>
|
||||
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
|
||||
<relocators>
|
||||
<relocator>
|
||||
<pattern>org.apache.commons</pattern>
|
||||
<shadedPattern>shade.org.apache.commons</shadedPattern>
|
||||
</relocator>
|
||||
</relocators>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountDriver {
|
||||
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
|
||||
// 创建配置文件对象
|
||||
Configuration conf = new Configuration();
|
||||
// conf.set("fs.defaultFS", "hdfs://localhost:9000/");
|
||||
// 创建任务对象
|
||||
Job job = Job.getInstance(conf, "wordcount");
|
||||
// 设置入口类
|
||||
job.setJarByClass(WordCountDriver.class);
|
||||
// 设置mapper类
|
||||
job.setMapperClass(WordCountMapper.class);
|
||||
// 设置reducer类
|
||||
job.setReducerClass(WordCountReducer.class);
|
||||
// 设置reducer输出类型
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(IntWritable.class);
|
||||
// 设置mapper输出类型
|
||||
job.setMapOutputKeyClass(Text.class);
|
||||
job.setMapOutputValueClass(IntWritable.class);
|
||||
// 设置mapreduce要处理的文件路径(hdfs路径)
|
||||
FileInputFormat.addInputPath(job, new Path(args[0]));//"hdfs://ns1/word/words.txt"
|
||||
// 设置mapreduce处理完成保存的文件路径
|
||||
FileOutputFormat.setOutputPath(job, new Path(args[1])); //"hdfs://ns1/word/result"
|
||||
boolean completion = job.waitForCompletion(true);
|
||||
// 判断是否运行成功
|
||||
if (completion) {
|
||||
System.exit(0);
|
||||
}else {
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
|
||||
@Override
|
||||
/**
|
||||
* key : 行首偏移量
|
||||
* value:一行的数据
|
||||
*/
|
||||
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
|
||||
System.out.println("map invoke...");
|
||||
String[] words = value.toString().split(" ");
|
||||
for (String word : words) {
|
||||
context.write(new Text(word), new IntWritable(1));
|
||||
// (hello,1) (pooo,1) (shenjianz,1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
|
||||
// (hello,1) (pooo,1) (shenjianz,1) (hello,1)
|
||||
// (hello,[1,1])
|
||||
@Override
|
||||
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
|
||||
System.out.println("reduce invoke...");
|
||||
// 记录每个单词的总数
|
||||
int sum = 0;
|
||||
for (IntWritable value : values) {
|
||||
sum += value.get();
|
||||
}
|
||||
context.write(key, new IntWritable(sum));
|
||||
// (hello,2)
|
||||
}
|
||||
}
|
||||
36
MapReduceTest/src/main/resources/core-site.xml
Normal file
36
MapReduceTest/src/main/resources/core-site.xml
Normal file
@@ -0,0 +1,36 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://ns1</value>
|
||||
<description>默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/data/tmp</value>
|
||||
<description>数据存储目录</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.groups</name>
|
||||
<value>hadoop</value>
|
||||
<description>
|
||||
hdfs dfsadmin –refreshSuperUserGroupsConfiguration,
|
||||
yarn rmadmin –refreshSuperUserGroupsConfiguration
|
||||
使用这两个命令不用重启就能刷新
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.hosts</name>
|
||||
<value>localhost</value>
|
||||
<description>本地代理</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>ha.zookeeper.quorum</name>
|
||||
<value>nn1:2181,nn2:2181,nn3:2181</value>
|
||||
<description>HA使用的zookeeper地址</description>
|
||||
</property>
|
||||
</configuration>
|
||||
140
MapReduceTest/src/main/resources/hdfs-site.xml
Normal file
140
MapReduceTest/src/main/resources/hdfs-site.xml
Normal file
@@ -0,0 +1,140 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.namenode.name.dir</name>
|
||||
<value>/data/namenode</value>
|
||||
<description>namenode本地文件存放地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.nameservices</name>
|
||||
<value>ns1</value>
|
||||
<description>提供服务的NS逻辑名称,与core-site.xml里的对应</description>
|
||||
</property>
|
||||
|
||||
<!-- namenode的配置 -->
|
||||
<!-- 主要的 -->
|
||||
<property>
|
||||
<name>dfs.ha.namenodes.ns1</name>
|
||||
<value>nn1,nn2,nn3</value>
|
||||
<description>列出该逻辑名称下的NameNode逻辑名称</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn1</name>
|
||||
<value>nn1:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn1</name>
|
||||
<value>nn1:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn2</name>
|
||||
<value>nn2:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn2</name>
|
||||
<value>nn2:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn3</name>
|
||||
<value>nn3:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn3</name>
|
||||
<value>nn3:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.handler.count</name>
|
||||
<value>77</value>
|
||||
<description>namenode的工作线程数</description>
|
||||
</property>
|
||||
|
||||
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
|
||||
<property>
|
||||
<name>dfs.namenode.shared.edits.dir</name>
|
||||
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
|
||||
<description>指定用于HA存放edits的共享存储,通常是namenode的所在机器</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.journalnode.edits.dir</name>
|
||||
<value>/data/journaldata/</value>
|
||||
<description>journaldata服务存放文件的地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>namenode和journalnode的链接重试次数10次</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.retry.interval</name>
|
||||
<value>10000</value>
|
||||
<description>重试的间隔时间10s</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>dfs.ha.fencing.methods</name>
|
||||
<value>sshfence</value>
|
||||
<description>指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.fencing.ssh.private-key-files</name>
|
||||
<value>/home/hadoop/.ssh/id_rsa</value>
|
||||
<description>杀死命令脚本的免密配置秘钥</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.ns1</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
<description>指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.auto-ha</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.automatic-failover.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<!-- datanode配置 -->
|
||||
<property>
|
||||
<name>dfs.datanode.data.dir</name>
|
||||
<value>/data/datanode</value>
|
||||
<description>datanode本地文件存放地址</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
<description>文件复本数</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.datanode.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
||||
4
Spark/data/a.txt
Normal file
4
Spark/data/a.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
1 zhangsan 20 male
|
||||
2 lisi 30 female
|
||||
3 wangwu 35 male
|
||||
4 zhaosi 40 female
|
||||
3883
Spark/data/movies.txt
Normal file
3883
Spark/data/movies.txt
Normal file
File diff suppressed because it is too large
Load Diff
1000209
Spark/data/ratings.txt
Normal file
1000209
Spark/data/ratings.txt
Normal file
File diff suppressed because it is too large
Load Diff
13
Spark/data/word.txt
Normal file
13
Spark/data/word.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
shenjianZ poop yuqing yuqin
|
||||
yuqing yuqin shenjianZ poop
|
||||
|
||||
41
Spark/dependency-reduced-pom.xml
Normal file
41
Spark/dependency-reduced-pom.xml
Normal file
@@ -0,0 +1,41 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<parent>
|
||||
<artifactId>HaiNiuProjects</artifactId>
|
||||
<groupId>com.aisi</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>Spark</artifactId>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:commons-beanutils-core</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/services/**</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
</properties>
|
||||
</project>
|
||||
98
Spark/pom.xml
Normal file
98
Spark/pom.xml
Normal file
@@ -0,0 +1,98 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.aisi</groupId>
|
||||
<artifactId>HaiNiuProjects</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>Spark</artifactId>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.12</artifactId>
|
||||
<version>3.1.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<version>2.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.30</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.12</artifactId> <!-- 根据你的 Scala 版本选择合适的版本 -->
|
||||
<version>3.1.2</version> <!-- 根据你的 Spark 版本选择合适的版本 -->
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hive</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>3.1.2</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- Scala Maven Plugin -->
|
||||
<!-- <plugin>-->
|
||||
<!-- <groupId>net.alchim31.maven</groupId>-->
|
||||
<!-- <artifactId>scala-maven-plugin</artifactId>-->
|
||||
<!-- <version>4.5.3</version>-->
|
||||
<!-- <executions>-->
|
||||
<!-- <execution>-->
|
||||
<!-- <goals>-->
|
||||
<!-- <goal>compile</goal>-->
|
||||
<!-- <goal>testCompile</goal>-->
|
||||
<!-- </goals>-->
|
||||
<!-- </execution>-->
|
||||
<!-- </executions>-->
|
||||
<!-- </plugin>-->
|
||||
|
||||
<!-- Maven Shade Plugin -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:commons-beanutils-core</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/services/**</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
17
Spark/src/main/java/com/aisi/Main.java
Normal file
17
Spark/src/main/java/com/aisi/Main.java
Normal file
@@ -0,0 +1,17 @@
|
||||
package com.aisi;

/**
 * IDE-generated sample entry point: prints a greeting followed by the
 * numbers 1..5, one per line.
 */
public class Main {
    public static void main(String[] args) {
        // Fix: the original used printf with no newline (and no format args),
        // so "i = 1" was appended to the greeting on the same line.
        System.out.println("Hello and welcome!");

        for (int i = 1; i <= 5; i++) {
            System.out.println("i = " + i);
        }
    }
}
|
||||
27
Spark/src/main/java/com/aisi/spark/WordCount.scala
Normal file
27
Spark/src/main/java/com/aisi/spark/WordCount.scala
Normal file
@@ -0,0 +1,27 @@
|
||||
package com.aisi.spark

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Local word-count job: reads a text file, counts occurrences of each
 * space-separated word, and writes the (word, count) pairs out as text.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    val config = new SparkConf()
      .setMaster("local")
      .setAppName("SH_wordcount")
      // Disable HDFS permission checks for local runs.
      .set("spark.hadoop.dfs.permissions", "false")

    val sc = new SparkContext(config)
    try {
      // Windows-style local input path.
      val lines = sc.textFile("D:/JetBrainsToolProject/IntelJ IDEA/HaiNiuProjects/Spark/data/word.txt")

      val counts = lines
        .flatMap(_.split(" "))
        .map((_, 1))
        // reduceByKey combines partial counts map-side; the original
        // groupBy(_._1).mapValues(_.size) shuffled every single record
        // across the network to produce the same result.
        .reduceByKey(_ + _)

      // Save results to a local output directory.
      counts.saveAsTextFile("D:/JetBrainsToolProject/IntelJ IDEA/HaiNiuProjects/Spark/data/res")
    } finally {
      sc.stop() // release the SparkContext even if the job fails
    }
  }
}
|
||||
20
Spark/src/main/java/com/aisi/spark/WordCountForCluster.scala
Normal file
20
Spark/src/main/java/com/aisi/spark/WordCountForCluster.scala
Normal file
@@ -0,0 +1,20 @@
|
||||
package com.aisi.spark

import org.apache.spark.SparkContext

/**
 * Cluster word-count job; master/app-name settings are supplied by
 * spark-submit, so the SparkContext is built without an explicit conf.
 *
 * args(0) = input path, args(1) = output path.
 */
object WordCountForCluster {
  def main(args: Array[String]): Unit = {
    // Fail fast with a clear message instead of a MatchError on bad args.
    require(args.length == 2, "usage: WordCountForCluster <input> <output>")
    val Array(input, output) = args

    val sc = new SparkContext()
    try {
      sc.textFile(input)
        .flatMap(_.split(" "))
        .map((_, 1))
        // reduceByKey aggregates map-side; the original
        // groupBy(_._1).mapValues(_.size) shuffled every record to
        // compute the same counts.
        .reduceByKey(_ + _)
        .saveAsTextFile(output)
    } finally {
      sc.stop() // release cluster resources even on failure
    }
  }
}
|
||||
6
Spark/src/main/resources/log4j.properties
Normal file
6
Spark/src/main/resources/log4j.properties
Normal file
@@ -0,0 +1,6 @@
|
||||
# Root logger: INFO level, routed to the "console" appender below.
log4j.rootLogger=info,console

# Console appender writing to stdout with a timestamped layout.
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Pattern: date, level, logger class, method name, then the message.
# e.g. "24/05/01 12:00:00 INFO com.aisi.spark.WordCount main(): msg"
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c %M(): %m%n
|
||||
19
Spark/src/main/scala/com/aisi/spark/TestBeeline.scala
Normal file
19
Spark/src/main/scala/com/aisi/spark/TestBeeline.scala
Normal file
@@ -0,0 +1,19 @@
|
||||
package com.aisi.spark

import org.apache.hive.jdbc.HiveDriver

import java.sql.DriverManager

/**
 * Connects to HiveServer2 over JDBC and prints the row count of table `stu`.
 */
object TestBeeline {
  def main(args: Array[String]): Unit = {
    // Touch the driver class so it registers itself with DriverManager.
    classOf[HiveDriver]
    val connection = DriverManager.getConnection("jdbc:hive2://nn1:20000", "hadoop", null)
    try {
      // Note: no trailing ';' — the Hive JDBC driver rejects statements
      // that end with a semicolon (unlike the beeline shell).
      val statement = connection.prepareStatement("SELECT count(1) as cnt from stu")
      try {
        val resultSet = statement.executeQuery()
        try {
          while (resultSet.next()) {
            val cnt = resultSet.getLong("cnt")
            println("stu表的总条数为:" + cnt)
          }
        } finally resultSet.close() // close ResultSet even on error
      } finally statement.close()   // close Statement even on error
    } finally connection.close()    // always release the connection
  }
}
|
||||
75
Spark/src/main/scala/com/aisi/spark/TestMovieWithSql.scala
Normal file
75
Spark/src/main/scala/com/aisi/spark/TestMovieWithSql.scala
Normal file
@@ -0,0 +1,75 @@
|
||||
package com.aisi.spark

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * For every user, finds the movie genre they rated most often.
 *
 * Inputs (CSV-ish text files):
 *  - Spark/data/movies.txt  : movieId,movieName,genre1|genre2|...
 *  - Spark/data/ratings.txt : userId,movieId,score
 */
object TestMovieWithSql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("movie")
      .setMaster("local[*]")
      // Fix: the SQL shuffle-partition setting is spark.sql.shuffle.partitions;
      // the original key "spark.shuffle.partitions" is not a Spark config and
      // was silently ignored.
      .set("spark.sql.shuffle.partitions", "20")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)

    // Bring toDF / $-column syntax into scope.
    import sqlSc.implicits._

    // One row per (movie, genre): a movie tagged "A|B" yields two rows.
    val movies: DataFrame = sc.textFile("Spark/data/movies.txt")
      .flatMap { t =>
        val line = t.split(",")
        val movieId = line.head
        val movieTypes = line.last
        // The title itself may contain commas (e.g. "Movie, The (1995)"),
        // so rejoin the middle fields with "," — the original joined with
        // " " and corrupted such titles.
        val movieName = line.slice(1, line.length - 1).mkString(",")
        movieTypes.split("\\|").map(movieType => (movieId, movieName, movieType))
      }
      .toDF("movieId", "movieName", "movieType")

    val ratings = sc.textFile("Spark/data/ratings.txt")
      .map { t =>
        val line = t.split(",")
        (line(0), line(1), line(2).toDouble)
      }
      .toDF("userId", "movieId", "score")
    ratings.limit(10).show()

    import org.apache.spark.sql.functions._
    // Count ratings per (user, genre), then keep each user's top genre:
    // row_number() over a per-user window ordered by cnt desc; rn = 1 is
    // the genre with the highest count for that user.
    movies.join(ratings, "movieId")
      .groupBy("userId", "movieType")
      .agg(count("userId").as("cnt"))
      .withColumn("rn", row_number().over(Window.partitionBy("userId").orderBy($"cnt".desc)))
      .where("rn = 1")
      .show()

    sc.stop() // release local Spark resources
  }
}
|
||||
45
Spark/src/main/scala/com/aisi/spark/TestSparkSql.scala
Normal file
45
Spark/src/main/scala/com/aisi/spark/TestSparkSql.scala
Normal file
@@ -0,0 +1,45 @@
|
||||
package com.aisi.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._        // row_number, count, ...
import org.apache.spark.sql.expressions.Window // window specs

/**
 * Reads whitespace-delimited records ("id name age gender") from a text
 * file and, for each gender, shows the oldest person using a window
 * function (row_number over age desc).
 */
object TestSparkSql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("test sql")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)
    // Bring toDF / $-column syntax into scope.
    import sqlSc.implicits._

    val rdd: RDD[(Int, String, Int, String)] = sc.textFile("Spark/data/a.txt")
      .map { t =>
        val line = t.split(" ")
        (line(0).toInt, line(1), line(2).toInt, line(3))
      }

    val df: DataFrame = rdd.toDF("id", "name", "age", "gender")
    df.show()         // preview the rows
    df.printSchema()  // preview the inferred schema

    // Rank rows within each gender by age (desc) and keep the top one.
    df.withColumn("rn", row_number().over(Window.partitionBy("gender").orderBy($"age".desc)))
      .where("rn = 1")
      .show()

    sc.stop() // release local Spark resources
  }
}
|
||||
32
pom.xml
Normal file
32
pom.xml
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Parent aggregator POM for the HaiNiuProjects multi-module build. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.aisi</groupId>
    <artifactId>HaiNiuProjects</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- "pom" packaging: this module only aggregates and configures the
         child modules listed below; it produces no artifact of its own. -->
    <packaging>pom</packaging>
    <modules>
        <module>MapReduceTest</module>
        <module>Spark</module>
    </modules>

    <properties>
        <!-- Compile all modules for Java 8. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Hadoop version shared by all child modules via dependencyManagement. -->
        <hadoop.version>3.1.4</hadoop.version>
    </properties>
    <!-- Pins versions for children; each child must still declare the
         dependency itself (without a version) to actually use it. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
                <version>${hadoop.version}</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

</project>
|
||||
Reference in New Issue
Block a user