update kafka consumer-01
This commit is contained in:
10
MapReduceTest/data/file.txt
Normal file
10
MapReduceTest/data/file.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
2024-10-01 www.example.com
|
||||
2024-10-01 www.example.com
|
||||
2024-10-01 www.test.com
|
||||
2024-10-02 www.example.com
|
||||
2024-10-02 www.test.com
|
||||
2024-10-02 www.sample.com
|
||||
2024-10-03 www.example.com
|
||||
2024-10-03 www.test.com
|
||||
2024-10-03 www.sample.com
|
||||
2024-10-03 www.example.com
|
||||
@@ -33,6 +33,14 @@
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>5.9.3</version> <!-- 使用最新的稳定版本 -->
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
<build>
|
||||
@@ -60,28 +68,28 @@
|
||||
</filter>
|
||||
</filters>
|
||||
<shadedArtifactAttached>true</shadedArtifactAttached>
|
||||
<shadedArtifactPrimary>true</shadedArtifactPrimary>
|
||||
<!-- <shadedArtifactPrimary>true</shadedArtifactPrimary>-->
|
||||
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
|
||||
<relocators>
|
||||
<relocator>
|
||||
<pattern>org.apache.commons</pattern>
|
||||
<shadedPattern>shade.org.apache.commons</shadedPattern>
|
||||
</relocator>
|
||||
</relocators>
|
||||
<!-- <relocators>-->
|
||||
<!-- <relocator>-->
|
||||
<!-- <pattern>org.apache.commons</pattern>-->
|
||||
<!-- <shadedPattern>shade.org.apache.commons</shadedPattern>-->
|
||||
<!-- </relocator>-->
|
||||
<!-- </relocators>-->
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
<!-- <filter>-->
|
||||
<!-- <artifact>*:*</artifact>-->
|
||||
<!-- <excludes>-->
|
||||
<!-- <exclude>META-INF/*.SF</exclude>-->
|
||||
<!-- <exclude>META-INF/*.DSA</exclude>-->
|
||||
<!-- <exclude>META-INF/*.RSA</exclude>-->
|
||||
<!-- </excludes>-->
|
||||
<!-- </filter>-->
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
|
||||
<mainClass>com.aisi.accesscount.VisitCountDriver</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.aisi.accesscount;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for the second (sort) job: inverts the "date\tcount" records produced
 * by the counting job into (count, date) pairs so the MapReduce shuffle sorts
 * the output by visit count (IntWritable's natural ascending order).
 */
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Input record format (output of VisitCountReducer): date \t totalVisits
        String[] fields = value.toString().split("\t");
        if (fields.length == 2) {
            String date = fields[0];
            try {
                int count = Integer.parseInt(fields[1]);
                // Emit the count as the key so the framework sorts by visit
                // count during the shuffle; the date rides along as the value.
                context.write(new IntWritable(count), new Text(date));
            } catch (NumberFormatException ignored) {
                // Skip malformed records instead of failing the whole task
                // attempt on a single bad line.
            }
        }
    }
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.aisi.accesscount;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer for the second (sort) job. Records arrive grouped by visit count
 * (the key); each associated date is written back out as (date, count),
 * restoring the original column order after the count-keyed sort.
 */
public class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Several dates may share the same total count; emit one line per date.
        for (Text visitDate : values) {
            context.write(visitDate, key);
        }
    }
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
|
||||
public class VisitCountDriver {
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
|
||||
// 第一个 Job:统计每日访问次数
|
||||
Job countJob = Job.getInstance(conf, "Visit Count");
|
||||
countJob.setJarByClass(VisitCountDriver.class);
|
||||
countJob.setMapperClass(VisitCountMapper.class);
|
||||
countJob.setReducerClass(VisitCountReducer.class);
|
||||
|
||||
countJob.setMapOutputKeyClass(Text.class);
|
||||
countJob.setMapOutputValueClass(IntWritable.class);
|
||||
countJob.setOutputKeyClass(Text.class);
|
||||
countJob.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(countJob, new Path(args[0]));
|
||||
Path tempOutput = new Path("temp_output");
|
||||
FileOutputFormat.setOutputPath(countJob, tempOutput);
|
||||
|
||||
boolean countJobSuccess = countJob.waitForCompletion(true);
|
||||
if (!countJobSuccess) {
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// 第二个 Job:将访问次数进行升序排序
|
||||
Job sortJob = Job.getInstance(conf, "Sort Visits");
|
||||
sortJob.setJarByClass(VisitCountDriver.class);
|
||||
sortJob.setMapperClass(SortMapper.class);
|
||||
sortJob.setReducerClass(SortReducer.class);
|
||||
|
||||
sortJob.setMapOutputKeyClass(IntWritable.class);
|
||||
sortJob.setMapOutputValueClass(Text.class);
|
||||
sortJob.setOutputKeyClass(Text.class);
|
||||
sortJob.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(sortJob, tempOutput);
|
||||
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
|
||||
|
||||
System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.aisi.accesscount;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for the counting job: parses lines of the form "date URL" (single
 * space separator) and emits (date, 1) for every valid record so the reducer
 * can total visits per date.
 */
public class VisitCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] parts = value.toString().split(" ");
        // Silently drop lines that do not have exactly two space-separated
        // columns, same as the original length guard.
        if (parts.length != 2) {
            return;
        }
        String visitDate = parts[0];
        context.write(new Text(visitDate), new IntWritable(1));
    }
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.aisi.accesscount;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer for the counting job: totals the per-date partial counts emitted by
 * VisitCountMapper and writes one (date, totalVisits) record per date.
 */
public class VisitCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Accumulate every partial count for this date.
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }
        context.write(key, new IntWritable(total));
    }
}
|
||||
7
MapReduceTest/src/main/java/com/aisi/api/Test.java
Normal file
7
MapReduceTest/src/main/java/com/aisi/api/Test.java
Normal file
@@ -0,0 +1,7 @@
|
||||
package com.aisi.api;


/**
 * Empty placeholder class; defines no members.
 * NOTE(review): presumably reserved for future API code — confirm it is still
 * needed or remove it.
 */
public class Test {

}
|
||||
107
MapReduceTest/src/test/java/Example.java
Normal file
107
MapReduceTest/src/test/java/Example.java
Normal file
@@ -0,0 +1,107 @@
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class Example {
|
||||
static FileSystem fs = null;
|
||||
@BeforeAll
|
||||
public static void setup() throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
fs = FileSystem.get(conf);
|
||||
}
|
||||
@AfterAll
|
||||
public static void teardown() throws IOException {
|
||||
if (fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void list() throws IOException {
|
||||
FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
|
||||
for (FileStatus fileStatus : fileStatuses) {
|
||||
System.out.println(fileStatus.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mkdir() throws IOException {
|
||||
boolean mkdirsed = fs.mkdirs(new Path("/test"));
|
||||
if (mkdirsed) {
|
||||
System.out.println("mkdirsed");
|
||||
}else
|
||||
System.out.println("mkdir failed");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void delete() throws IOException {
|
||||
boolean deleted = fs.delete(new Path("/test"), true);
|
||||
if (deleted) {
|
||||
System.out.println("delete");
|
||||
}else
|
||||
System.out.println("delete failed");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void upload() throws IOException {
|
||||
fs.copyFromLocalFile(new Path("d:\\tmp\\process.xml"), new Path("/test/process.xml"));
|
||||
System.out.println("upload success");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void download() throws IOException {
|
||||
fs.copyToLocalFile(new Path("/test/process.xml"), new Path("d:\\tmp\\process_download.xml"));
|
||||
System.out.println("download success");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void read() throws IOException {
|
||||
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
|
||||
new BufferedReader(new InputStreamReader(fsDataInputStream)).lines().forEach(System.out::println);
|
||||
fsDataInputStream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void write() throws IOException {
|
||||
FSDataOutputStream fsDataOutputStream = fs.create(new Path("/test/process_replication.xml"));
|
||||
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream, "utf-8"));
|
||||
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, "utf-8"));
|
||||
String line = "";
|
||||
while ((line = reader.readLine()) != null) {
|
||||
writer.write(line);
|
||||
writer.newLine();
|
||||
}
|
||||
writer.close();
|
||||
reader.close();
|
||||
fsDataOutputStream.close();
|
||||
fsDataInputStream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void read1() throws IOException {
|
||||
Configuration conf=new Configuration();
|
||||
//获取SequenceFile.Reader对象
|
||||
SequenceFile.Reader reader=new SequenceFile.Reader(fs,new Path("/example/part-m-00000"),conf);
|
||||
//获取序列化中使用的键和值类型
|
||||
Text key=new Text();
|
||||
Text value=new Text();
|
||||
//将读取的数据写入janfeb.txt文件
|
||||
BufferedWriter out=new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\tmp\\5-12.txt")));
|
||||
while(reader.next(key,value)){
|
||||
out.write(key.toString()+"\t"+value.toString()+"\r\n");
|
||||
}
|
||||
out.close();
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
56
MapReduceTest/src/test/java/SelectData.java
Normal file
56
MapReduceTest/src/test/java/SelectData.java
Normal file
@@ -0,0 +1,56 @@
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||
import org.apache.hadoop.util.GenericOptionsParser;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
//代码5-3
|
||||
import java.io.IOException;
|
||||
|
||||
public class SelectData {
|
||||
public static class MyMap extends Mapper<Object, Text, Text, Text> {
|
||||
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
||||
String line = value.toString();
|
||||
String arr[] = line.split(",");
|
||||
if (arr[4].contains("2021/1") || arr[4].contains("2021/2")) {
|
||||
context.write(new Text(arr[2]),
|
||||
new Text(arr[4].substring(0, 9)));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
|
||||
if(otherArgs.length<2){
|
||||
System.err.println("必须输入读取文件路径和输出路径");
|
||||
System.exit(2);
|
||||
}
|
||||
Job job =Job.getInstance(conf,"Select Data");
|
||||
job.setJarByClass(SelectData.class);
|
||||
job.setMapperClass(MyMap.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(Text.class);
|
||||
|
||||
//设置输入格式
|
||||
job.setInputFormatClass(TextInputFormat.class);
|
||||
//设置输出格式
|
||||
job.setOutputFormatClass(SequenceFileOutputFormat.class);
|
||||
//设置reduce的任务数是0
|
||||
job.setNumReduceTasks(0);
|
||||
for(int i=0;i<otherArgs.length-1;++i){
|
||||
FileInputFormat.addInputPath(job,new Path(otherArgs[i]));
|
||||
}
|
||||
FileOutputFormat.setOutputPath(job,new Path(otherArgs[otherArgs.length-1]));
|
||||
System.exit(job.waitForCompletion(true)?0:1);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user