update kafka consumer-01

This commit is contained in:
2024-10-27 10:55:40 +08:00
parent 4726888819
commit eece6503c9
45 changed files with 1444 additions and 28 deletions

View File

@@ -0,0 +1,10 @@
2024-10-01 www.example.com
2024-10-01 www.example.com
2024-10-01 www.test.com
2024-10-02 www.example.com
2024-10-02 www.test.com
2024-10-02 www.sample.com
2024-10-03 www.example.com
2024-10-03 www.test.com
2024-10-03 www.sample.com
2024-10-03 www.example.com

View File

@@ -33,6 +33,14 @@
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.9.3</version> <!-- latest stable JUnit 5 release at the time of writing -->
<scope>test</scope>
</dependency>
</dependencies>
<build>
@@ -60,28 +68,28 @@
</filter>
</filters>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedArtifactPrimary>true</shadedArtifactPrimary>
<!-- <shadedArtifactPrimary>true</shadedArtifactPrimary>-->
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
<relocators>
<relocator>
<pattern>org.apache.commons</pattern>
<shadedPattern>shade.org.apache.commons</shadedPattern>
</relocator>
</relocators>
<!-- <relocators>-->
<!-- <relocator>-->
<!-- <pattern>org.apache.commons</pattern>-->
<!-- <shadedPattern>shade.org.apache.commons</shadedPattern>-->
<!-- </relocator>-->
<!-- </relocators>-->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
<!-- <filter>-->
<!-- <artifact>*:*</artifact>-->
<!-- <excludes>-->
<!-- <exclude>META-INF/*.SF</exclude>-->
<!-- <exclude>META-INF/*.DSA</exclude>-->
<!-- <exclude>META-INF/*.RSA</exclude>-->
<!-- </excludes>-->
<!-- </filter>-->
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<!-- ManifestResourceTransformer honors only a single mainClass; the stale
     WordCount entry point is kept here as a comment for reference. -->
<!-- <mainClass>com.aisi.wordcount.WordCountDriver</mainClass> -->
<mainClass>com.aisi.accesscount.VisitCountDriver</mainClass>
</transformer>
</transformers>
</configuration>

View File

@@ -0,0 +1,22 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    /**
     * Inverts "date&lt;TAB&gt;count" lines (output of the counting job) so the
     * visit count becomes the map output key, letting the shuffle phase
     * sort records by visit count in ascending order.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Input format: date \t total visit count.
        String[] fields = value.toString().split("\t");
        if (fields.length == 2) {
            try {
                int count = Integer.parseInt(fields[1].trim());
                // Key = visit count, value = date, so counts sort naturally.
                context.write(new IntWritable(count), new Text(fields[0]));
            } catch (NumberFormatException ignored) {
                // Skip malformed lines instead of failing the whole task
                // (the original let the exception kill the mapper).
            }
        }
    }
}

View File

@@ -0,0 +1,16 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
    /**
     * Emits one "date  count" line per date. Keys (visit counts) arrive
     * pre-sorted ascending from the shuffle, so no extra sorting is needed.
     */
    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Several dates may share the same visit count; write each of them.
        for (Text visitDate : values) {
            context.write(visitDate, key);
        }
    }
}

View File

@@ -0,0 +1,51 @@
package com.aisi.accesscount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class VisitCountDriver {
    /**
     * Chains two MapReduce jobs:
     *   1. count visits per date ("Visit Count"),
     *   2. re-key by count so the shuffle sorts dates ascending ("Sort Visits").
     *
     * args[0] = input path of the raw "date url" log lines
     * args[1] = final output path of the sorted result
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path tempOutput = new Path("temp_output");
        // Remove stale intermediate data first: a leftover directory from a
        // previous (possibly failed) run would make job 1 abort with
        // "output directory already exists".
        tempOutput.getFileSystem(conf).delete(tempOutput, true);

        // Job 1: count visits per date.
        Job countJob = Job.getInstance(conf, "Visit Count");
        countJob.setJarByClass(VisitCountDriver.class);
        countJob.setMapperClass(VisitCountMapper.class);
        countJob.setReducerClass(VisitCountReducer.class);
        countJob.setMapOutputKeyClass(Text.class);
        countJob.setMapOutputValueClass(IntWritable.class);
        countJob.setOutputKeyClass(Text.class);
        countJob.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(countJob, new Path(args[0]));
        FileOutputFormat.setOutputPath(countJob, tempOutput);
        if (!countJob.waitForCompletion(true)) {
            System.exit(1);
        }

        // Job 2: sort dates by visit count (ascending, via shuffle ordering).
        Job sortJob = Job.getInstance(conf, "Sort Visits");
        sortJob.setJarByClass(VisitCountDriver.class);
        sortJob.setMapperClass(SortMapper.class);
        sortJob.setReducerClass(SortReducer.class);
        sortJob.setMapOutputKeyClass(IntWritable.class);
        sortJob.setMapOutputValueClass(Text.class);
        sortJob.setOutputKeyClass(Text.class);
        sortJob.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(sortJob, tempOutput);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        boolean sortJobSuccess = sortJob.waitForCompletion(true);

        // Clean up the intermediate directory regardless of the outcome
        // (the original left temp_output behind after every run).
        tempOutput.getFileSystem(conf).delete(tempOutput, true);
        System.exit(sortJobSuccess ? 0 : 1);
    }
}

View File

@@ -0,0 +1,20 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class VisitCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * Parses each log line of the form "date URL" and emits (date, 1),
     * so the reducer can sum per-date visit totals.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // trim + split on any whitespace run: the original split(" ") silently
        // dropped lines containing double spaces or trailing whitespace.
        String[] fields = value.toString().trim().split("\\s+");
        if (fields.length == 2) {
            String date = fields[0];
            context.write(new Text(date), new IntWritable(1));
        }
    }
}

View File

@@ -0,0 +1,18 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class VisitCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Sums the per-date 1s emitted by VisitCountMapper into a total
     * visit count and writes one "date  total" record per date.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable one : values) {
            total += one.get();
        }
        context.write(key, new IntWritable(total));
    }
}

View File

@@ -0,0 +1,7 @@
package com.aisi.api;
// NOTE(review): empty placeholder class in the com.aisi.api package — it has
// no behavior; confirm whether it is still needed or can be removed.
public class Test {
}

View File

@@ -0,0 +1,107 @@
import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.*;
public class Example {
    // Shared HDFS handle; opened once for the test class, closed in teardown.
    static FileSystem fs = null;

    /** Connects to the default FileSystem found in the classpath config. */
    @BeforeAll
    public static void setup() throws IOException {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);
    }

    /** Releases the shared FileSystem handle. */
    @AfterAll
    public static void teardown() throws IOException {
        if (fs != null) {
            fs.close();
        }
    }

    /** Lists the direct children of the HDFS root directory. */
    @Test
    public void list() throws IOException {
        for (FileStatus fileStatus : fs.listStatus(new Path("/"))) {
            System.out.println(fileStatus.getPath());
        }
    }

    /** Creates /test (and any missing parents). */
    @Test
    public void mkdir() throws IOException {
        if (fs.mkdirs(new Path("/test"))) {
            System.out.println("mkdirsed");
        } else {
            System.out.println("mkdir failed");
        }
    }

    /** Recursively deletes /test. */
    @Test
    public void delete() throws IOException {
        // second argument true = recursive delete
        if (fs.delete(new Path("/test"), true)) {
            System.out.println("delete");
        } else {
            System.out.println("delete failed");
        }
    }

    /** Uploads a local file into HDFS. */
    @Test
    public void upload() throws IOException {
        fs.copyFromLocalFile(new Path("d:\\tmp\\process.xml"), new Path("/test/process.xml"));
        System.out.println("upload success");
    }

    /** Downloads an HDFS file to the local disk. */
    @Test
    public void download() throws IOException {
        fs.copyToLocalFile(new Path("/test/process.xml"), new Path("d:\\tmp\\process_download.xml"));
        System.out.println("download success");
    }

    /** Prints an HDFS file line by line. */
    @Test
    public void read() throws IOException {
        // try-with-resources closes the stream even on read failure
        // (the original leaked it if an exception was thrown mid-read).
        try (FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"))) {
            new BufferedReader(new InputStreamReader(fsDataInputStream)).lines().forEach(System.out::println);
        }
    }

    /** Copies /test/process.xml to /test/process_replication.xml line by line. */
    @Test
    public void write() throws IOException {
        try (FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
             FSDataOutputStream fsDataOutputStream = fs.create(new Path("/test/process_replication.xml"));
             BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream, "utf-8"));
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
        // All four resources are closed by try-with-resources in reverse
        // order; the original closed them manually and leaked on exceptions.
    }

    /** Dumps a SequenceFile of Text key/value pairs into a local text file. */
    @Test
    public void read1() throws IOException {
        Configuration conf = new Configuration();
        Text key = new Text();
        Text value = new Text();
        // NOTE(review): this SequenceFile.Reader constructor is deprecated in
        // recent Hadoop; kept as-is to avoid changing behavior.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("/example/part-m-00000"), conf);
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\tmp\\5-12.txt")))) {
            while (reader.next(key, value)) {
                out.write(key.toString() + "\t" + value.toString() + "\r\n");
            }
        }
    }
}

View File

@@ -0,0 +1,56 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.conf.Configuration;
//代码5-3
import java.io.IOException;
public class SelectData {
public static class MyMap extends Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String arr[] = line.split(",");
if (arr[4].contains("2021/1") || arr[4].contains("2021/2")) {
context.write(new Text(arr[2]),
new Text(arr[4].substring(0, 9)));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if(otherArgs.length<2){
System.err.println("必须输入读取文件路径和输出路径");
System.exit(2);
}
Job job =Job.getInstance(conf,"Select Data");
job.setJarByClass(SelectData.class);
job.setMapperClass(MyMap.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//设置输入格式
job.setInputFormatClass(TextInputFormat.class);
//设置输出格式
job.setOutputFormatClass(SequenceFileOutputFormat.class);
//设置reduce的任务数是0
job.setNumReduceTasks(0);
for(int i=0;i<otherArgs.length-1;++i){
FileInputFormat.addInputPath(job,new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,new Path(otherArgs[otherArgs.length-1]));
System.exit(job.waitForCompletion(true)?0:1);
}
}