update kafka consumer-01

This commit is contained in:
shenjianZ 2024-10-27 10:55:40 +08:00
parent 4726888819
commit eece6503c9
45 changed files with 1444 additions and 28 deletions

View File

@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/Kafka/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/Kafka/src/main/resources" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/MapReduceTest/src/main/resources" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/Spark/src/main/java" charset="UTF-8" />

64
Kafka/pom.xml Normal file
View File

@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.aisi</groupId>
<artifactId>HaiNiuProjects</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>Kafka</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.30</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.aisi.producer.ProducerWithObjectSerializer</mainClass> <!-- specify your main class -->
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,32 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class Consumer1 {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_a", "topic_b"); // each topic as a separate string
consumer.subscribe(topics);
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
}

View File

@ -0,0 +1,49 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
public class ConsumerWithCooperativeStickyAssignor {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//set the partition assignment strategy to cooperative-sticky
properties.put("partition.assignment.strategy", CooperativeStickyAssignor.class.getName());
//consumer session timeout; it cannot be set lower than the broker minimum of 6s
properties.put("session.timeout.ms", 6000);
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_d","topic_e");
consumer.subscribe(topics, new ConsumerRebalanceListener() {
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition revoked:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
@Override
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition assigned:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
});
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
}

View File

@ -0,0 +1,40 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.Deserializer;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class ConsumerWithDeserializer {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", MyStringDeserializer.class.getName());
properties.put("value.deserializer", MyStringDeserializer.class.getName());
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_a", "topic_b"); // each topic as a separate string
consumer.subscribe(topics);
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
public static class MyStringDeserializer implements Deserializer<String> {
@Override
public String deserialize(String topic, byte[] data) {
// guard against null payloads (e.g. tombstone records)
if (data == null) {
return null;
}
return new String(data, StandardCharsets.UTF_8);
}
}
}

View File

@ -0,0 +1,49 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
public class ConsumerWithRangeAssignor {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//set the partition assignment strategy to range
properties.put("partition.assignment.strategy", RangeAssignor.class.getName());
//consumer session timeout; it cannot be set lower than the broker minimum of 6s
properties.put("session.timeout.ms", 6000);
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_d");
consumer.subscribe(topics, new ConsumerRebalanceListener() {
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition revoked:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
@Override
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition assigned:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
});
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
}

View File

@ -0,0 +1,49 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
public class ConsumerWithRoundRobinAssignor {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//set the partition assignment strategy to round-robin
properties.put("partition.assignment.strategy", RoundRobinAssignor.class.getName());
//consumer session timeout; it cannot be set lower than the broker minimum of 6s
properties.put("session.timeout.ms", 6000);
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_d","topic_e");
consumer.subscribe(topics, new ConsumerRebalanceListener() {
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition revoked:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
@Override
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition assigned:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
});
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
}

View File

@ -0,0 +1,49 @@
package com.aisi.consumer;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
public class ConsumerWithStickyAssignor {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "aisi-group");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//set the partition assignment strategy to sticky
properties.put("partition.assignment.strategy", StickyAssignor.class.getName());
//consumer session timeout; it cannot be set lower than the broker minimum of 6s
properties.put("session.timeout.ms", 6000);
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
List<String> topics = Arrays.asList("topic_d","topic_e");
consumer.subscribe(topics, new ConsumerRebalanceListener() {
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition revoked:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
@Override
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
for (TopicPartition topicPartition : partitions) {
System.out.println("Partition assigned:::::"+topicPartition.topic() + ":" + topicPartition.partition());
}
}
});
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
}
}
}
}

View File

@ -0,0 +1,78 @@
package com.aisi.producer;
import org.apache.kafka.clients.producer.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.Properties;
public class Producer1 {
private static final Logger log = LoggerFactory.getLogger(Producer1.class);
public static void main(String[] args) throws Exception {
KafkaProducer<String, String> kafkaProducer = getStringStringKafkaProducer();
for (int i = 0; i < 5; i++) {
ProducerRecord<String, String> record = new ProducerRecord<>("topic_b", "message-value: " + i);
kafkaProducer.send(record, (metadata, exception) -> {
if (exception == null) {
// message sent successfully
System.out.println("Message sent successfully to topic: " + metadata.topic() +
" partition: " + metadata.partition() +
" offset: " + metadata.offset());
} else {
// message failed to send
exception.printStackTrace();
}
});
}
// make sure all buffered messages are sent
kafkaProducer.flush();
// close the producer
kafkaProducer.close();
}
private static KafkaProducer<String, String> getStringStringKafkaProducer() {
Properties properties = new Properties();
properties.put("bootstrap.servers", "nn1:9092");
properties.put("batch.size", 16384);
properties.put("acks", "all"); // 建议将-1改为all
properties.put("retries", 3);
properties.put("linger.ms", 50);
properties.put("buffer.memory", 33554432);
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,MyInterceptor.class.getName());
return new KafkaProducer<>(properties);
}
public static class MyInterceptor implements ProducerInterceptor<String, String> {
@Override
public ProducerRecord<String, String> onSend(ProducerRecord<String, String> producerRecord) {
// process the message here, e.g. prepend a timestamp
// note: rebuilding the record with only topic and value drops the original key and any explicit partition
return new ProducerRecord<>(producerRecord.topic(), System.currentTimeMillis() + "-" + producerRecord.value());
}
@Override
public void onAcknowledgement(RecordMetadata recordMetadata, Exception e) {
if (e != null) {
log.error("Failed to send message with error: {}", e.getMessage());
}else {
log.info("Successfully sent message to topic {}, partition {}, offset {}",
recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset());
}
}
@Override
public void close() {
}
@Override
public void configure(Map<String, ?> map) {
}
}
}

View File

@ -0,0 +1,124 @@
package com.aisi.producer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
public class ProducerWithObjectSerializer {
private static final Logger log = LoggerFactory.getLogger(ProducerWithObjectSerializer.class);
public static void main(String[] args) throws Exception {
KafkaProducer<String, Student> kafkaProducer = getStringStudentKafkaProducer();
Student stu1 = new Student();
stu1.setName("张三");
stu1.setAge(18);
stu1.setScore(100);
Student stu2 = new Student();
stu2.setName("李四");
stu2.setAge(24);
stu2.setScore(100);
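// the two records below target explicit partitions 1 and 2, so topic "student" needs at least 3 partitions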
kafkaProducer.send(new ProducerRecord<>("student", 1,"blog-1", stu1) , (metadata, exception) -> {
if (exception != null) {
log.error("Failed to send message", exception);
} else {
log.info("Message sent, offset: " + metadata.offset());
log.info("Message sent, topic: " + metadata.topic());
log.info("Message sent, partition: " + metadata.partition());
log.info("Message sent, timestamp: " + metadata.timestamp());
}
});
kafkaProducer.send(new ProducerRecord<>("student", 2,"blog-2", stu2), (metadata, exception) -> {
if (exception != null) {
log.error("Failed to send message", exception);
} else {
log.info("Message sent, offset: " + metadata.offset());
log.info("Message sent, topic: " + metadata.topic());
log.info("Message sent, partition: " + metadata.partition());
log.info("Message sent, timestamp: " + metadata.timestamp());
}
});
// make sure all buffered messages are sent
kafkaProducer.flush();
// close the producer
kafkaProducer.close();
}
private static KafkaProducer<String, Student> getStringStudentKafkaProducer() {
Properties properties = new Properties();
properties.put("bootstrap.servers", "nn1:9092");
properties.put("batch.size", 16384);
properties.put("acks", "all");
properties.put("retries", 3);
properties.put("linger.ms", 50);
properties.put("buffer.memory", 33554432);
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", StudentSerializer.class.getName());
return new KafkaProducer<>(properties);
}
public static class Student implements Serializable {
private String name;
private int score;
private int age;
public Student(String name, int score, int age) {
this.name = name;
this.score = score;
this.age = age;
}
public Student() {
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getScore() {
return score;
}
public void setScore(int score) {
this.score = score;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
@Override
public String toString() {
return "Student{" +
"name='" + name + '\'' +
", score=" + score +
", age=" + age +
'}';
}
}
public static class StudentSerializer implements Serializer<Student> {
@Override
public byte[] serialize(String s, Student student) {
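// text serialization via toString(); a consumer would need a matching deserializer to parse this format back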
return student.toString().getBytes(StandardCharsets.UTF_8);
}
}
}

View File

@ -0,0 +1,59 @@
package com.aisi.producer;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.serialization.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
public class ProducerWithStringSerializer {
private static final Logger log = LoggerFactory.getLogger(ProducerWithStringSerializer.class);
public static void main(String[] args) throws Exception {
KafkaProducer<String, String> kafkaProducer = getStringStringKafkaProducer();
for (int i = 0; i < 5; i++) {
ProducerRecord<String, String> record = new ProducerRecord<>("topic_b","key_:"+i,"message-value: " + i);
kafkaProducer.send(record, (metadata, exception) -> {
if (exception == null) {
// message sent successfully
System.out.println("Message sent successfully to topic: " + metadata.topic() +
" partition: " + metadata.partition() +
" offset: " + metadata.offset());
} else {
// message failed to send
exception.printStackTrace();
}
});
}
// make sure all buffered messages are sent
kafkaProducer.flush();
// close the producer
kafkaProducer.close();
}
private static KafkaProducer<String, String> getStringStringKafkaProducer() {
Properties properties = new Properties();
properties.put("bootstrap.servers", "nn1:9092");
properties.put("batch.size", 16384);
properties.put("acks", "all");
properties.put("retries", 3);
properties.put("linger.ms", 50);
properties.put("buffer.memory", 33554432);
properties.put("key.serializer", MyStringSerializer.class.getName());
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
return new KafkaProducer<>(properties);
}
public static class MyStringSerializer implements Serializer<String> {
@Override
public byte[] serialize(String topic, String data) {
// log.warn("serialize method called with parameters: {}, {}", s, s2);
return data.getBytes(StandardCharsets.UTF_8);
}
}
}

View File

@ -0,0 +1,53 @@
package com.aisi.producer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Cluster;
import java.util.Map;
import java.util.Properties;
public class ProducerWithUDPartitioner {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "nn1:9092");
properties.put("batch.size", 16384);
properties.put("acks", "all"); // 建议将-1改为all
properties.put("retries", 3);
properties.put("linger.ms", 50);
properties.put("buffer.memory", 33554432);
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class);
KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
for (int i = 0; i < 10; i++) {
kafkaProducer.send(new ProducerRecord<>("topic1", "key" + i, "value" + i));
}
kafkaProducer.close();
}
public static class MyPartitioner implements Partitioner {
@Override
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
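// demo partitioner: every record is routed to partition 0; a real implementation would typically hash keyBytes over the topic's partition count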
if (keyBytes == null) {
return 0;
}
return 0;
}
@Override
public void close() {
}
@Override
public void configure(Map<String, ?> configs) {
}
}
}

View File

@ -0,0 +1,5 @@
log4j.rootLogger=info,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c %M(): %m%n

View File

@ -0,0 +1,10 @@
2024-10-01 www.example.com
2024-10-01 www.example.com
2024-10-01 www.test.com
2024-10-02 www.example.com
2024-10-02 www.test.com
2024-10-02 www.sample.com
2024-10-03 www.example.com
2024-10-03 www.test.com
2024-10-03 www.sample.com
2024-10-03 www.example.com

View File

@ -33,6 +33,14 @@
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.9.3</version> <!-- use the latest stable release -->
<scope>test</scope>
</dependency>
</dependencies>
<build>
@ -60,28 +68,28 @@
</filter>
</filters>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedArtifactPrimary>true</shadedArtifactPrimary>
<!-- <shadedArtifactPrimary>true</shadedArtifactPrimary>-->
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
<relocators>
<relocator>
<pattern>org.apache.commons</pattern>
<shadedPattern>shade.org.apache.commons</shadedPattern>
</relocator>
</relocators>
<!-- <relocators>-->
<!-- <relocator>-->
<!-- <pattern>org.apache.commons</pattern>-->
<!-- <shadedPattern>shade.org.apache.commons</shadedPattern>-->
<!-- </relocator>-->
<!-- </relocators>-->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
<!-- <filter>-->
<!-- <artifact>*:*</artifact>-->
<!-- <excludes>-->
<!-- <exclude>META-INF/*.SF</exclude>-->
<!-- <exclude>META-INF/*.DSA</exclude>-->
<!-- <exclude>META-INF/*.RSA</exclude>-->
<!-- </excludes>-->
<!-- </filter>-->
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
<mainClass>com.aisi.accesscount.VisitCountDriver</mainClass>
</transformer>
</transformers>
</configuration>

View File

@ -0,0 +1,22 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// input format: date<TAB>total visit count
String[] fields = value.toString().split("\t");
if (fields.length == 2) {
String date = fields[0];
int count = Integer.parseInt(fields[1]);
// emit the visit count as the key and the date as the value so the shuffle sorts by count
context.write(new IntWritable(count), new Text(date));
}
}
}

View File

@ -0,0 +1,16 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
@Override
protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
for (Text date : values) {
context.write(date, key);
}
}
}

View File

@ -0,0 +1,51 @@
package com.aisi.accesscount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class VisitCountDriver {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
// first job: count the visits per day
Job countJob = Job.getInstance(conf, "Visit Count");
countJob.setJarByClass(VisitCountDriver.class);
countJob.setMapperClass(VisitCountMapper.class);
countJob.setReducerClass(VisitCountReducer.class);
countJob.setMapOutputKeyClass(Text.class);
countJob.setMapOutputValueClass(IntWritable.class);
countJob.setOutputKeyClass(Text.class);
countJob.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(countJob, new Path(args[0]));
Path tempOutput = new Path("temp_output");
FileOutputFormat.setOutputPath(countJob, tempOutput);
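// note: the job fails if temp_output already exists, and the intermediate directory is not cleaned up after the second job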
boolean countJobSuccess = countJob.waitForCompletion(true);
if (!countJobSuccess) {
System.exit(1);
}
// second job: sort the daily counts in ascending order of visit count
Job sortJob = Job.getInstance(conf, "Sort Visits");
sortJob.setJarByClass(VisitCountDriver.class);
sortJob.setMapperClass(SortMapper.class);
sortJob.setReducerClass(SortReducer.class);
sortJob.setMapOutputKeyClass(IntWritable.class);
sortJob.setMapOutputValueClass(Text.class);
sortJob.setOutputKeyClass(Text.class);
sortJob.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(sortJob, tempOutput);
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
}
}

View File

@ -0,0 +1,20 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class VisitCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// parse each input line, assumed to be in the form "date URL"
String[] fields = value.toString().split(" ");
if (fields.length == 2) {
String date = fields[0];
context.write(new Text(date), new IntWritable(1));
}
}
}

View File

@ -0,0 +1,18 @@
package com.aisi.accesscount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class VisitCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values) {
sum += value.get();
}
context.write(key, new IntWritable(sum));
}
}

View File

@ -0,0 +1,7 @@
package com.aisi.api;
public class Test {
}

View File

@ -0,0 +1,107 @@
import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.*;
public class Example {
static FileSystem fs = null;
@BeforeAll
public static void setup() throws IOException {
Configuration conf = new Configuration();
fs = FileSystem.get(conf);
}
@AfterAll
public static void teardown() throws IOException {
if (fs != null) {
fs.close();
}
}
@Test
public void list() throws IOException {
FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
for (FileStatus fileStatus : fileStatuses) {
System.out.println(fileStatus.getPath());
}
}
@Test
public void mkdir() throws IOException {
boolean mkdirsed = fs.mkdirs(new Path("/test"));
if (mkdirsed) {
System.out.println("mkdirsed");
}else
System.out.println("mkdir failed");
}
@Test
public void delete() throws IOException {
boolean deleted = fs.delete(new Path("/test"), true);
if (deleted) {
System.out.println("delete");
}else
System.out.println("delete failed");
}
@Test
public void upload() throws IOException {
fs.copyFromLocalFile(new Path("d:\\tmp\\process.xml"), new Path("/test/process.xml"));
System.out.println("upload success");
}
@Test
public void download() throws IOException {
fs.copyToLocalFile(new Path("/test/process.xml"), new Path("d:\\tmp\\process_download.xml"));
System.out.println("download success");
}
@Test
public void read() throws IOException {
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
new BufferedReader(new InputStreamReader(fsDataInputStream)).lines().forEach(System.out::println);
fsDataInputStream.close();
}
@Test
public void write() throws IOException {
FSDataOutputStream fsDataOutputStream = fs.create(new Path("/test/process_replication.xml"));
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream, "utf-8"));
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, "utf-8"));
String line = "";
while ((line = reader.readLine()) != null) {
writer.write(line);
writer.newLine();
}
writer.close();
reader.close();
fsDataOutputStream.close();
fsDataInputStream.close();
}
@Test
public void read1() throws IOException {
Configuration conf=new Configuration();
//obtain a SequenceFile.Reader for the sequence file
SequenceFile.Reader reader=new SequenceFile.Reader(fs,new Path("/example/part-m-00000"),conf);
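// note: the Reader(fs, path, conf) constructor is deprecated in newer Hadoop releases; SequenceFile.Reader(conf, SequenceFile.Reader.file(path)) is the replacement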
//key and value types used by the sequence file
Text key=new Text();
Text value=new Text();
//write the records that are read out to a local text file
BufferedWriter out=new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\tmp\\5-12.txt")));
while(reader.next(key,value)){
out.write(key.toString()+"\t"+value.toString()+"\r\n");
}
out.close();
reader.close();
}
}

View File

@ -0,0 +1,56 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.conf.Configuration;
//Listing 5-3
import java.io.IOException;
public class SelectData {
public static class MyMap extends Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String arr[] = line.split(",");
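// note: contains("2021/1") also matches 2021/10, 2021/11 and 2021/12; tighten the check if only January and February are intended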
if (arr[4].contains("2021/1") || arr[4].contains("2021/2")) {
context.write(new Text(arr[2]),
new Text(arr[4].substring(0, 9)));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if(otherArgs.length<2){
System.err.println("必须输入读取文件路径和输出路径");
System.exit(2);
}
Job job =Job.getInstance(conf,"Select Data");
job.setJarByClass(SelectData.class);
job.setMapperClass(MyMap.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//set the input format
job.setInputFormatClass(TextInputFormat.class);
//set the output format to SequenceFile
job.setOutputFormatClass(SequenceFileOutputFormat.class);
//map-only job: set the number of reduce tasks to 0
job.setNumReduceTasks(0);
for(int i=0;i<otherArgs.length-1;++i){
FileInputFormat.addInputPath(job,new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,new Path(otherArgs[otherArgs.length-1]));
System.exit(job.waitForCompletion(true)?0:1);
}
}

15
Spark/data/shops.txt Normal file
View File

@ -0,0 +1,15 @@
sid1,2022-01-18,500
sid1,2022-02-10,500
sid1,2022-02-10,200
sid1,2022-02-11,600
sid1,2022-02-12,400
sid1,2022-02-13,200
sid1,2022-02-15,100
sid1,2022-03-05,180
sid1,2022-04-05,280
sid1,2022-04-06,220
sid2,2022-02-10,100
sid2,2022-02-11,100
sid2,2022-02-13,100
sid2,2022-03-15,100
sid2,2022-04-15,100

12
Spark/data/user.txt Normal file
View File

@ -0,0 +1,12 @@
id01,2022-02-28
id01,2022-03-01
id01,2022-03-01
id01,2022-03-02
id01,2022-03-05
id01,2022-03-04
id01,2022-03-06
id01,2022-03-07
id02,2022-03-01
id02,2022-03-02
id02,2022-03-03
id02,2022-03-06

View File

@ -55,6 +55,13 @@
<artifactId>hive-jdbc</artifactId>
<version>3.1.2</version>
</dependency>
<!-- Spark Streaming -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.12</artifactId>
<version>3.1.2</version> <!-- pick the version you need -->
</dependency>
</dependencies>
<build>
<plugins>

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.{SparkConf, SparkContext}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.SparkContext

View File

@ -1,4 +1,4 @@
log4j.rootLogger=info,console
log4j.rootLogger=error,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out

View File

@ -0,0 +1,115 @@
import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.{Date, Properties, Random}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
object MockData {
def randomNum(index: Int, random: Random): String = {
var str = ""
for (i <- 0 until index) {
str += random.nextInt(10)
}
str
}
def fillZero(random: Random, num: Int, index: Int): String = {
val randomNum = random.nextInt(num)
var randomNumStr = randomNum.toString
if (randomNum < 10) {
randomNumStr = ("%0" + index + "d").format(randomNum)
}
randomNumStr
}
def initFile(path: String): PrintWriter = {
new PrintWriter(new File(path))
}
def writeDataToFile(pw: PrintWriter, content: String): Unit = {
pw.write(content + "\n")
pw.flush()
}
def closeFile(pw: PrintWriter): Unit = {
pw.close()
}
def initKafkaProducer(): KafkaProducer[String, String] = {
val props = new Properties()
props.put("bootstrap.servers", "localhost:9092")
props.put("acks", "all")
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
new KafkaProducer[String, String](props)
}
def writeDataToKafka(producer: KafkaProducer[String, String], content: String): Unit = {
producer.send(new ProducerRecord[String, String]("RoadRealTimeLog", content))
}
def closeKafka(producer: KafkaProducer[String, String]): Unit = {
producer.close()
}
def mock(): Unit = {
val pw = initFile("d:\\tmp\\data.txt")
val producer = initKafkaProducer()
val random = new Random()
val locations = Array("鲁", "京", "豫", "京", "沪", "赣", "津", "深", "黑", "粤")
val day = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
for (i <- 0 until 30) {
val car = locations(random.nextInt(10)) + (65 + random.nextInt(26)).asInstanceOf[Char] + randomNum(5, random)
var baseActionTime = day + " " + fillZero(random, 24, 2)
for (j <- 0 until random.nextInt(300)) {
if (j % 30 == 0 && j != 0) {
var nextHour = ""
val baseHourParts = baseActionTime.split(" ")
if (baseHourParts.length > 1) {
val baseHour = baseHourParts(1)
if (baseHour.startsWith("0")) {
if (baseHour.endsWith("9")) {
nextHour = "10"
} else {
nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
}
} else if (baseHour == "23") {
nextHour = fillZero(random, 24, 2)
} else {
nextHour = (baseHour.toInt + 1).toString
}
baseActionTime = day + " " + nextHour
} else {
baseActionTime = day + " 00" // 如果 baseActionTime 无法正确分割默认使用 00
}
}
val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
val monitorId = fillZero(random, 10, 4)
val speed = random.nextInt(200) + 1
val roadId = random.nextInt(50) + 1
val cameraId = "0" + randomNum(4, random)
val areald = fillZero(random, random.nextInt(8) + 1, 2)
val content = day + "\t" + monitorId + "\t" + cameraId + "\t" + car + "\t" + actionTime + "\t" + speed + "\t" + roadId + "\t" + areald
writeDataToFile(pw, content)
writeDataToKafka(producer, content)
Thread.sleep(50)
}
}
closeFile(pw)
closeKafka(producer)
}
def main(args: Array[String]): Unit = {
mock()
}
}

View File

@ -0,0 +1,51 @@
package com.aisi.sparkSql;
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
object A1 {
def main(args: Array[String]): Unit = {
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.setAppName("user active")
val sc = new SparkContext(conf)
val sqlSc = new SQLContext(sc)
import sqlSc.implicits._
val userDF: DataFrame = sc.textFile("Spark/data/user.txt")
.map(t => {
val line = t.split(",")
val strDataTime = line(1).split("-")
val year = strDataTime(0)
val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1) // strip a leading zero from the month
val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2) // strip a leading zero from the day
(userRecord(line(0), year, month, day))
}).toDF()
userDF.show()
// group by user ID, year and month
val groupedDF = userDF.groupBy("uid", "year", "month")
.agg(collect_list("day").as("days"))
// explode the collected days and cast them to Int
val explodedDF = groupedDF
.withColumn("day", explode($"days"))
.withColumn("day", $"day".cast("int"))
// window partitioned by (uid, year, month) and ordered by day
val windowSpec = Window.partitionBy("uid", "year", "month").orderBy("day")
// difference between adjacent active days
val resultDF = explodedDF
.withColumn("prev_day", lag("day", 1).over(windowSpec))
.withColumn("day_diff", $"day" - $"prev_day")
.withColumn("is_active", when($"day_diff" === 1, 1).otherwise(0))
resultDF.show()
}
}
case class userRecord(uid: String, year: String, month: String, day: String)

View File

@ -0,0 +1,44 @@
package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, RelationalGroupedDataset, SQLContext, SparkSession}
import java.text.DateFormat
import java.time.format.DateTimeFormatter
/**
* Aggregate shop sales (money) per shop and month
*/
object A2 {
def main(args: Array[String]): Unit = {
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.setAppName("shop count")
val sc = new SparkContext(conf)
val sqlSc = new SQLContext(sc)
import sqlSc.implicits._
// sid,dt,money
val userDF: DataFrame = sc.textFile("Spark/data/shops.txt")
.map(t => {
val line = t.split(",")
val sid = line(0)
val strDataTime = line(1).split("-")
val year = strDataTime(0)
val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1) // strip a leading zero from the month
val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2) // strip a leading zero from the day
val money = line(2).toInt
shopRecord(sid,year, month, day,money)
}).toDF()
// userDF.show()
userDF.show()
// RelationalGroupedDataset: [grouping expressions: [sid: string, month: string], value: [sid: string, year: string ... 3 more fields], type: GroupBy]
val dataset = userDF.groupBy("sid","month")
println(dataset)
dataset.sum().show()
// val sparkSession = SparkSession.builder().appName("user active").master("local[*]").getOrCreate()
// userDF.groupBy("")
// userDF.show()
}
}
case class shopRecord (sid:String, year:String,month:String,day:String,money:Int){}

View File

@ -0,0 +1,152 @@
package com.aisi.sparkSql;
import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.{Date, Properties, Random}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
object MockData {
def randomNum(index: Int, random: Random): String = {
var str = ""
for (i <- 0 until index) {
str += random.nextInt(10)
}
str
}
def fillZero(random: Random, num: Int, index: Int): String = {
val randomNum = random.nextInt(num)
var randomNumStr = randomNum.toString
if (randomNum < 10) {
randomNumStr = ("%0" + index + "d").format(randomNum)
}
randomNumStr
}
def initFile(path: String): PrintWriter = {
new PrintWriter(new File(path))
}
def writeDataToFile(pw: PrintWriter, content: String): Unit = {
pw.write(content + "\n")
pw.flush()
}
def closeFile(pw: PrintWriter): Unit = {
pw.close()
}
def initKafkaProducer(): KafkaProducer[String,String] ={
val props = new Properties ()
props.put ("bootstrap.servers", "localhost:9092")
props.put ("acks", "all")
props.put ("key.serializer","org.apache.kafka.common.serialization.StringSerializer")
props.put ("value.serializer","org.apache.kafka.common.serialization.StringSerializer")
new KafkaProducer[String, String] (props)
}
def writeDataToKafka(producer:KafkaProducer[String,String],content:String):Unit = {
producer.send(new ProducerRecord[String,String]("RoadRealTimeLog",content))
}
def closeKafka(producer:KafkaProducer[String,String]):Unit = {
producer.close()
}
def mock(): Unit = {
val pw = initFile("路径")
val producer = initKafkaProducer()
val random = new Random()
val locations = Array("鲁","京","豫","京","沪","赣","津","深","黑","粤")
val day = new SimpleDateFormat ("yyyy-MM-dd").format (new Date())
for(i<-0 until 3000) {
val car = locations (random.nextInt (10)) + (65 + random.nextInt (26)).asInstanceOf[Char]+ randomNum(5, random)
var baseActionTime = day +""+ fillZero(random, 24,2)
for(j <- 0 until random.nextInt (300)) {
if (j % 30 == 0&j!=0) {
var nextHour = ""
val baseHour = baseActionTime.split(" ")(1)
if (baseHour.startsWith("0")) {
if (baseHour.endsWith("9")) {
nextHour = "10"
} else {
nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
}
} else if (baseHour == "23") {
nextHour = fillZero(random, 24, 2)
} else {
nextHour = (baseHour.toInt + 1).toString
}
baseActionTime = day + " " + nextHour
}
val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
val monitorId = fillZero(random, 10, 4)
val speed = random.nextInt (200) +1
val roadId = random.nextInt (50)+1
val cameraId= "0"+ randomNum(4, random)
val areald = fillZero(random, random.nextInt(8) +1, 2)
val content = day + "\t"+ monitorId +"\t" + cameraId + "\t" + car + "\t" + actionTime + "\t" + speed + "\t" + roadId + "\t"+areald
writeDataToFile(pw, content)
writeDataToKafka(producer,content)
Thread.sleep(50)
}
}
closeFile(pw)
closeKafka(producer)
}
def main(args:Array[String]):Unit = {
mock()
}
}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.hive.jdbc.HiveDriver

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SQLContext}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.Window

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.Window

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.sql.expressions.Aggregator

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}

View File

@ -1,4 +1,4 @@
package com.aisi.spark
package com.aisi.sparkSql
import org.apache.spark.sql.SparkSession

View File

@ -0,0 +1,21 @@
package com.aisi.sparkSreaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
object TestStreaming {
def main(args: Array[String]): Unit = {
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.setAppName("testStreaming")
val ssc = new StreamingContext(conf, Seconds(5))
val ds = ssc.socketTextStream("localhost", 6666)
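// assumes a plain-text socket server is listening on localhost:6666, e.g. one started with "nc -lk 6666"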
val ds1 = ds.flatMap(_.split(" "))
.map((_, 1))
.reduceByKey(_ + _)
ds1.print()
ssc.start()
ssc.awaitTermination()
}
}

View File

@ -11,6 +11,7 @@
<modules>
<module>MapReduceTest</module>
<module>Spark</module>
<module>Kafka</module>
</modules>
<properties>

30
路径 Normal file
View File

@ -0,0 +1,30 @@
2024-10-24 0009 04378 沪K21792 2024-10-2406:11:57 1 20 00
2024-10-24 0007 05341 沪K21792 2024-10-2406:56:05 43 36 03
2024-10-24 0003 04301 沪K21792 2024-10-2406:20:17 62 49 00
2024-10-24 0006 02290 沪K21792 2024-10-2406:13:45 5 43 02
2024-10-24 0006 08637 沪K21792 2024-10-2406:23:29 33 21 04
2024-10-24 0003 09518 沪K21792 2024-10-2406:39:04 26 39 00
2024-10-24 0008 08202 沪K21792 2024-10-2406:41:44 171 34 05
2024-10-24 0002 09586 沪K21792 2024-10-2406:22:43 69 9 01
2024-10-24 0004 06210 沪K21792 2024-10-2406:57:41 55 16 00
2024-10-24 0003 06017 沪K21792 2024-10-2406:22:51 114 49 01
2024-10-24 0006 07356 沪K21792 2024-10-2406:18:28 95 21 01
2024-10-24 0001 02689 沪K21792 2024-10-2406:05:04 92 9 03
2024-10-24 0001 00143 沪K21792 2024-10-2406:12:22 175 44 00
2024-10-24 0005 01871 沪K21792 2024-10-2406:55:25 184 23 03
2024-10-24 0004 00887 沪K21792 2024-10-2406:20:25 166 28 07
2024-10-24 0001 08940 沪K21792 2024-10-2406:59:38 10 3 04
2024-10-24 0008 02450 沪K21792 2024-10-2406:56:13 122 6 03
2024-10-24 0005 08706 沪K21792 2024-10-2406:24:02 128 5 01
2024-10-24 0004 04151 沪K21792 2024-10-2406:42:51 46 24 05
2024-10-24 0006 07990 沪K21792 2024-10-2406:30:50 88 23 01
2024-10-24 0000 04371 沪K21792 2024-10-2406:41:15 123 14 00
2024-10-24 0002 01350 沪K21792 2024-10-2406:16:00 134 22 00
2024-10-24 0006 08116 沪K21792 2024-10-2406:17:44 17 34 06
2024-10-24 0000 06980 沪K21792 2024-10-2406:14:28 104 47 03
2024-10-24 0009 06814 沪K21792 2024-10-2406:05:47 173 29 00
2024-10-24 0007 01068 沪K21792 2024-10-2406:06:57 169 18 00
2024-10-24 0007 05685 沪K21792 2024-10-2406:05:57 165 20 02
2024-10-24 0005 07818 沪K21792 2024-10-2406:08:37 157 11 00
2024-10-24 0004 06503 沪K21792 2024-10-2406:51:26 9 22 03
2024-10-24 0008 02831 沪K21792 2024-10-2406:46:46 25 16 03