update kafka consumer-01
This commit is contained in:
parent 4726888819
commit eece6503c9
@@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding">
    <file url="file://$PROJECT_DIR$/Kafka/src/main/java" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/Kafka/src/main/resources" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/MapReduceTest/src/main/java" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/MapReduceTest/src/main/resources" charset="UTF-8" />
    <file url="file://$PROJECT_DIR$/Spark/src/main/java" charset="UTF-8" />
@@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.aisi</groupId>
        <artifactId>HaiNiuProjects</artifactId>
        <version>1.0-SNAPSHOT</version>
    </parent>

    <artifactId>Kafka</artifactId>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>3.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.aisi.producer.ProducerWithObjectSerializer</mainClass> <!-- specify your main class here -->
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
@@ -0,0 +1,32 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class Consumer1 {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_a", "topic_b"); // each topic is passed as a separate string
        consumer.subscribe(topics);
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }
}
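Consumer1 relies on the default enable.auto.commit=true. A minimal sketch of the same poll loop with manual offset commits (same broker address, group and topic names assumed as above) could look like this:

// Sketch only: auto-commit disabled, offsets committed after each processed batch.
Properties properties = new Properties();
properties.put("bootstrap.servers", "localhost:9092");   // assumed broker, as above
properties.put("group.id", "aisi-group");
properties.put("enable.auto.commit", "false");            // take over offset management
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties)) {
    consumer.subscribe(Arrays.asList("topic_a", "topic_b"));
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record.offset() + "->" + record.value());
        }
        if (!records.isEmpty()) {
            consumer.commitSync();                         // commit only after the batch has been handled
        }
    }
}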
@@ -0,0 +1,49 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

public class ConsumerWithCooperativeStickyAssignor {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // set the partition assignment strategy to cooperative-sticky
        properties.put("partition.assignment.strategy", CooperativeStickyAssignor.class.getName());
        // consumer session timeout; it must not be set below 6s
        properties.put("session.timeout.ms", 6000);
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_d", "topic_e");

        consumer.subscribe(topics, new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition revoked: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition assigned: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }
        });
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }
}
@@ -0,0 +1,40 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.Deserializer;

import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class ConsumerWithDeserializer {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", MyStringDeserializer.class.getName());
        properties.put("value.deserializer", MyStringDeserializer.class.getName());
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_a", "topic_b"); // each topic is passed as a separate string
        consumer.subscribe(topics);
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }

    public static class MyStringDeserializer implements Deserializer<String> {
        @Override
        public String deserialize(String topic, byte[] data) {
            // guard against null payloads (e.g. tombstone records)
            return data == null ? null : new String(data, StandardCharsets.UTF_8);
        }
    }
}
@@ -0,0 +1,49 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

public class ConsumerWithRangeAssignor {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // set the partition assignment strategy to range
        properties.put("partition.assignment.strategy", RangeAssignor.class.getName());
        // consumer session timeout; it must not be set below 6s
        properties.put("session.timeout.ms", 6000);
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_d");

        consumer.subscribe(topics, new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition revoked: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition assigned: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }
        });
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }
}
@@ -0,0 +1,49 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

public class ConsumerWithRoundRobinAssignor {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // set the partition assignment strategy to round-robin
        properties.put("partition.assignment.strategy", RoundRobinAssignor.class.getName());
        // consumer session timeout; it must not be set below 6s
        properties.put("session.timeout.ms", 6000);
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_d", "topic_e");

        consumer.subscribe(topics, new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition revoked: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition assigned: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }
        });
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }
}
@@ -0,0 +1,49 @@
package com.aisi.consumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

public class ConsumerWithStickyAssignor {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "aisi-group");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // set the partition assignment strategy to sticky
        properties.put("partition.assignment.strategy", StickyAssignor.class.getName());
        // consumer session timeout; it must not be set below 6s
        properties.put("session.timeout.ms", 6000);
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        List<String> topics = Arrays.asList("topic_d", "topic_e");

        consumer.subscribe(topics, new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition revoked: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                for (TopicPartition topicPartition : partitions) {
                    System.out.println("partition assigned: " + topicPartition.topic() + ":" + topicPartition.partition());
                }
            }
        });
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.topic() + "->" + record.partition() + "->" + record.offset() + "->" + record.key() + "->" + record.value());
            }
        }
    }
}
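All of the consumer classes above spin in a while (true) loop and never close the consumer. A hedged sketch of the usual shutdown pattern — wakeup() from a JVM shutdown hook, then close() in a finally block — that could be applied to any of them (the Properties object is assumed to be built as in the classes above):

// Sketch: interrupt the blocked poll() cleanly when the JVM shuts down.
final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties); // properties as above
final Thread mainThread = Thread.currentThread();
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
    consumer.wakeup();                 // makes the blocked poll() throw WakeupException
    try {
        mainThread.join();             // wait for the poll loop to finish closing
    } catch (InterruptedException ignored) {
    }
}));
try {
    consumer.subscribe(Arrays.asList("topic_d", "topic_e"));
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
        records.forEach(r -> System.out.println(r.topic() + "->" + r.offset() + "->" + r.value()));
    }
} catch (org.apache.kafka.common.errors.WakeupException e) {
    // expected on shutdown
} finally {
    consumer.close();                  // leave the group and release resources
}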
@@ -0,0 +1,78 @@
package com.aisi.producer;

import org.apache.kafka.clients.producer.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;
import java.util.Properties;

public class Producer1 {
    private static final Logger log = LoggerFactory.getLogger(Producer1.class);

    public static void main(String[] args) throws Exception {
        KafkaProducer<String, String> kafkaProducer = getStringStringKafkaProducer();
        for (int i = 0; i < 5; i++) {
            ProducerRecord<String, String> record = new ProducerRecord<>("topic_b", "message-value: " + i);
            kafkaProducer.send(record, (metadata, exception) -> {
                if (exception == null) {
                    // message sent successfully
                    System.out.println("Message sent successfully to topic: " + metadata.topic() +
                            " partition: " + metadata.partition() +
                            " offset: " + metadata.offset());
                } else {
                    // message failed to send
                    exception.printStackTrace();
                }
            });
        }

        // make sure all buffered messages are sent
        kafkaProducer.flush();
        // close the producer
        kafkaProducer.close();
    }

    private static KafkaProducer<String, String> getStringStringKafkaProducer() {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "nn1:9092");
        properties.put("batch.size", 16384);
        properties.put("acks", "all"); // "all" is recommended instead of -1
        properties.put("retries", 3);
        properties.put("linger.ms", 50);
        properties.put("buffer.memory", 33554432);
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG, MyInterceptor.class.getName());
        return new KafkaProducer<>(properties);
    }

    public static class MyInterceptor implements ProducerInterceptor<String, String> {

        @Override
        public ProducerRecord<String, String> onSend(ProducerRecord<String, String> producerRecord) {
            // transform the record before it is sent, e.g. prepend a timestamp to the value
            return new ProducerRecord<>(producerRecord.topic(), System.currentTimeMillis() + "-" + producerRecord.value());
        }

        @Override
        public void onAcknowledgement(RecordMetadata recordMetadata, Exception e) {
            if (e != null) {
                log.error("Failed to send message with error: {}", e.getMessage());
            } else {
                log.info("Successfully sent message to topic {}, partition {}, offset {}",
                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset());
            }
        }

        @Override
        public void close() {

        }

        @Override
        public void configure(Map<String, ?> map) {

        }
    }
}
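Producer1 sends asynchronously with a callback. When a blocking, per-record acknowledgement is wanted instead, the same send can wait on the Future that send() returns. A minimal sketch, reusing the kafkaProducer and topic name from above:

// Sketch: synchronous send — block until the broker acknowledges the record.
ProducerRecord<String, String> record = new ProducerRecord<>("topic_b", "message-value: 0");
try {
    RecordMetadata metadata = kafkaProducer.send(record).get();   // Future.get() blocks until acked
    System.out.println("acked at " + metadata.topic() + "-" + metadata.partition() + "@" + metadata.offset());
} catch (Exception e) {
    // InterruptedException / ExecutionException wrap the underlying send failure
    e.printStackTrace();
}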
@@ -0,0 +1,124 @@
package com.aisi.producer;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

public class ProducerWithObjectSerializer {
    private static final Logger log = LoggerFactory.getLogger(ProducerWithObjectSerializer.class);

    public static void main(String[] args) throws Exception {
        KafkaProducer<String, Student> kafkaProducer = getStringStringKafkaProducer();

        Student stu1 = new Student();
        stu1.setName("张三");
        stu1.setAge(18);
        stu1.setScore(100);
        Student stu2 = new Student();
        stu2.setName("李四");
        stu2.setAge(24);
        stu2.setScore(100);
        kafkaProducer.send(new ProducerRecord<>("student", 1, "blog-1", stu1), (metadata, exception) -> {
            if (exception != null) {
                log.error("failed to send message", exception);
            } else {
                log.info("message sent, offset: " + metadata.offset());
                log.info("message sent, topic: " + metadata.topic());
                log.info("message sent, partition: " + metadata.partition());
                log.info("message sent, timestamp: " + metadata.timestamp());
            }
        });
        kafkaProducer.send(new ProducerRecord<>("student", 2, "blog-2", stu2), (metadata, exception) -> {
            if (exception != null) {
                log.error("failed to send message", exception);
            } else {
                log.info("message sent, offset: " + metadata.offset());
                log.info("message sent, topic: " + metadata.topic());
                log.info("message sent, partition: " + metadata.partition());
                log.info("message sent, timestamp: " + metadata.timestamp());
            }
        });

        // make sure all buffered messages are sent
        kafkaProducer.flush();
        // close the producer
        kafkaProducer.close();
    }

    private static KafkaProducer<String, Student> getStringStringKafkaProducer() {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "nn1:9092");
        properties.put("batch.size", 16384);
        properties.put("acks", "all");
        properties.put("retries", 3);
        properties.put("linger.ms", 50);
        properties.put("buffer.memory", 33554432);
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", StudentSerializer.class.getName());
        return new KafkaProducer<>(properties);
    }

    public static class Student implements Serializable {
        private String name;

        private int score;
        private int age;

        public Student(String name, int score, int age) {
            this.name = name;
            this.score = score;
            this.age = age;
        }

        public Student() {

        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public int getScore() {
            return score;
        }

        public void setScore(int score) {
            this.score = score;
        }

        public int getAge() {
            return age;
        }

        public void setAge(int age) {
            this.age = age;
        }

        @Override
        public String toString() {
            return "Student{" +
                    "name='" + name + '\'' +
                    ", score=" + score +
                    ", age=" + age +
                    '}';
        }
    }

    public static class StudentSerializer implements Serializer<Student> {
        @Override
        public byte[] serialize(String s, Student student) {
            return student.toString().getBytes(StandardCharsets.UTF_8);
        }
    }

}
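The StudentSerializer above writes the result of Student.toString(), which is awkward to parse back on the consumer side. A hedged sketch of a JSON-based serializer/deserializer pair — assuming jackson-databind is added to the Kafka module's pom (it is not in the dependency list above); class names here are illustrative:

// Sketch: JSON (de)serialization for Student as an alternative to toString() bytes.
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

public class StudentJson {
    private static final ObjectMapper MAPPER = new ObjectMapper();

    public static class StudentJsonSerializer implements Serializer<ProducerWithObjectSerializer.Student> {
        @Override
        public byte[] serialize(String topic, ProducerWithObjectSerializer.Student student) {
            try {
                return student == null ? null : MAPPER.writeValueAsBytes(student);
            } catch (Exception e) {
                throw new RuntimeException("failed to serialize Student", e);
            }
        }
    }

    public static class StudentJsonDeserializer implements Deserializer<ProducerWithObjectSerializer.Student> {
        @Override
        public ProducerWithObjectSerializer.Student deserialize(String topic, byte[] data) {
            try {
                return data == null ? null : MAPPER.readValue(data, ProducerWithObjectSerializer.Student.class);
            } catch (Exception e) {
                throw new RuntimeException("failed to deserialize Student", e);
            }
        }
    }
}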
@@ -0,0 +1,59 @@
package com.aisi.producer;

import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.serialization.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.util.Properties;

public class ProducerWithStringSerializer {
    private static final Logger log = LoggerFactory.getLogger(ProducerWithStringSerializer.class);

    public static void main(String[] args) throws Exception {
        KafkaProducer<String, String> kafkaProducer = getStringStringKafkaProducer();
        for (int i = 0; i < 5; i++) {
            ProducerRecord<String, String> record = new ProducerRecord<>("topic_b", "key_:" + i, "message-value: " + i);
            kafkaProducer.send(record, (metadata, exception) -> {
                if (exception == null) {
                    // message sent successfully
                    System.out.println("Message sent successfully to topic: " + metadata.topic() +
                            " partition: " + metadata.partition() +
                            " offset: " + metadata.offset());
                } else {
                    // message failed to send
                    exception.printStackTrace();
                }
            });
        }

        // make sure all buffered messages are sent
        kafkaProducer.flush();
        // close the producer
        kafkaProducer.close();
    }

    private static KafkaProducer<String, String> getStringStringKafkaProducer() {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "nn1:9092");
        properties.put("batch.size", 16384);
        properties.put("acks", "all");
        properties.put("retries", 3);
        properties.put("linger.ms", 50);
        properties.put("buffer.memory", 33554432);
        properties.put("key.serializer", MyStringSerializer.class.getName());
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        return new KafkaProducer<>(properties);
    }

    public static class MyStringSerializer implements Serializer<String> {
        @Override
        public byte[] serialize(String topic, String data) {
            // log.warn("serialize called for topic {} with value {}", topic, data);
            // guard against null keys/values
            return data == null ? null : data.getBytes(StandardCharsets.UTF_8);
        }
    }

}
@@ -0,0 +1,53 @@
package com.aisi.producer;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.Properties;

public class ProducerWithUDPartitioner {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "nn1:9092");
        properties.put("batch.size", 16384);
        properties.put("acks", "all"); // "all" is recommended instead of -1
        properties.put("retries", 3);
        properties.put("linger.ms", 50);
        properties.put("buffer.memory", 33554432);
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class);
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
        for (int i = 0; i < 10; i++) {
            kafkaProducer.send(new ProducerRecord<>("topic1", "key" + i, "value" + i));
        }

        kafkaProducer.close();
    }

    public static class MyPartitioner implements Partitioner {

        @Override
        public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
            // demo partitioner: every record, keyed or not, goes to partition 0
            return 0;
        }

        @Override
        public void close() {

        }

        @Override
        public void configure(Map<String, ?> configs) {

        }
    }
}
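MyPartitioner above routes every record to partition 0, which defeats parallelism. A sketch of a keyed-hash variant using the Cluster metadata that Kafka passes in — the fallback to partition 0 for null keys is an assumption, not part of the original class:

// Sketch: spread keyed records across all partitions of the topic by hashing the key bytes.
public static class HashPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (keyBytes == null) {
            return 0;                 // assumption: unkeyed records go to partition 0
        }
        // mask to a non-negative value, then take the modulus over the partition count
        return (java.util.Arrays.hashCode(keyBytes) & Integer.MAX_VALUE) % numPartitions;
    }

    @Override
    public void close() { }

    @Override
    public void configure(Map<String, ?> configs) { }
}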
@@ -0,0 +1,5 @@
log4j.rootLogger=info,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c %M(): %m%n
@@ -0,0 +1,10 @@
2024-10-01 www.example.com
2024-10-01 www.example.com
2024-10-01 www.test.com
2024-10-02 www.example.com
2024-10-02 www.test.com
2024-10-02 www.sample.com
2024-10-03 www.example.com
2024-10-03 www.test.com
2024-10-03 www.sample.com
2024-10-03 www.example.com
@ -33,6 +33,14 @@
|
|||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>5.9.3</version> <!-- use the latest stable version -->
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
<build>
|
||||
|
|
@ -60,28 +68,28 @@
|
|||
</filter>
|
||||
</filters>
|
||||
<shadedArtifactAttached>true</shadedArtifactAttached>
|
||||
<shadedArtifactPrimary>true</shadedArtifactPrimary>
|
||||
<!-- <shadedArtifactPrimary>true</shadedArtifactPrimary>-->
|
||||
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
|
||||
<relocators>
|
||||
<relocator>
|
||||
<pattern>org.apache.commons</pattern>
|
||||
<shadedPattern>shade.org.apache.commons</shadedPattern>
|
||||
</relocator>
|
||||
</relocators>
|
||||
<!-- <relocators>-->
|
||||
<!-- <relocator>-->
|
||||
<!-- <pattern>org.apache.commons</pattern>-->
|
||||
<!-- <shadedPattern>shade.org.apache.commons</shadedPattern>-->
|
||||
<!-- </relocator>-->
|
||||
<!-- </relocators>-->
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
<!-- <filter>-->
|
||||
<!-- <artifact>*:*</artifact>-->
|
||||
<!-- <excludes>-->
|
||||
<!-- <exclude>META-INF/*.SF</exclude>-->
|
||||
<!-- <exclude>META-INF/*.DSA</exclude>-->
|
||||
<!-- <exclude>META-INF/*.RSA</exclude>-->
|
||||
<!-- </excludes>-->
|
||||
<!-- </filter>-->
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
|
||||
<mainClass>com.aisi.accesscount.VisitCountDriver</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
|
||||
@Override
|
||||
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
|
||||
// input format: date<TAB>total visit count
|
||||
String[] fields = value.toString().split("\t");
|
||||
if (fields.length == 2) {
|
||||
String date = fields[0];
|
||||
int count = Integer.parseInt(fields[1]);
|
||||
// use the visit count as the key and the date as the value
|
||||
context.write(new IntWritable(count), new Text(date));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
|
||||
@Override
|
||||
protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
|
||||
for (Text date : values) {
|
||||
context.write(date, key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
|
||||
public class VisitCountDriver {
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
|
||||
// first job: count the visits per day
|
||||
Job countJob = Job.getInstance(conf, "Visit Count");
|
||||
countJob.setJarByClass(VisitCountDriver.class);
|
||||
countJob.setMapperClass(VisitCountMapper.class);
|
||||
countJob.setReducerClass(VisitCountReducer.class);
|
||||
|
||||
countJob.setMapOutputKeyClass(Text.class);
|
||||
countJob.setMapOutputValueClass(IntWritable.class);
|
||||
countJob.setOutputKeyClass(Text.class);
|
||||
countJob.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(countJob, new Path(args[0]));
|
||||
Path tempOutput = new Path("temp_output");
|
||||
FileOutputFormat.setOutputPath(countJob, tempOutput);
|
||||
|
||||
boolean countJobSuccess = countJob.waitForCompletion(true);
|
||||
if (!countJobSuccess) {
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// second job: sort by visit count in ascending order
|
||||
Job sortJob = Job.getInstance(conf, "Sort Visits");
|
||||
sortJob.setJarByClass(VisitCountDriver.class);
|
||||
sortJob.setMapperClass(SortMapper.class);
|
||||
sortJob.setReducerClass(SortReducer.class);
|
||||
|
||||
sortJob.setMapOutputKeyClass(IntWritable.class);
|
||||
sortJob.setMapOutputValueClass(Text.class);
|
||||
sortJob.setOutputKeyClass(Text.class);
|
||||
sortJob.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(sortJob, tempOutput);
|
||||
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
|
||||
|
||||
System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
|
||||
}
|
||||
}
|
||||
|
|
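VisitCountDriver above writes the first job's output to a fixed temp_output path and never removes it, so a second run will typically fail because the output directory already exists. A hedged sketch of the cleanup that could be added around the two jobs, using the standard Hadoop FileSystem API (this is a fragment, not a full driver):

// Sketch: delete the intermediate directory before the first job and after the second one.
import org.apache.hadoop.fs.FileSystem;

FileSystem fs = FileSystem.get(conf);
Path tempOutput = new Path("temp_output");
if (fs.exists(tempOutput)) {
    fs.delete(tempOutput, true);          // recursively remove a stale run
}
// ... run countJob and sortJob as in the driver above ...
fs.delete(tempOutput, true);              // drop the intermediate data once sortJob has finished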
@ -0,0 +1,20 @@
|
|||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class VisitCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
|
||||
@Override
|
||||
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
|
||||
// parse each input line, assuming the format "date URL"
|
||||
String[] fields = value.toString().split(" ");
|
||||
if (fields.length == 2) {
|
||||
String date = fields[0];
|
||||
context.write(new Text(date), new IntWritable(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
package com.aisi.accesscount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class VisitCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
@Override
|
||||
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int sum = 0;
|
||||
for (IntWritable value : values) {
|
||||
sum += value.get();
|
||||
}
|
||||
context.write(key, new IntWritable(sum));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
package com.aisi.api;
|
||||
|
||||
|
||||
|
||||
public class Test {
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
import org.apache.commons.compress.utils.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class Example {
|
||||
static FileSystem fs = null;
|
||||
@BeforeAll
|
||||
public static void setup() throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
fs = FileSystem.get(conf);
|
||||
}
|
||||
@AfterAll
|
||||
public static void teardown() throws IOException {
|
||||
if (fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void list() throws IOException {
|
||||
FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
|
||||
for (FileStatus fileStatus : fileStatuses) {
|
||||
System.out.println(fileStatus.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mkdir() throws IOException {
|
||||
boolean mkdirsed = fs.mkdirs(new Path("/test"));
|
||||
if (mkdirsed) {
|
||||
System.out.println("mkdirsed");
|
||||
}else
|
||||
System.out.println("mkdir failed");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void delete() throws IOException {
|
||||
boolean deleted = fs.delete(new Path("/test"), true);
|
||||
if (deleted) {
|
||||
System.out.println("delete");
|
||||
}else
|
||||
System.out.println("delete failed");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void upload() throws IOException {
|
||||
fs.copyFromLocalFile(new Path("d:\\tmp\\process.xml"), new Path("/test/process.xml"));
|
||||
System.out.println("upload success");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void download() throws IOException {
|
||||
fs.copyToLocalFile(new Path("/test/process.xml"), new Path("d:\\tmp\\process_download.xml"));
|
||||
System.out.println("download success");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void read() throws IOException {
|
||||
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
|
||||
new BufferedReader(new InputStreamReader(fsDataInputStream)).lines().forEach(System.out::println);
|
||||
fsDataInputStream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void write() throws IOException {
|
||||
FSDataOutputStream fsDataOutputStream = fs.create(new Path("/test/process_replication.xml"));
|
||||
FSDataInputStream fsDataInputStream = fs.open(new Path("/test/process.xml"));
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream, "utf-8"));
|
||||
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, "utf-8"));
|
||||
String line = "";
|
||||
while ((line = reader.readLine()) != null) {
|
||||
writer.write(line);
|
||||
writer.newLine();
|
||||
}
|
||||
writer.close();
|
||||
reader.close();
|
||||
fsDataOutputStream.close();
|
||||
fsDataInputStream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void read1() throws IOException {
|
||||
Configuration conf=new Configuration();
|
||||
// create a SequenceFile.Reader
|
||||
SequenceFile.Reader reader=new SequenceFile.Reader(fs,new Path("/example/part-m-00000"),conf);
|
||||
// the key and value types used in the sequence file
|
||||
Text key=new Text();
|
||||
Text value=new Text();
|
||||
// write the records that were read to a local text file
|
||||
BufferedWriter out=new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\tmp\\5-12.txt")));
|
||||
while(reader.next(key,value)){
|
||||
out.write(key.toString()+"\t"+value.toString()+"\r\n");
|
||||
}
|
||||
out.close();
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||
import org.apache.hadoop.util.GenericOptionsParser;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
// Listing 5-3
|
||||
import java.io.IOException;
|
||||
|
||||
public class SelectData {
|
||||
public static class MyMap extends Mapper<Object, Text, Text, Text> {
|
||||
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
||||
String line = value.toString();
|
||||
String arr[] = line.split(",");
|
||||
if (arr[4].contains("2021/1") || arr[4].contains("2021/2")) {
|
||||
context.write(new Text(arr[2]),
|
||||
new Text(arr[4].substring(0, 9)));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
|
||||
if(otherArgs.length<2){
|
||||
System.err.println("必须输入读取文件路径和输出路径");
|
||||
System.exit(2);
|
||||
}
|
||||
Job job =Job.getInstance(conf,"Select Data");
|
||||
job.setJarByClass(SelectData.class);
|
||||
job.setMapperClass(MyMap.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(Text.class);
|
||||
|
||||
// set the input format
|
||||
job.setInputFormatClass(TextInputFormat.class);
|
||||
// set the output format
|
||||
job.setOutputFormatClass(SequenceFileOutputFormat.class);
|
||||
// set the number of reduce tasks to 0
|
||||
job.setNumReduceTasks(0);
|
||||
for(int i=0;i<otherArgs.length-1;++i){
|
||||
FileInputFormat.addInputPath(job,new Path(otherArgs[i]));
|
||||
}
|
||||
FileOutputFormat.setOutputPath(job,new Path(otherArgs[otherArgs.length-1]));
|
||||
System.exit(job.waitForCompletion(true)?0:1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
sid1,2022-01-18,500
|
||||
sid1,2022-02-10,500
|
||||
sid1,2022-02-10,200
|
||||
sid1,2022-02-11,600
|
||||
sid1,2022-02-12,400
|
||||
sid1,2022-02-13,200
|
||||
sid1,2022-02-15,100
|
||||
sid1,2022-03-05,180
|
||||
sid1,2022-04-05,280
|
||||
sid1,2022-04-06,220
|
||||
sid2,2022-02-10,100
|
||||
sid2,2022-02-11,100
|
||||
sid2,2022-02-13,100
|
||||
sid2,2022-03-15,100
|
||||
sid2,2022-04-15,100
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
id01,2022-02-28
|
||||
id01,2022-03-01
|
||||
id01,2022-03-01
|
||||
id01,2022-03-02
|
||||
id01,2022-03-05
|
||||
id01,2022-03-04
|
||||
id01,2022-03-06
|
||||
id01,2022-03-07
|
||||
id02,2022-03-01
|
||||
id02,2022-03-02
|
||||
id02,2022-03-03
|
||||
id02,2022-03-06
|
||||
|
|
@ -55,6 +55,13 @@
|
|||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>3.1.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark Streaming -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.12</artifactId>
|
||||
<version>3.1.2</version> <!-- pick the version you need -->
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.SparkContext
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
log4j.rootLogger=info,console
|
||||
log4j.rootLogger=error,console
|
||||
|
||||
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.console.target=System.out
|
||||
|
|
|
|||
|
|
@ -0,0 +1,115 @@
|
|||
import java.io.{File, PrintWriter}
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.{Date, Properties, Random}
|
||||
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
|
||||
|
||||
object MockData {
|
||||
|
||||
def randomNum(index: Int, random: Random): String = {
|
||||
var str = ""
|
||||
for (i <- 0 until index) {
|
||||
str += random.nextInt(10)
|
||||
}
|
||||
str
|
||||
}
|
||||
|
||||
def fillZero(random: Random, num: Int, index: Int): String = {
|
||||
val randomNum = random.nextInt(num)
|
||||
var randomNumStr = randomNum.toString
|
||||
|
||||
if (randomNum < 10) {
|
||||
randomNumStr = ("%0" + index + "d").format(randomNum)
|
||||
}
|
||||
|
||||
randomNumStr
|
||||
}
|
||||
|
||||
def initFile(path: String): PrintWriter = {
|
||||
new PrintWriter(new File(path))
|
||||
}
|
||||
|
||||
def writeDataToFile(pw: PrintWriter, content: String): Unit = {
|
||||
pw.write(content + "\n")
|
||||
pw.flush()
|
||||
}
|
||||
|
||||
def closeFile(pw: PrintWriter): Unit = {
|
||||
pw.close()
|
||||
}
|
||||
|
||||
def initKafkaProducer(): KafkaProducer[String, String] = {
|
||||
val props = new Properties()
|
||||
props.put("bootstrap.servers", "localhost:9092")
|
||||
props.put("acks", "all")
|
||||
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
|
||||
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
|
||||
|
||||
new KafkaProducer[String, String](props)
|
||||
}
|
||||
|
||||
def writeDataToKafka(producer: KafkaProducer[String, String], content: String): Unit = {
|
||||
producer.send(new ProducerRecord[String, String]("RoadRealTimeLog", content))
|
||||
}
|
||||
|
||||
def closeKafka(producer: KafkaProducer[String, String]): Unit = {
|
||||
producer.close()
|
||||
}
|
||||
|
||||
def mock(): Unit = {
|
||||
val pw = initFile("d:\\tmp\\data.txt")
|
||||
val producer = initKafkaProducer()
|
||||
val random = new Random()
|
||||
val locations = Array("鲁", "京", "豫", "京", "沪", "赣", "津", "深", "黑", "粤")
|
||||
val day = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
|
||||
|
||||
for (i <- 0 until 30) {
|
||||
val car = locations(random.nextInt(10)) + (65 + random.nextInt(26)).asInstanceOf[Char] + randomNum(5, random)
|
||||
var baseActionTime = day + " " + fillZero(random, 24, 2)
|
||||
|
||||
for (j <- 0 until random.nextInt(300)) {
|
||||
|
||||
if (j % 30 == 0 && j != 0) {
|
||||
var nextHour = ""
|
||||
val baseHourParts = baseActionTime.split(" ")
|
||||
|
||||
if (baseHourParts.length > 1) {
|
||||
val baseHour = baseHourParts(1)
|
||||
if (baseHour.startsWith("0")) {
|
||||
if (baseHour.endsWith("9")) {
|
||||
nextHour = "10"
|
||||
} else {
|
||||
nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
|
||||
}
|
||||
} else if (baseHour == "23") {
|
||||
nextHour = fillZero(random, 24, 2)
|
||||
} else {
|
||||
nextHour = (baseHour.toInt + 1).toString
|
||||
}
|
||||
baseActionTime = day + " " + nextHour
|
||||
} else {
|
||||
baseActionTime = day + " 00" // 如果 baseActionTime 无法正确分割,默认使用 00 时
|
||||
}
|
||||
}
|
||||
|
||||
val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
|
||||
val monitorId = fillZero(random, 10, 4)
|
||||
val speed = random.nextInt(200) + 1
|
||||
val roadId = random.nextInt(50) + 1
|
||||
val cameraId = "0" + randomNum(4, random)
|
||||
val areald = fillZero(random, random.nextInt(8) + 1, 2)
|
||||
|
||||
val content = day + "\t" + monitorId + "\t" + cameraId + "\t" + car + "\t" + actionTime + "\t" + speed + "\t" + roadId + "\t" + areald
|
||||
writeDataToFile(pw, content)
|
||||
writeDataToKafka(producer, content)
|
||||
Thread.sleep(50)
|
||||
}
|
||||
}
|
||||
|
||||
closeFile(pw)
|
||||
closeKafka(producer)
|
||||
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
mock()
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
package com.aisi.sparkSql;
|
||||
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
import org.apache.spark.sql.functions._
|
||||
|
||||
object A1 {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val conf = new SparkConf()
|
||||
conf.setMaster("local[*]")
|
||||
conf.setAppName("user active")
|
||||
val sc = new SparkContext(conf)
|
||||
val sqlSc = new SQLContext(sc)
|
||||
import sqlSc.implicits._
|
||||
|
||||
val userDF: DataFrame = sc.textFile("Spark/data/user.txt")
|
||||
.map(t => {
|
||||
val line = t.split(",")
|
||||
val strDataTime = line(1).split("-")
|
||||
val year = strDataTime(0)
|
||||
val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1)
val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2)
|
||||
(userRecord(line(0), year, month, day))
|
||||
}).toDF()
|
||||
|
||||
userDF.show()
|
||||
|
||||
// group by user ID, year and month
|
||||
val groupedDF = userDF.groupBy("uid", "year", "month")
|
||||
.agg(collect_list("day").as("days"))
|
||||
|
||||
// explode the days column and cast it to int
|
||||
val explodedDF = groupedDF
|
||||
.withColumn("day", explode($"days"))
|
||||
.withColumn("day", $"day".cast("int"))
|
||||
|
||||
// window: partition by user, year and month, ordered by day
|
||||
val windowSpec = Window.partitionBy("uid", "year", "month").orderBy("day")
|
||||
|
||||
// compute the difference between adjacent days
|
||||
val resultDF = explodedDF
|
||||
.withColumn("prev_day", lag("day", 1).over(windowSpec))
|
||||
.withColumn("day_diff", $"day" - $"prev_day")
|
||||
.withColumn("is_active", when($"day_diff" === 1, 1).otherwise(0))
|
||||
|
||||
resultDF.show()
|
||||
}
|
||||
}
|
||||
|
||||
case class userRecord(uid: String, year: String, month: String, day: String)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package com.aisi.sparkSql
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
import org.apache.spark.sql.{DataFrame, RelationalGroupedDataset, SQLContext, SparkSession}
|
||||
|
||||
import java.text.DateFormat
|
||||
import java.time.format.DateTimeFormatter
|
||||
|
||||
/**
|
||||
* Compute records of consecutively active users
|
||||
*/
|
||||
object A2 {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val conf = new SparkConf()
|
||||
conf.setMaster("local[*]")
|
||||
conf.setAppName("shop count")
|
||||
val sc = new SparkContext(conf)
|
||||
val sqlSc = new SQLContext(sc)
|
||||
import sqlSc.implicits._
|
||||
// sid,dt,money
|
||||
val userDF: DataFrame = sc.textFile("Spark/data/shops.txt")
|
||||
.map(t => {
|
||||
val line = t.split(",")
|
||||
val sid = line(0)
|
||||
val strDataTime = line(1).split("-")
|
||||
val year = strDataTime(0)
|
||||
val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1)
val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2)
|
||||
val money = line(2).toInt
|
||||
shopRecord(sid,year, month, day,money)
|
||||
}).toDF()
|
||||
// userDF.show()
|
||||
userDF.show()
|
||||
// RelationalGroupedDataset: [grouping expressions: [sid: string, month: string], value: [sid: string, year: string ... 3 more fields], type: GroupBy]
|
||||
val dataset = userDF.groupBy("sid","month")
|
||||
println(dataset)
|
||||
dataset.sum().show()
|
||||
|
||||
// val sparkSession = SparkSession.builder().appName("user active").master("local[*]").getOrCreate()
|
||||
// userDF.groupBy("")
|
||||
// userDF.show()
|
||||
}
|
||||
}
|
||||
case class shopRecord (sid:String, year:String,month:String,day:String,money:Int){}
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
package com.aisi.sparkSql;
|
||||
|
||||
import java.io.{File, PrintWriter}
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.{Date, Properties, Random}
|
||||
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
|
||||
|
||||
object MockData {
|
||||
|
||||
def randomNum(index: Int, random: Random): String = {
|
||||
|
||||
var str = ""
|
||||
|
||||
for (i <- 0 until index) {
|
||||
|
||||
str += random.nextInt(10)
|
||||
|
||||
}
|
||||
str
|
||||
}
|
||||
|
||||
def fillZero(random: Random, num: Int, index: Int): String = {
|
||||
|
||||
val randomNum = random.nextInt(num)
|
||||
|
||||
|
||||
var randomNumStr = randomNum.toString
|
||||
|
||||
if (randomNum < 10) {
|
||||
|
||||
randomNumStr = ("%0" + index + "d").format(randomNum)
|
||||
|
||||
}
|
||||
|
||||
randomNumStr
|
||||
|
||||
}
|
||||
|
||||
def initFile(path: String): PrintWriter = {
|
||||
|
||||
new PrintWriter(new File(path))
|
||||
|
||||
}
|
||||
|
||||
def writeDataToFile(pw: PrintWriter, content: String): Unit = {
|
||||
pw.write(content + "\n")
|
||||
|
||||
pw.flush()
|
||||
}
|
||||
|
||||
def closeFile(pw: PrintWriter): Unit = {
|
||||
|
||||
pw.close()
|
||||
|
||||
}
|
||||
|
||||
def initKafkaProducer(): KafkaProducer[String,String] ={
|
||||
|
||||
val props = new Properties ()
|
||||
|
||||
props.put ("bootstrap.servers", "localhost:9092")
|
||||
|
||||
props.put ("acks", "all")
|
||||
|
||||
props.put ("key.serializer","org.apache.kafka.common.serialization.StringSerializer")
|
||||
|
||||
props.put ("value.serializer","org.apache.kafka.common.serialization.StringSerializer")
|
||||
|
||||
new KafkaProducer[String, String] (props)
|
||||
|
||||
}
|
||||
|
||||
def writeDataToKafka(producer:KafkaProducer[String,String],content:String):Unit = {
|
||||
|
||||
producer.send(new ProducerRecord[String,String]("RoadRealTimeLog",content))
|
||||
|
||||
}
|
||||
|
||||
def closeKafka(producer:KafkaProducer[String,String]):Unit = {
|
||||
|
||||
producer.close()
|
||||
}
|
||||
|
||||
def mock(): Unit = {
|
||||
|
||||
val pw = initFile("路径")
|
||||
|
||||
val producer = initKafkaProducer()
|
||||
|
||||
val random = new Random()
|
||||
|
||||
val locations = Array("鲁","京","豫","京","沪","赣","津","深","黑","粤")
|
||||
|
||||
val day = new SimpleDateFormat ("yyyy-MM-dd").format (new Date())
|
||||
|
||||
for(i<-0 until 3000) {
|
||||
|
||||
val car = locations (random.nextInt (10)) + (65 + random.nextInt (26)).asInstanceOf[Char]+ randomNum(5, random)
|
||||
|
||||
var baseActionTime = day +""+ fillZero(random, 24,2)
|
||||
|
||||
for(j <- 0 until random.nextInt (300)) {
|
||||
|
||||
if (j % 30 == 0 && j != 0) {
|
||||
var nextHour = ""
|
||||
|
||||
val baseHour = baseActionTime.split(" ")(1)
|
||||
|
||||
if (baseHour.startsWith("0")) {
|
||||
|
||||
if (baseHour.endsWith("9")) {
|
||||
|
||||
nextHour = "10"
|
||||
|
||||
} else {
|
||||
|
||||
nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
|
||||
}
|
||||
} else if (baseHour == "23") {
|
||||
nextHour = fillZero(random, 24, 2)
|
||||
} else {
|
||||
nextHour = (baseHour.toInt + 1).toString
|
||||
}
|
||||
baseActionTime = day + " " + nextHour
|
||||
}
|
||||
|
||||
val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
|
||||
|
||||
val monitorId = fillZero(random, 10, 4)
|
||||
|
||||
val speed = random.nextInt (200) +1
|
||||
|
||||
val roadId = random.nextInt (50)+1
|
||||
|
||||
val cameraId= "0"+ randomNum(4, random)
|
||||
|
||||
val areald = fillZero(random, random.nextInt(8) +1, 2)
|
||||
|
||||
val content = day + "\t"+ monitorId +"\t" + cameraId + "\t" + car + "\t" + actionTime + "\t" + speed + "\t" + roadId + "\t"+areald
|
||||
|
||||
writeDataToFile(pw, content)
|
||||
writeDataToKafka(producer,content)
|
||||
Thread.sleep(50)
|
||||
}
|
||||
}
|
||||
closeFile(pw)
|
||||
closeKafka(producer)
|
||||
}
|
||||
def main(args:Array[String]):Unit = {
|
||||
mock()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.hive.jdbc.HiveDriver
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
|
||||
import org.apache.spark.sql.expressions.Aggregator
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{DataFrame, SparkSession}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.SparkSession
|
||||
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
package com.aisi.sparkSreaming
|
||||
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
||||
|
||||
object TestStreaming {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val conf = new SparkConf()
|
||||
conf.setMaster("local[*]")
|
||||
conf.setAppName("testStreaming")
|
||||
val ssc = new StreamingContext(conf, Seconds(5))
|
||||
val ds = ssc.socketTextStream("localhost", 6666)
|
||||
val ds1 = ds.flatMap(_.split(" "))
|
||||
.map((_, 1))
|
||||
.reduceByKey(_ + _)
|
||||
ds1.print()
|
||||
|
||||
ssc.start()
|
||||
ssc.awaitTermination()
|
||||
}
|
||||
}
|
||||
pom.xml
@@ -11,6 +11,7 @@
    <modules>
        <module>MapReduceTest</module>
        <module>Spark</module>
        <module>Kafka</module>
    </modules>

    <properties>
@ -0,0 +1,30 @@
|
|||
2024-10-24 0009 04378 沪K21792 2024-10-2406:11:57 1 20 00
|
||||
2024-10-24 0007 05341 沪K21792 2024-10-2406:56:05 43 36 03
|
||||
2024-10-24 0003 04301 沪K21792 2024-10-2406:20:17 62 49 00
|
||||
2024-10-24 0006 02290 沪K21792 2024-10-2406:13:45 5 43 02
|
||||
2024-10-24 0006 08637 沪K21792 2024-10-2406:23:29 33 21 04
|
||||
2024-10-24 0003 09518 沪K21792 2024-10-2406:39:04 26 39 00
|
||||
2024-10-24 0008 08202 沪K21792 2024-10-2406:41:44 171 34 05
|
||||
2024-10-24 0002 09586 沪K21792 2024-10-2406:22:43 69 9 01
|
||||
2024-10-24 0004 06210 沪K21792 2024-10-2406:57:41 55 16 00
|
||||
2024-10-24 0003 06017 沪K21792 2024-10-2406:22:51 114 49 01
|
||||
2024-10-24 0006 07356 沪K21792 2024-10-2406:18:28 95 21 01
|
||||
2024-10-24 0001 02689 沪K21792 2024-10-2406:05:04 92 9 03
|
||||
2024-10-24 0001 00143 沪K21792 2024-10-2406:12:22 175 44 00
|
||||
2024-10-24 0005 01871 沪K21792 2024-10-2406:55:25 184 23 03
|
||||
2024-10-24 0004 00887 沪K21792 2024-10-2406:20:25 166 28 07
|
||||
2024-10-24 0001 08940 沪K21792 2024-10-2406:59:38 10 3 04
|
||||
2024-10-24 0008 02450 沪K21792 2024-10-2406:56:13 122 6 03
|
||||
2024-10-24 0005 08706 沪K21792 2024-10-2406:24:02 128 5 01
|
||||
2024-10-24 0004 04151 沪K21792 2024-10-2406:42:51 46 24 05
|
||||
2024-10-24 0006 07990 沪K21792 2024-10-2406:30:50 88 23 01
|
||||
2024-10-24 0000 04371 沪K21792 2024-10-2406:41:15 123 14 00
|
||||
2024-10-24 0002 01350 沪K21792 2024-10-2406:16:00 134 22 00
|
||||
2024-10-24 0006 08116 沪K21792 2024-10-2406:17:44 17 34 06
|
||||
2024-10-24 0000 06980 沪K21792 2024-10-2406:14:28 104 47 03
|
||||
2024-10-24 0009 06814 沪K21792 2024-10-2406:05:47 173 29 00
|
||||
2024-10-24 0007 01068 沪K21792 2024-10-2406:06:57 169 18 00
|
||||
2024-10-24 0007 05685 沪K21792 2024-10-2406:05:57 165 20 02
|
||||
2024-10-24 0005 07818 沪K21792 2024-10-2406:08:37 157 11 00
|
||||
2024-10-24 0004 06503 沪K21792 2024-10-2406:51:26 9 22 03
|
||||
2024-10-24 0008 02831 沪K21792 2024-10-2406:46:46 25 16 03
|
||||