update kafka consumer-01
This commit is contained in:
15
Spark/data/shops.txt
Normal file
15
Spark/data/shops.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
sid1,2022-01-18,500
|
||||
sid1,2022-02-10,500
|
||||
sid1,2022-02-10,200
|
||||
sid1,2022-02-11,600
|
||||
sid1,2022-02-12,400
|
||||
sid1,2022-02-13,200
|
||||
sid1,2022-02-15,100
|
||||
sid1,2022-03-05,180
|
||||
sid1,2022-04-05,280
|
||||
sid1,2022-04-06,220
|
||||
sid2,2022-02-10,100
|
||||
sid2,2022-02-11,100
|
||||
sid2,2022-02-13,100
|
||||
sid2,2022-03-15,100
|
||||
sid2,2022-04-15,100
|
||||
12
Spark/data/user.txt
Normal file
12
Spark/data/user.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
id01,2022-02-28
|
||||
id01,2022-03-01
|
||||
id01,2022-03-01
|
||||
id01,2022-03-02
|
||||
id01,2022-03-05
|
||||
id01,2022-03-04
|
||||
id01,2022-03-06
|
||||
id01,2022-03-07
|
||||
id02,2022-03-01
|
||||
id02,2022-03-02
|
||||
id02,2022-03-03
|
||||
id02,2022-03-06
|
||||
@@ -55,6 +55,13 @@
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>3.1.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark Streaming -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.12</artifactId>
|
||||
<version>3.1.2</version> <!-- 选择你需要的版本 -->
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.SparkContext
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
log4j.rootLogger=info,console
|
||||
log4j.rootLogger=error,console
|
||||
|
||||
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.console.target=System.out
|
||||
|
||||
115
Spark/src/main/scala/MockData.scala
Normal file
115
Spark/src/main/scala/MockData.scala
Normal file
@@ -0,0 +1,115 @@
|
||||
import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.{Date, Properties, Random}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

/**
 * Generates mock road-monitoring log records and writes each record both to a
 * local file and to the Kafka topic "RoadRealTimeLog".
 *
 * Record layout (tab-separated):
 * day, monitorId, cameraId, car plate, actionTime, speed, roadId, areaId
 */
object MockData {

  /** Returns a string of `index` random decimal digits. */
  def randomNum(index: Int, random: Random): String = {
    val sb = new StringBuilder
    for (_ <- 0 until index) sb.append(random.nextInt(10))
    sb.toString
  }

  /**
   * Draws a random value in [0, num) and left-pads it with zeros to `index`
   * digits. Padding is applied unconditionally: `format` never truncates, so a
   * value that already has `index` or more digits is returned unchanged.
   * (The previous `if (randomNum < 10)` guard skipped padding for two-digit
   * values, which silently broke pad widths greater than 2.)
   */
  def fillZero(random: Random, num: Int, index: Int): String =
    ("%0" + index + "d").format(random.nextInt(num))

  /** Opens (and truncates) the output file at `path`. */
  def initFile(path: String): PrintWriter = new PrintWriter(new File(path))

  /** Writes one line and flushes immediately so the file can be tailed while mocking. */
  def writeDataToFile(pw: PrintWriter, content: String): Unit = {
    pw.write(content + "\n")
    pw.flush()
  }

  def closeFile(pw: PrintWriter): Unit = pw.close()

  /** Builds a string/string producer for a local single-broker Kafka at localhost:9092. */
  def initKafkaProducer(): KafkaProducer[String, String] = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("acks", "all")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    new KafkaProducer[String, String](props)
  }

  /** Sends one record to the "RoadRealTimeLog" topic (fire-and-forget). */
  def writeDataToKafka(producer: KafkaProducer[String, String], content: String): Unit =
    producer.send(new ProducerRecord[String, String]("RoadRealTimeLog", content))

  def closeKafka(producer: KafkaProducer[String, String]): Unit = producer.close()

  /**
   * Emits 30 simulated cars, each producing up to 300 records at ~50 ms
   * intervals. The hour component advances roughly every 30 records.
   * Resources are closed in a finally block so an exception mid-run does not
   * leak the file handle or the Kafka producer.
   */
  def mock(): Unit = {
    val pw = initFile("d:\\tmp\\data.txt")
    val producer = initKafkaProducer()
    try {
      val random = new Random()
      val locations = Array("鲁", "京", "豫", "京", "沪", "赣", "津", "深", "黑", "粤")
      val day = new SimpleDateFormat("yyyy-MM-dd").format(new Date())

      for (_ <- 0 until 30) {
        // Plate: region prefix + random uppercase letter + 5 random digits.
        val car = locations(random.nextInt(10)) +
          (65 + random.nextInt(26)).asInstanceOf[Char] + randomNum(5, random)
        var baseActionTime = day + " " + fillZero(random, 24, 2)

        for (j <- 0 until random.nextInt(300)) {
          // Roll the hour forward every 30 records (skip the very first record).
          if (j % 30 == 0 && j != 0) {
            var nextHour = ""
            val baseHourParts = baseActionTime.split(" ")

            if (baseHourParts.length > 1) {
              val baseHour = baseHourParts(1)
              if (baseHour.startsWith("0")) {
                if (baseHour.endsWith("9")) {
                  nextHour = "10"
                } else {
                  nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
                }
              } else if (baseHour == "23") {
                // Day boundary: restart at a random hour rather than overflowing to 24.
                nextHour = fillZero(random, 24, 2)
              } else {
                nextHour = (baseHour.toInt + 1).toString
              }
              baseActionTime = day + " " + nextHour
            } else {
              baseActionTime = day + " 00" // fall back to hour 00 if the split failed
            }
          }

          val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
          val monitorId = fillZero(random, 10, 4)
          val speed = random.nextInt(200) + 1
          val roadId = random.nextInt(50) + 1
          val cameraId = "0" + randomNum(4, random)
          val areald = fillZero(random, random.nextInt(8) + 1, 2)

          val content = day + "\t" + monitorId + "\t" + cameraId + "\t" + car + "\t" +
            actionTime + "\t" + speed + "\t" + roadId + "\t" + areald
          writeDataToFile(pw, content)
          writeDataToKafka(producer, content)
          Thread.sleep(50)
        }
      }
    } finally {
      closeFile(pw)
      closeKafka(producer)
    }
  }

  def main(args: Array[String]): Unit = {
    mock()
  }
}
|
||||
51
Spark/src/main/scala/com/aisi/sparkSql/A1.scala
Normal file
51
Spark/src/main/scala/com/aisi/sparkSql/A1.scala
Normal file
@@ -0,0 +1,51 @@
|
||||
package com.aisi.sparkSql

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

/**
 * Loads Spark/data/user.txt (lines of "uid,yyyy-MM-dd") and flags days on
 * which a user was active on consecutive calendar days within a month.
 */
object A1 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("user active")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)
    import sqlSc.implicits._

    val userDF: DataFrame = sc.textFile("Spark/data/user.txt")
      .map(t => {
        val line = t.split(",")
        val strDataTime = line(1).split("-")
        val year = strDataTime(0)
        // Strip a single leading zero ("03" -> "3") and leave values like "12" intact.
        // The old check `indexOf(0) == -1` looked for the NUL character, was always
        // true, and therefore stripped the first digit of every month/day.
        val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1)
        val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2)
        userRecord(line(0), year, month, day)
      }).toDF()

    userDF.show()

    // Group by user id and month, collecting the active days of that month.
    val groupedDF = userDF.groupBy("uid", "year", "month")
      .agg(collect_list("day").as("days"))

    // Explode the collected days back out and cast to int for arithmetic.
    val explodedDF = groupedDF
      .withColumn("day", explode($"days"))
      .withColumn("day", $"day".cast("int"))

    // Order days within each (uid, year, month) partition.
    val windowSpec = Window.partitionBy("uid", "year", "month").orderBy("day")

    // A day is "active" when it directly follows the previous active day.
    val resultDF = explodedDF
      .withColumn("prev_day", lag("day", 1).over(windowSpec))
      .withColumn("day_diff", $"day" - $"prev_day")
      .withColumn("is_active", when($"day_diff" === 1, 1).otherwise(0))

    resultDF.show()
  }
}

// One parsed activity record: user id plus the split date parts (as strings).
final case class userRecord(uid: String, year: String, month: String, day: String)
|
||||
44
Spark/src/main/scala/com/aisi/sparkSql/A2.scala
Normal file
44
Spark/src/main/scala/com/aisi/sparkSql/A2.scala
Normal file
@@ -0,0 +1,44 @@
|
||||
package com.aisi.sparkSql

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, RelationalGroupedDataset, SQLContext, SparkSession}

import java.text.DateFormat
import java.time.format.DateTimeFormatter

/**
 * Loads Spark/data/shops.txt (lines of "sid,yyyy-MM-dd,money") and sums
 * revenue per shop per month.
 */
object A2 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("shop count")
    val sc = new SparkContext(conf)
    val sqlSc = new SQLContext(sc)
    import sqlSc.implicits._

    // Input columns: sid, dt (yyyy-MM-dd), money
    val userDF: DataFrame = sc.textFile("Spark/data/shops.txt")
      .map(t => {
        val line = t.split(",")
        val sid = line(0)
        val strDataTime = line(1).split("-")
        val year = strDataTime(0)
        // Strip a single leading zero ("02" -> "2") and leave "10".."12" intact.
        // The old check `indexOf(0) == -1` looked for the NUL character, was always
        // true, and therefore stripped the first digit of every month/day.
        val month = if (strDataTime(1).startsWith("0")) strDataTime(1).substring(1) else strDataTime(1)
        val day = if (strDataTime(2).startsWith("0")) strDataTime(2).substring(1) else strDataTime(2)
        val money = line(2).toInt
        shopRecord(sid, year, month, day, money)
      }).toDF()

    userDF.show()

    // RelationalGroupedDataset keyed by (sid, month); sum() aggregates the numeric columns.
    val dataset = userDF.groupBy("sid", "month")
    println(dataset)
    dataset.sum().show()
  }
}

// One parsed sales record: shop id, split date parts, and the sale amount.
final case class shopRecord(sid: String, year: String, month: String, day: String, money: Int)
|
||||
152
Spark/src/main/scala/com/aisi/sparkSql/MockData.scala
Normal file
152
Spark/src/main/scala/com/aisi/sparkSql/MockData.scala
Normal file
@@ -0,0 +1,152 @@
|
||||
package com.aisi.sparkSql

import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.{Date, Properties, Random}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

/**
 * Generates mock road-monitoring log records and writes each record both to a
 * local file and to the Kafka topic "RoadRealTimeLog".
 *
 * Record layout (tab-separated):
 * day, monitorId, cameraId, car plate, actionTime, speed, roadId, areaId
 */
object MockData {

  /** Returns a string of `index` random decimal digits. */
  def randomNum(index: Int, random: Random): String = {
    var str = ""
    for (_ <- 0 until index) {
      str += random.nextInt(10)
    }
    str
  }

  /** Draws a random value in [0, num) and left-pads single-digit values with zeros to `index` digits. */
  def fillZero(random: Random, num: Int, index: Int): String = {
    val randomNum = random.nextInt(num)
    var randomNumStr = randomNum.toString
    if (randomNum < 10) {
      randomNumStr = ("%0" + index + "d").format(randomNum)
    }
    randomNumStr
  }

  /** Opens (and truncates) the output file at `path`. */
  def initFile(path: String): PrintWriter = {
    new PrintWriter(new File(path))
  }

  /** Writes one line and flushes immediately so the file can be tailed while mocking. */
  def writeDataToFile(pw: PrintWriter, content: String): Unit = {
    pw.write(content + "\n")
    pw.flush()
  }

  def closeFile(pw: PrintWriter): Unit = {
    pw.close()
  }

  /** Builds a string/string producer for a local single-broker Kafka at localhost:9092. */
  def initKafkaProducer(): KafkaProducer[String, String] = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("acks", "all")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    new KafkaProducer[String, String](props)
  }

  /** Sends one record to the "RoadRealTimeLog" topic (fire-and-forget). */
  def writeDataToKafka(producer: KafkaProducer[String, String], content: String): Unit = {
    producer.send(new ProducerRecord[String, String]("RoadRealTimeLog", content))
  }

  def closeKafka(producer: KafkaProducer[String, String]): Unit = {
    producer.close()
  }

  /**
   * Emits 3000 simulated cars, each producing up to 300 records at ~50 ms
   * intervals; the hour component advances roughly every 30 records.
   */
  def mock(): Unit = {
    // NOTE(review): "路径" is a placeholder ("path") — replace with a real output file path.
    val pw = initFile("路径")
    val producer = initKafkaProducer()
    val random = new Random()
    val locations = Array("鲁", "京", "豫", "京", "沪", "赣", "津", "深", "黑", "粤")
    val day = new SimpleDateFormat("yyyy-MM-dd").format(new Date())

    for (i <- 0 until 3000) {
      // Plate: region prefix + random uppercase letter + 5 random digits.
      val car = locations(random.nextInt(10)) +
        (65 + random.nextInt(26)).asInstanceOf[Char] + randomNum(5, random)
      // BUG FIX: the original concatenated with "" (empty string), so
      // baseActionTime had no space and split(" ")(1) below threw
      // ArrayIndexOutOfBoundsException on the first hour rollover.
      var baseActionTime = day + " " + fillZero(random, 24, 2)

      for (j <- 0 until random.nextInt(300)) {
        // Roll the hour forward every 30 records (skip the very first record).
        // BUG FIX: use short-circuit && instead of bitwise &.
        if (j % 30 == 0 && j != 0) {
          var nextHour = ""
          val baseHour = baseActionTime.split(" ")(1)

          if (baseHour.startsWith("0")) {
            if (baseHour.endsWith("9")) {
              nextHour = "10"
            } else {
              nextHour = "0" + (baseHour.substring(1).toInt + 1).toString
            }
          } else if (baseHour == "23") {
            // Day boundary: restart at a random hour rather than overflowing to 24.
            nextHour = fillZero(random, 24, 2)
          } else {
            nextHour = (baseHour.toInt + 1).toString
          }
          baseActionTime = day + " " + nextHour
        }

        val actionTime = baseActionTime + ":" + fillZero(random, 60, 2) + ":" + fillZero(random, 60, 2)
        val monitorId = fillZero(random, 10, 4)
        val speed = random.nextInt(200) + 1
        val roadId = random.nextInt(50) + 1
        val cameraId = "0" + randomNum(4, random)
        val areald = fillZero(random, random.nextInt(8) + 1, 2)

        val content = day + "\t" + monitorId + "\t" + cameraId + "\t" + car + "\t" +
          actionTime + "\t" + speed + "\t" + roadId + "\t" + areald

        writeDataToFile(pw, content)
        writeDataToKafka(producer, content)
        Thread.sleep(50)
      }
    }
    closeFile(pw)
    closeKafka(producer)
  }

  def main(args: Array[String]): Unit = {
    mock()
  }
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.hive.jdbc.HiveDriver
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.expressions.Window
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
|
||||
import org.apache.spark.sql.expressions.Aggregator
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{DataFrame, SparkSession}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.aisi.spark
|
||||
package com.aisi.sparkSql
|
||||
|
||||
import org.apache.spark.sql.SparkSession
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
package com.aisi.sparkSreaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Word count over a TCP text stream: reads lines from localhost:6666 and
 * prints per-word counts for each 5-second micro-batch until terminated.
 */
object TestStreaming {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("testStreaming")
    val ssc = new StreamingContext(conf, Seconds(5))

    // Split each line into words, pair each word with 1, and sum per word.
    val wordCounts = ssc
      .socketTextStream("localhost", 6666)
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey((a, b) => a + b)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
|
||||
Reference in New Issue
Block a user