package com.aisi.sparkSql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demo: loads a space-delimited text file into a DataFrame and runs a plain
 * SQL aggregation (count per gender) over a temporary view.
 *
 * Expected input layout per line: "&lt;id&gt; &lt;name&gt; &lt;age&gt; &lt;gender&gt;"
 * (single-space separated), e.g. "1 zhangsan 20 m".
 */
object TestSparkPureSql {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("test sql")
    val sc = new SparkContext(conf)
    // NOTE(review): SQLContext is deprecated since Spark 2.0 in favour of
    // SparkSession.builder — confirm the project's Spark version before migrating.
    val sqlSc = new SQLContext(sc)
    // Bring implicit conversions into scope (rdd.toDF, $"col" syntax, ...).
    import sqlSc.implicits._

    try {
      // Parse each line into a typed tuple; malformed lines (missing fields or
      // non-numeric id/age) will fail the task — input is assumed well-formed.
      val rdd: RDD[(Int, String, Int, String)] = sc.textFile("Spark/data/a.txt")
        .map { t =>
          val line = t.split(" ")
          val id = line(0).toInt
          val name = line(1)
          val age = line(2).toInt
          val gender = line(3)
          (id, name, age, gender)
        }

      val df: DataFrame = rdd.toDF("id", "name", "age", "gender")
      df.show()        // print sample rows
      df.printSchema() // print column names and inferred types

      // df.where("age > 20").groupBy("gender").sum("age").show()
      // df.orderBy($"age".desc).show()
      // Aggregate and sort:
      // val result = df.groupBy("gender")
      //   .agg(
      //     count("id").as("count_id"), // count ids
      //     sum("age").as("sum_age")    // sum ages
      //   )
      //   .orderBy($"sum_age".desc)     // order by sum_age descending
      // result.show()

      // Register a session-scoped view so it can be queried with raw SQL.
      df.createTempView("stu")
      val df1 = sqlSc.sql("select count(1) as gender_count,gender from stu group by gender")
      df1.show()
    } finally {
      // Always release the SparkContext, even if the job throws,
      // so the local cluster resources are freed.
      sc.stop()
    }
  }
}