HaiNiuProjects/Spark/src/main/scala/com/aisi/sparkSql/TestSparkPureSql.scala

46 lines
1.5 KiB
Scala

package com.aisi.sparkSql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext} // 导入 Window
object TestSparkPureSql {

  /**
   * Demo entry point: loads a whitespace-delimited student file
   * (`id name age gender` per line), registers it as a temp view,
   * and runs a plain-SQL group-by over it.
   *
   * Runs locally (`local[*]`); intended as a learning example, not production code.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("test sql")
    val sc = new SparkContext(conf)
    try {
      val sqlSc = new SQLContext(sc)
      // Bring in implicit conversions (needed for rdd.toDF below).
      import sqlSc.implicits._

      // Parse each line "id name age gender" into a typed tuple.
      // NOTE(review): .toInt will throw on malformed lines — assumes clean input.
      val rdd: RDD[(Int, String, Int, String)] = sc.textFile("Spark/data/a.txt")
        .map { t =>
          val line = t.split(" ")
          (line(0).toInt, line(1), line(2).toInt, line(3))
        }

      val df: DataFrame = rdd.toDF("id", "name", "age", "gender")
      df.show()        // display table rows
      df.printSchema() // display column names and types

      // Equivalent DataFrame-API examples (kept for reference):
      // df.where("age > 20").groupBy("gender").sum("age").show()
      // df.orderBy($"age".desc).show()
      // Aggregate then sort:
      // val result = df.groupBy("gender")
      //   .agg(
      //     count("id").as("count_id"), // count of id
      //     sum("age").as("sum_age")    // sum of age
      //   )
      //   .orderBy($"sum_age".desc)     // sort by sum_age descending
      // result.show()

      // Register the DataFrame so it can be queried with plain SQL.
      df.createTempView("stu")
      val df1 = sqlSc.sql("select count(1) as gender_count,gender from stu group by gender")
      df1.show()
    } finally {
      // Fix: the original never stopped the context, leaking local Spark resources.
      sc.stop()
    }
  }
}