Lesson 45: Spark 2.0 in Practice with Dataset: map, flatMap, mapPartitions, dropDuplicates, coalesce, repartition, etc.
package com.dt.spark200
import org.apache.spark.sql.SparkSession
import scala.collection.mutable.ArrayBuffer

object DataSetsops {

  case class Person(name: String, age: Long)

  def main(args: Array[String]): Unit = {
    // Build the SparkSession, the unified entry point in Spark 2.0
    val spark = SparkSession
      .builder()
      .appName("DatasetOps")
      .master("local")
      .config("spark.sql.warehouse.dir", "file:///G:/IMFBigDataSpark2016/IMFScalaWorkspace_spark200/Spark200/spark-warehouse")
      .getOrCreate()

    // Implicits are needed for the Encoder-based Dataset operations below
    import spark.implicits._
    import org.apache.spark.sql.functions._

    // Read the sample people.json shipped with the Spark distribution into a DataFrame
    val personDF = spark.read.json("G:\\IMFBigDataSpark2016\\spark-2.0.0-bin-hadoop2.6\\examples\\src\\main\\resources\\people.json")
    val p
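The listing above breaks off mid-statement. As a minimal sketch (assumed, not the lesson's original code) of the operators named in the title, the statements below could be placed inside main after the imports above; the in-memory sample Dataset personDS is an assumption made for illustration, built from the Person case class already defined in the object:

    // Hypothetical sample data so each operator can be shown on a typed Dataset[Person]
    val personDS = Seq(Person("Michael", 29), Person("Andy", 30), Person("Andy", 30), Person("Justin", 19)).toDS()

    // map: transform each element, producing a new Dataset of tuples
    personDS.map(p => (p.name, p.age + 1)).show()

    // flatMap: each input element may yield zero or more output elements
    personDS.flatMap(p => p.name.split(" ")).show()

    // mapPartitions: process one whole partition at a time with an iterator
    personDS.mapPartitions { iter =>
      val result = ArrayBuffer[(String, Long)]()
      while (iter.hasNext) {
        val person = iter.next()
        result += ((person.name, person.age * 2))
      }
      result.iterator
    }.show()

    // dropDuplicates: remove duplicate rows, here keyed on the name column
    personDS.dropDuplicates("name").show()

    // coalesce: reduce the number of partitions without a full shuffle
    println(personDS.coalesce(1).rdd.partitions.length)

    // repartition: change the number of partitions with a full shuffle
    println(personDS.repartition(4).rdd.partitions.length)

mapPartitions is used here with an ArrayBuffer, matching the ArrayBuffer import in the original listing; per-partition processing amortizes setup cost (for example opening a connection once per partition) compared with calling map on every element.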