1.目的:
scala实现二次排序(第一列正序,第二列倒序)
2.素材:
twosort.txt
20 21
50 51
50 54
60 51
60 53
70 58
60 61
70 54
70 57
70 58
10 55
3.代码
/**
 * Created by puwenchao on 2016-07-25.
 */
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Secondary sort with Spark: reads a text file of two whitespace-separated
 * integer columns and prints every record as a `(first,second)` tuple, ordered
 * by the first column ascending and, within equal first columns, by the second
 * column descending.
 *
 * Both columns are compared numerically, so "100" sorts after "20"; the printed
 * tuples still carry the original field text.
 */
object twosort {

  /** Input file used when no path is passed on the command line. */
  private val DefaultInputPath = "e:\\twosort.txt"

  /**
   * Runs the secondary sort in a local Spark context.
   *
   * @param args optional; `args(0)` overrides the default input path
   * @throws NumberFormatException (wrapped in a SparkException by the failed
   *         job) if a record contains a field that is not an integer
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // Set up the runtime environment.
    val conf = new SparkConf().setAppName("twosort").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      // Load the records; blank lines and lines with fewer than two fields are
      // skipped instead of failing with an index-out-of-bounds error.
      val pairs = sc.textFile(inputPath)
        .map(_.trim.split("\\s+"))
        .filter(_.length >= 2)
        .map(fields => (fields(0), fields(1)))

      // Composite ordering: first column ascending, second column descending.
      // Declared locally so it is chosen over the default tuple ordering.
      implicit val firstAscSecondDesc: Ordering[(Long, Long)] =
        Ordering.Tuple2(Ordering.Long, Ordering.Long.reverse)

      // One global sort on the numeric key replaces group-by-key + per-group sort.
      val sorted = pairs.sortBy { case (first, second) => (first.toLong, second.toLong) }

      // Print as 2-tuples. Collect first so the lines are written by the driver
      // in sorted order rather than by whichever task happens to run.
      sorted.collect().foreach(println)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
4.输出
(10,55)
(20,21)
(50,54)
(50,51)
(60,61)
(60,53)
(60,51)
(70,58)
(70,58)
(70,57)
(70,54)