1. PageRank
2. Connected Components
3. Triangle Counting
例子:
users.txt
1,BarackObama,Barack Obama
2,ladygaga,Goddess of Love
3,jeresig,John Resig
4,justinbieber,Justin Bieber
6,matei_zaharia,Matei Zaharia
7,odersky,Martin Odersky
8,anonsys
followers.txt
2 1
4 1
1 2
6 3
7 3
7 6
6 7
3 7
算法实战:
package main.scala

import org.apache.spark.graphx.{GraphLoader, PartitionStrategy}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates three built-in GraphX algorithms on a small follower graph:
 * PageRank, Connected Components and Triangle Counting.
 *
 * Input files (paths are hard-coded below):
 *  - followers.txt: one edge per line, "srcId dstId" separated by whitespace.
 *  - users.txt: one user per line, "id,username[,full name]".
 *
 * Each algorithm's per-vertex result is joined with the user table and printed
 * as (username, value) pairs.
 */
object graphx_algorism {
  // Points Hadoop at the local winutils directory; evaluated when the object is initialised.
  System.setProperty("hadoop.home.dir","E:/zhuangji/winutil/")

  /**
   * Runs the three algorithms in sequence and prints their results to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args:Array[String]):Unit={
    // spark.cores.max caps the number of cores the application may request.
    // NOTE(review): per the Spark configuration docs this applies to standalone/Mesos
    // deployments; it presumably has no effect with a local[2] master - confirm.
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("graph_algorism")
      .set("spark.cores.max","10")
    val sc = new SparkContext(conf)

    try {
      val followersPath = "E:/Java_WS/ScalaDemo/data/followers.txt"
      val usersPath = "E:/Java_WS/ScalaDemo/data/users.txt"

      // Build the graph from the edge-list file.
      val graph = GraphLoader.edgeListFile(sc, followersPath)

      // (userId, username) pairs; only the first two comma-separated fields are used.
      val users = sc.textFile(usersPath).map { line =>
        val fields = line.split(",")
        (fields(0).toLong, fields(1))
      }

      // 1. PageRank
      // The argument 0.001 is the convergence tolerance (`tol`): iteration stops once
      // ranks change by less than this amount, so smaller values are more accurate.
      val ranks = graph.pageRank(0.001).vertices
      ranks.collect().foreach(println)
      val ranksByUsername = users.join(ranks).map {
        case (_, (username, rank)) => (username, rank)
      }
      println(ranksByUsername.collect().mkString("\n"))

      // 2. Connected Components
      // Each vertex is labelled with the lowest vertex id in its component.
      val cc = graph.connectedComponents().vertices
      // Print the elements themselves; println on the raw array would only show
      // the array's JVM identity string.
      cc.collect().foreach(println)
      val ccByUsername = users.join(cc).map {
        case (_, (username, component)) => (username, component)
      }
      println(ccByUsername.collect().mkString("\n"))

      // 3. Triangle Count
      // The graph is reloaded with edges in canonical orientation and explicitly
      // partitioned before counting triangles.
      val graphT = GraphLoader
        .edgeListFile(sc, followersPath, canonicalOrientation = true)
        .partitionBy(PartitionStrategy.RandomVertexCut)
      val triCounts = graphT.triangleCount().vertices
      val triCountByUsername = users.join(triCounts).map {
        case (_, (username, tc)) => (username, tc)
      }
      println(triCountByUsername.collect().mkString("\n"))
    } finally {
      // Release Spark resources even if one of the jobs above fails.
      sc.stop()
    }
  }
}
posted on 2016-09-28 16:35 阅读( ...) 评论( ...)