首页 > 代码库 > 根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式二

根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式二

 


根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式二

技术分享
技术分享
技术分享
测试数据
技术分享
Java 代码
技术分享
  1 package com.hzf.spark.study;
  2 
  3 import java.util.ArrayList;
  4 import java.util.Collections;
  5 import java.util.Comparator;
  6 import java.util.HashMap;
  7 import java.util.Iterator;
  8 import java.util.List;
  9 import java.util.Map;
 10 import java.util.Set;
 11 
 12 import org.apache.spark.SparkConf;
 13 import org.apache.spark.api.java.JavaPairRDD;
 14 import org.apache.spark.api.java.JavaRDD;
 15 import org.apache.spark.api.java.JavaSparkContext;
 16 import org.apache.spark.api.java.function.Function;
 17 import org.apache.spark.api.java.function.PairFlatMapFunction;
 18 import org.apache.spark.api.java.function.PairFunction;
 19 import org.apache.spark.api.java.function.VoidFunction;
 20 import org.apache.spark.broadcast.Broadcast;
 21 
 22 import scala.Tuple2;
 23 
 24 public class HotChannel02 {
 25     public static void main(String[] args) {
 26         SparkConf conf = new SparkConf()
 27                 .setAppName("HotChannel")
 28                 .setMaster("local")
 29                 .set("spark.testing.memory", "2147480000");
 30         JavaSparkContext sc = new JavaSparkContext(conf);
 31         JavaRDD<String> logRDD = sc.textFile("f:/userLog");
 32         String str = "View";
 33         final Broadcast<String> broadcast = sc.broadcast(str);
 34         hotChannel(sc, logRDD, broadcast);
 35     }
 36     private static void hotChannel(JavaSparkContext sc, JavaRDD<String> logRDD, final Broadcast<String> broadcast) {
 37         JavaRDD<String> filteredLogRDD = logRDD.filter(new Function<String, Boolean>() {
 38             
 39             private static final long serialVersionUID = 1L;
 40 
 41             @Override
 42             public Boolean call(String v1) throws Exception {
 43                 String actionParam = broadcast.value();
 44                 String action = v1.split("\t")[5];
 45                 return actionParam.equals(action);
 46             }
 47         });
 48         
 49         JavaPairRDD<String, String> channel2nullRDD = filteredLogRDD.mapToPair(new PairFunction<String, String,String>() {
 50             
 51             private static final long serialVersionUID = 1L;
 52 
 53             @Override
 54             public Tuple2<String, String> call(String val) throws Exception {
 55                 String channel = val.split("\t")[4];
 56                 
 57                 return new Tuple2<String, String>(channel,null);
 58             }
 59         });
 60         Map<String, Object> channelPVMap = channel2nullRDD.countByKey();
 61         Set<String> keySet = channelPVMap.keySet();
 62         List<SortObj> channels  = new ArrayList<>();
 63         for(String channel : keySet){ 
 64             channels.add(new SortObj(channel, Integer.valueOf(channelPVMap.get(channel)+"")));
 65         }
 66         Collections.sort(channels, new Comparator<SortObj>() {
 67 
 68             @Override
 69             public int compare(SortObj o1, SortObj o2) {
 70                 return o2.getValue() - o1.getValue();
 71             }
 72         });
 73         
 74         List<String> hotChannelList = new ArrayList<>();
 75         for (int i = 0; i < 3; i++) {
 76             hotChannelList.add(channels.get(i).getKey());
 77         }
 78         
 79         
 80         final Broadcast<List<String>> hotChannelListBroadcast = sc.broadcast(hotChannelList);
 81         
 82          
 83         JavaRDD<String> filtedRDD = logRDD.filter(new Function<String, Boolean>() {
 84 
 85             @Override
 86             public Boolean call(String v1) throws Exception {
 87                 List<String> hostChannels = hotChannelListBroadcast.value();
 88                 String channel = v1.split("\t")[4];
 89                  String userId = v1.split("\t")[2];
 90                 return hostChannels.contains(channel) && !"null".equals(userId);
 91             }
 92         });
 93         
 94         JavaPairRDD<String, String> user2ChannelRDD = filtedRDD.mapToPair(new PairFunction<String, String,String>() {
 95 
 96             private static final long serialVersionUID = 1L;
 97 
 98             @Override
 99             public Tuple2<String, String> call(String val) throws Exception {
100                 String[] splited = val.split("\t");
101                 String userId = splited[2];
102                 String channel = splited[4];
103                 return new Tuple2<String, String>(userId,channel);
104             }
105         });
106         
107         JavaPairRDD<String, String> userVistChannelsRDD = user2ChannelRDD.groupByKey().flatMapToPair(new PairFlatMapFunction<Tuple2<String,Iterable<String>>, String, String>() {
108 
109             private static final long serialVersionUID = 1L;
110 
111             @Override
112             public Iterable<Tuple2<String, String>> call(Tuple2<String, Iterable<String>> tuple) throws Exception {
113                 String userId = tuple._1;
114                 Iterator<String> iterator = tuple._2.iterator();
115                 Map<String, Integer> channelMap = new HashMap<>();
116                 while (iterator.hasNext()) {
117                     String channel = iterator.next();
118                     Integer count = channelMap.get(channel);
119                     if(count == null)
120                         count = 1;
121                     else
122                         count++;
123                     channelMap.put(channel, count);
124                 }
125                 
126                 List<Tuple2<String, String>> list = new ArrayList<>();
127                 Set<String> keys = channelMap.keySet();
128                 for(String channel : keys){
129                      Integer channelNum  = channelMap.get(channel);
130                      list.add(new Tuple2<String, String>(channel, userId + "_" + channelNum));
131                 }
132                 return list;
133             }
134         });
135         
136         
137         userVistChannelsRDD.groupByKey().foreach(new VoidFunction<Tuple2<String,Iterable<String>>>() {
138 
139             private static final long serialVersionUID = 1L;
140 
141             @Override
142             public void call(Tuple2<String, Iterable<String>> tuple) throws Exception {
143                 String channel = tuple._1;
144                 Iterator<String> iterator = tuple._2.iterator();
145                 List<SortObj> list = new ArrayList<>();
146                 while (iterator.hasNext()) {
147                     String ucs = iterator.next();
148                     String[] splited = ucs.split("_");
149                     String userId = splited[0];
150                     Integer num = Integer.valueOf(splited[1]);
151                     list.add(new SortObj(userId, num));
152                 }
153                 
154                 Collections.sort(list,new Comparator<SortObj>() {
155 
156                     @Override
157                     public int compare(SortObj o1, SortObj o2) {
158                         return o2.getValue() - o1.getValue();
159                     }
160                 });
161                 
162                 System.out.println("HOT_CHANNLE:"+channel);
163                 for(int i = 0 ; i < 3 ; i++){
164                     SortObj sortObj = list.get(i);
165                     System.out.println(sortObj.getKey() + "===" + sortObj.getValue());
166                 }
167             }
168         });
169     }
170 }
View Code

 

result
技术分享

 


 

 

根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式二