Scala_spark-电商平台离线分析项目-需求一Session访问步长时长占比统计
共十个需求
需求一
代码实现
sessionStat.scala
import java.util.{Date, UUID}
import commons.conf.ConfigurationManager
import commons.constant.Constants
import commons.model.{UserInfo, UserVisitAction}
import commons.utils.{DateUtils, NumberUtils, ParamUtils, StringUtils, ValidUtils}
import net.sf.json.JSONObject
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SaveMode, SparkSession}
import scala.collection.mutable
/**
* sessionStat
* 算是这部分程序的入口吧
*/
/**
* 新建一个session module ;
* 然后 scala添加进去session module ;project structure -- global lib --scala 2.11.8 右键 addToModules
*/
object SessionStat {
/**
 * Entry point of the session statistics job.
 *
 * Reads the task parameters from the configuration, loads the raw user actions,
 * aggregates them per session, filters the sessions while updating a custom
 * accumulator, and finally writes the visit-length / step-length ratios through JDBC.
 *
 * @param args command line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // The filter conditions are stored in the configuration as a JSON string.
  val jsonStr = ConfigurationManager.config.getString(Constants.TASK_PARAMS)
  val taskParam = JSONObject.fromObject(jsonStr)
  // Unique id of this run; stored with the result row.
  val taskUUID = UUID.randomUUID().toString

  val sparkConf = new SparkConf().setMaster("local[*]").setAppName("session")
  // The SparkSession wraps the SparkContext; Hive support is needed to query the source tables.
  val sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

  try {
    // actionRDD: RDD[UserVisitAction]
    val actionRDD = getOriActionRDD(sparkSession, taskParam)

    // sessionID2ActionRDD: RDD[(sessionID, UserVisitAction)]
    val sessionID2ActionRDD = actionRDD.map(item => (item.session_id, item))
    // session2GroupActionRDD: RDD[(sessionID, Iterable[UserVisitAction])]
    val session2GroupActionRDD = sessionID2ActionRDD.groupByKey()
    session2GroupActionRDD.cache()

    // Aggregation: one "key=value|..." string per session, joined with the user info.
    val sessionId2FullInfoRDD = getSessionFullInfo(sparkSession, session2GroupActionRDD)
    sessionId2FullInfoRDD .foreach(println(_))

    // Filtering: the custom accumulator must be registered before it is used in a job.
    val sessionAccumulator = new SessionAccumulator
    sparkSession.sparkContext.register(sessionAccumulator)

    // sessionId2FilteredRDD: RDD[(sessionId, fullInfo)] holding the sessions that pass the filter.
    // The accumulator is updated as a side effect of the filter.
    val sessionId2FilteredRDD = getSessionFilteredRDD(taskParam, sessionId2FullInfoRDD, sessionAccumulator)
    // This action is what actually runs the filter and therefore fills the accumulator;
    // it has to happen before sessionAccumulator.value is read below.
    sessionId2FilteredRDD.foreach(println(_))

    // Compute the ratios and store them in the database.
    getSessionRatio(sparkSession, taskUUID, sessionAccumulator.value)
  } finally {
    // Release the Spark resources even when one of the steps above fails.
    sparkSession.stop()
  }
}
/**
 * Step 6: turns the accumulator counters into ratios and appends one result row
 * to the JDBC table "session_stat_ratio_0416".
 *
 * @param sparkSession active session, used to build and write the one-row DataFrame
 * @param taskUUID     id of the current run, stored in the result row
 * @param value        accumulator content: counter name -> number of sessions
 */
def getSessionRatio(sparkSession: SparkSession, taskUUID: String, value: mutable.HashMap[String, Int]): Unit = {
  // Total number of sessions that passed the filter; 0 when nothing matched.
  val sessionCount = value.getOrElse(Constants.SESSION_COUNT, 0)
  // Guard the division only: the stored session count stays truthful (0),
  // while the ratios are computed against 1 so they come out as 0 instead of NaN.
  val denominator = if (sessionCount > 0) sessionCount.toDouble else 1.0

  // Share of sessions counted under `key`, rounded to two decimals; missing counters count as 0.
  def ratioOf(key: String) =
    NumberUtils.formatDouble(value.getOrElse(key, 0) / denominator, 2)

  // Wrap the statistics into the domain object; the argument order must match its fields.
  val stat = SessionAggrStat(
    taskUUID,
    sessionCount,
    // visit length buckets
    ratioOf(Constants.TIME_PERIOD_1s_3s),
    ratioOf(Constants.TIME_PERIOD_4s_6s),
    ratioOf(Constants.TIME_PERIOD_7s_9s),
    ratioOf(Constants.TIME_PERIOD_10s_30s),
    ratioOf(Constants.TIME_PERIOD_30s_60s),
    ratioOf(Constants.TIME_PERIOD_1m_3m),
    ratioOf(Constants.TIME_PERIOD_3m_10m),
    ratioOf(Constants.TIME_PERIOD_10m_30m),
    ratioOf(Constants.TIME_PERIOD_30m),
    // step length buckets
    ratioOf(Constants.STEP_PERIOD_1_3),
    ratioOf(Constants.STEP_PERIOD_4_6),
    ratioOf(Constants.STEP_PERIOD_7_9),
    ratioOf(Constants.STEP_PERIOD_10_30),
    ratioOf(Constants.STEP_PERIOD_30_60),
    ratioOf(Constants.STEP_PERIOD_60)
  )

  val sessionRatioRDD = sparkSession.sparkContext.makeRDD(Array(stat))
  import sparkSession.implicits._
  sessionRatioRDD.toDF().write
    .format("jdbc")
    .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL))
    .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER))
    .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD))
    .option("dbtable", "session_stat_ratio_0416")
    .mode(SaveMode.Append)
    .save()
}
/**
 * Helper of step 5: increments the accumulator counter of the visit-length bucket
 * that `visitLength` falls into. Values below 1 belong to no bucket and are ignored.
 *
 * @param visitLength                visit length of one session (compared against second-based bucket limits)
 * @param sessionAggrStatAccumulator accumulator whose counter is incremented
 */
def calculateVisitLength(visitLength:Long,sessionAggrStatAccumulator:SessionAccumulator)={
  // Cases are tried top to bottom, so each one only needs the upper limit of its bucket.
  val bucket = visitLength match {
    case length if length < 1 => None
    case length if length <= 3 => Some(Constants.TIME_PERIOD_1s_3s)
    case length if length <= 6 => Some(Constants.TIME_PERIOD_4s_6s)
    case length if length <= 9 => Some(Constants.TIME_PERIOD_7s_9s)
    case length if length <= 30 => Some(Constants.TIME_PERIOD_10s_30s)
    case length if length <= 60 => Some(Constants.TIME_PERIOD_30s_60s)
    case length if length <= 180 => Some(Constants.TIME_PERIOD_1m_3m)
    case length if length <= 600 => Some(Constants.TIME_PERIOD_3m_10m)
    case length if length <= 1800 => Some(Constants.TIME_PERIOD_10m_30m)
    case _ => Some(Constants.TIME_PERIOD_30m)
  }
  bucket.foreach(key => sessionAggrStatAccumulator.add(key))
}
/**
 * Helper of step 5: increments the accumulator counter of the step-length bucket
 * that `stepLength` falls into. Values below 1 belong to no bucket and are ignored.
 *
 * @param stepLength                 number of actions in one session
 * @param sessionAggrStatAccumulator accumulator whose counter is incremented
 */
def calculateStepLength(stepLength: Long, sessionAggrStatAccumulator: SessionAccumulator) = {
  // Cases are tried top to bottom, so each one only needs the upper limit of its bucket.
  val bucket = stepLength match {
    case steps if steps < 1 => None
    case steps if steps <= 3 => Some(Constants.STEP_PERIOD_1_3)
    case steps if steps <= 6 => Some(Constants.STEP_PERIOD_4_6)
    case steps if steps <= 9 => Some(Constants.STEP_PERIOD_7_9)
    case steps if steps <= 30 => Some(Constants.STEP_PERIOD_10_30)
    case steps if steps <= 60 => Some(Constants.STEP_PERIOD_30_60)
    case _ => Some(Constants.STEP_PERIOD_60)
  }
  bucket.foreach(key => sessionAggrStatAccumulator.add(key))
}
/**
 * Step 5: keeps the sessions that satisfy the task's filter parameters and, for every
 * session that is kept, updates the accumulator (total count plus the visit-length and
 * step-length buckets).
 *
 * The accumulator is updated inside the filter, so its value is only meaningful after
 * an action has been run on the returned RDD.
 *
 * @param taskParam             task parameters (age range, professionals, cities, sex,
 *                              keywords, category ids); absent parameters are skipped
 * @param sessionId2FullInfoRDD (sessionId, fullInfo) pairs, fullInfo being "key=value"
 *                              entries separated by "|"
 * @param sessionAccumulator    registered accumulator that receives the counters
 * @return the (sessionId, fullInfo) pairs that passed every check
 */
def getSessionFilteredRDD(taskParam: JSONObject, sessionId2FullInfoRDD: RDD[(String, String)],sessionAccumulator:SessionAccumulator) ={
  // Parameters that can restrict the result, in the order they appear in the filter string.
  val paramKeys = Seq(
    Constants.PARAM_START_AGE,
    Constants.PARAM_END_AGE,
    Constants.PARAM_PROFESSIONALS,
    Constants.PARAM_CITIES,
    Constants.PARAM_SEX,
    Constants.PARAM_KEYWORDS,
    Constants.PARAM_CATEGORY_IDS
  )
  // Build "key=value|key=value|..." from the parameters that are present (non-null).
  // Joining with mkString guarantees exactly one "|" between entries and none at the end.
  val filterInfo = paramKeys.flatMap { key =>
    Option(ParamUtils.getParam(taskParam, key)).map(paramValue => key + "=" + paramValue)
  }.mkString("|")

  sessionId2FullInfoRDD.filter {
    case (_, fullInfo) =>
      // Each check compares a field of the session data with the matching entry of filterInfo.
      // filterInfo is keyed by the PARAM_* constants, so the lookups must use them as well.
      // NOTE(review): ValidUtils is presumed to accept a session when the parameter is absent - confirm.
      val success =
        ValidUtils.between(fullInfo, Constants.FIELD_AGE, filterInfo, Constants.PARAM_START_AGE, Constants.PARAM_END_AGE) &&
          ValidUtils.in(fullInfo, Constants.FIELD_PROFESSIONAL, filterInfo, Constants.PARAM_PROFESSIONALS) &&
          ValidUtils.in(fullInfo, Constants.FIELD_CITY, filterInfo, Constants.PARAM_CITIES) &&
          ValidUtils.equal(fullInfo, Constants.FIELD_SEX, filterInfo, Constants.PARAM_SEX) &&
          ValidUtils.in(fullInfo, Constants.FIELD_SEARCH_KEYWORDS, filterInfo, Constants.PARAM_KEYWORDS) &&
          ValidUtils.in(fullInfo, Constants.FIELD_CLICK_CATEGORY_IDS, filterInfo, Constants.PARAM_CATEGORY_IDS)

      // Only sessions that satisfy every condition contribute to the statistics.
      if (success) {
        sessionAccumulator.add(Constants.SESSION_COUNT)
        val visitLength = StringUtils.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_VISIT_LENGTH).toLong
        val stepLength = StringUtils.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_STEP_LENGTH).toLong
        calculateVisitLength(visitLength, sessionAccumulator)
        calculateStepLength(stepLength, sessionAccumulator)
      }
      // The filter predicate has to yield a Boolean.
      success
  }
}
/**
 * Steps 2-4: aggregates the actions of every session and enriches the result with the
 * data of the user the session belongs to.
 *
 * Input: one entry per session, e.g.
 * (4bc33302668f4331aba52c8328a781c7, CompactBuffer(UserVisitAction(...), UserVisitAction(...), ...))
 *
 * Step 3 produces, per session:
 * Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time
 *
 * Step 4 appends the user fields:
 * Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time|Age|Professional|Sex|City
 *
 * Every entry is written as "key=value"; entries are separated by "|". Sessions whose
 * user id has no row in user_info are dropped by the (inner) join.
 *
 * @param sparkSession           session used to query the user_info table
 * @param session2GroupActionRDD (sessionId, actions of that session)
 * @return RDD of (sessionId, fullInfo)
 */
def getSessionFullInfo(sparkSession: SparkSession, session2GroupActionRDD: RDD[(String, Iterable[UserVisitAction])]) = {
  // Step 3 - userId2AggrInfoRDD: RDD[(userId, aggrInfo)]
  val userId2AggrInfoRDD = session2GroupActionRDD.map {
    case (sessionId, iterableAction) =>
      val actions = iterableAction.toSeq

      // First user id that is not the -1 placeholder; -1 when there is none.
      val userId = actions.map(_.user_id).find(_ != -1L).getOrElse(-1L)

      // Earliest and latest action time delimit the session.
      val actionTimes = actions.map(action => DateUtils.parseTime(action.action_time))
      val startTime = actionTimes.minBy(_.getTime)
      val endTime = actionTimes.maxBy(_.getTime)
      // Visit length in seconds.
      val visitLength = (endTime.getTime - startTime.getTime) / 1000

      // Step length is the number of actions in the session.
      val stepLength = actions.size

      // Distinct search keywords in order of first appearance. Whole values are compared:
      // a substring test would drop a keyword contained in text that was already collected.
      val searchKw = actions
        .map(_.search_keyword)
        .filter(keyword => StringUtils.isNotEmpty(keyword))
        .distinct
        .mkString(",")

      // Distinct clicked category ids in order of first appearance; -1 marks "no click".
      val clickCg = actions
        .map(_.click_category_id)
        .filter(_ != -1)
        .distinct
        .mkString(",")

      // NOTE(review): formatDate presumably keeps only the day part of the start time -
      // confirm whether the time of day is needed by later steps.
      val aggrInfo = Constants.FIELD_SESSION_ID + "=" + sessionId + "|" +
        Constants.FIELD_SEARCH_KEYWORDS + "=" + searchKw + "|" +
        Constants.FIELD_CLICK_CATEGORY_IDS + "=" + clickCg + "|" +
        Constants.FIELD_VISIT_LENGTH + "=" + visitLength + "|" +
        Constants.FIELD_STEP_LENGTH + "=" + stepLength + "|" +
        Constants.FIELD_START_TIME + "=" + DateUtils.formatDate(startTime)

      // Keyed by user id because that is the column shared with the user table.
      (userId, aggrInfo)
  }

  // Step 4 - join the per-session aggregate with the user table.
  val sql = "select * from user_info"
  import sparkSession.implicits._
  // userId2InfoIdRDD: RDD[(userId, UserInfo)]
  val userId2InfoIdRDD: RDD[(Long, UserInfo)] =
    sparkSession.sql(sql).as[UserInfo].rdd.map(item => (item.user_id, item))

  val sessionId2FullInfoRDD = userId2AggrInfoRDD.join(userId2InfoIdRDD).map {
    case (_, (aggrInfo, userInfo)) =>
      val fullInfo = aggrInfo + "|" +
        Constants.FIELD_AGE + "=" + userInfo.age + "|" +
        Constants.FIELD_PROFESSIONAL + "=" + userInfo.professional + "|" +
        Constants.FIELD_SEX + "=" + userInfo.sex + "|" +
        Constants.FIELD_CITY + "=" + userInfo.city

      // The user id was only needed for the join; switch the key back to the session id.
      val sessionId = StringUtils.getFieldFromConcatString(aggrInfo, "\\|", Constants.FIELD_SESSION_ID)
      (sessionId, fullInfo)
  }

  sessionId2FullInfoRDD
}
/**
 * Loads the raw user actions whose date lies inside the range given by the task parameters.
 *
 * @param sparkSession session used to run the query
 * @param taskParam    task parameters holding the start and end date
 * @return RDD of UserVisitAction rows from user_visit_action within [startDate, endDate]
 */
def getOriActionRDD(sparkSession: SparkSession, taskParam: JSONObject) = {
  // Brings the encoders needed by .as[UserVisitAction] into scope.
  import sparkSession.implicits._

  val startDate = ParamUtils.getParam(taskParam, Constants.PARAM_START_DATE)
  val endDate = ParamUtils.getParam(taskParam, Constants.PARAM_END_DATE)
  // Both bounds are inclusive.
  val query = s"select * from user_visit_action where date>='${startDate}' and date<='${endDate}'"

  sparkSession.sql(query).as[UserVisitAction].rdd
}
}
/*
step1 确认数据获取成功
actionRDD.foreach(println(_))
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,3,2019-10-10 16:13:29,null,-1,-1,55,62,null,null,2)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,5,2019-10-10 16:57:26,null,3,93,null,null,null,null,8)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,2,2019-10-10 16:34:28,null,-1,-1,null,null,48,35,1)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,4,2019-10-10 16:07:18,洗面奶,-1,-1,null,null,null,null,8)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,6,2019-10-10 16:46:07,null,-1,-1,54,14,null,null,6)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,2,2019-10-10 16:07:43,苹果,-1,-1,null,null,null,null,0)
UserVisitAction(2019-10-10,21,a69c2dc66dfb47a796c9dd0ac15bd623,7,2019-10-10 17:18:43,null,-1,-1,null,null,35,51,6)
UserVisitAction(2019-10-10,21,a69c2dc66dfb47a7
step2 斧子形的 以session为key 以CompactBuffer(一条条action)为value
session2GroupActionRDD.foreach(println(_))
(4bc33302668f4331aba52c8328a781c7,CompactBuffer(UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:45:35,联想笔记本,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:46:24,吸尘器,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:18:10,保温杯,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:58:49,null,-1,-1,59,20,null,null,3), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:03:04,null,-1,-1,59,78,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:02:11,null,-1,-1,22,56,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:31:47,null,-1,-1,98,64,null,null,6), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:40:32,null,-1,-1,14,89,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:47:58,null,-1,-1,null,null,36,75,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:28:19,null,-1,-1,96,79,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:31:50,null,83,3,null,null,null,null,3), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:11:05,机器学习,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:58:18,null,-1,-1,null,null,66,25,0), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:14:06,null,24,97,null,null,null,null,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:56:48,null,-1,-1,32,46,null,null,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:50:12,null,33,84,null,null,null,null,2), 
UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:32:32,null,-1,-1,4,15,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:02:53,吸尘器,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 12:04:02,null,-1,-1,44,80,null,null,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:36:43,null,-1,-1,null,null,60,54,3), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:58:52,华为手机,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:54:11,洗面奶,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:09:19,null,42,67,null,null,null,null,2), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:44:23,null,44,17,null,null,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:09:38,卫生纸,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:05:18,null,18,74,null,null,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:24:06,卫生纸,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:32:27,null,74,85,null,null,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:18:19,null,-1,-1,null,null,12,89,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:26:21,保温杯,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:41:48,联想笔记本,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:04:17,null,12,79,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 
12:17:24,null,-1,-1,93,98,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:27:52,null,-1,-1,null,null,58,44,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:34:27,null,-1,-1,null,null,52,69,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:16:49,null,-1,-1,null,null,13,90,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:37:47,null,-1,-1,86,67,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:10:56,null,-1,-1,null,null,62,31,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 12:01:41,null,4,0,null,null,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:48:00,null,-1,-1,87,86,null,null,6)))
(209cff604b4a4d5db0e53ab7adf80946,CompactBuffer(UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:21:41,null,61,73,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:10:55,保温杯,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:26:47,机器学习,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:07:52,null,-1,-1,56,96,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:46:01,null,36,56,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:45:58,null,55,62,null,null,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:46:12,null,41,4,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:48:00,null,-1,-1,18,85,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:36:57,null,69,16,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:32:34,null,-1,-1,null,null,12,62,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:53:19,null,-1,-1,19,44,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:07:54,null,-1,-1,26,16,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:10:14,null,-1,-1,null,null,85,36,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:35:18,null,92,79,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:09:46,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:52:57,null,35,53,null,null,null,null,1), 
UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:31:08,null,34,58,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:21:42,null,-1,-1,14,97,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:46:30,机器学习,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:38:57,null,-1,-1,null,null,63,7,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:34:19,洗面奶,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:36:24,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:38:10,null,-1,-1,59,0,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:50:45,null,0,13,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:33:28,null,-1,-1,null,null,13,37,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:33:56,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:10:03,null,58,72,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:32:36,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:26:17,null,-1,-1,12,67,null,null,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:54:41,null,-1,-1,93,17,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:07:23,洗面奶,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:10:33,Lamer,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 
20:51:04,null,-1,-1,null,null,64,13,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:00:53,洗面奶,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:18:09,null,65,57,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:25:05,null,-1,-1,58,73,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:27:06,null,-1,-1,null,null,73,73,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:36:19,null,-1,-1,null,null,66,43,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:56:23,null,34,60,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:09:31,null,12,14,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:29:15,null,55,56,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:07:22,null,27,28,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:37:16,null,-1,-1,null,null,71,2,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:04:34,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:36:35,null,91,36,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:25:52,null,68,92,null,null,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:43:40,null,-1,-1,25,27,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:38:49,null,-1,-1,null,null,84,72,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:31:15,卫生纸,-1,-1,null,null,null,null,6), 
UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:35:02,null,-1,-1,1,0,null,null,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:38:16,苹果,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:33:43,null,-1,-1,18,50,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:32:28,null,-1,-1,null,null,43,28,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:29:36,苹果,-1,-1,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:29:37,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:10:40,卫生纸,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:17:40,null,-1,-1,19,16,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:44:17,保温杯,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:14:53,吸尘器,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:39:44,null,35,81,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:55:45,null,-1,-1,null,null,54,95,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:08:52,null,19,37,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:03:50,null,-1,-1,null,null,94,37,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:09:21,null,25,77,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:27:20,null,-1,-1,63,58,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 
20:09:35,保温杯,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:17:39,null,3,75,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:15:48,null,74,63,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:22:16,null,-1,-1,31,68,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:28:45,卫生纸,-1,-1,null,null,null,null,7)))
(51fb751de8fe466798e28f3b5f650570,CompactBuffer(UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:34:30,null,50,99,null,null,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,6,2019-10-10 18:45:27,null,81,37,null,null,null,null,4), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:12:23,null,-1,-1,null,null,44,16,8), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:47:48,Lamer,-1,-1,null,null,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:04:21,null,55,49,null,null,null,null,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,4,2019-10-10 18:43:34,null,53,82,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:20:56,null,83,53,null,null,null,null,1), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:11:11,null,-1,-1,null,null,23,65,2), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:41:37,保温杯,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:37:34,null,-1,-1,52,69,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:08:44,null,71,24,null,null,null,null,1), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,9,2019-10-10 18:01:18,null,89,84,null,null,null,null,8), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:31:10,null,-1,-1,7,14,null,null,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,8,2019-10-10 18:29:04,null,69,63,null,null,null,null,3), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:58:29,null,91,29,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,6,2019-10-10 18:01:36,机器学习,-1,-1,null,null,null,null,1), 
UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:01:12,null,-1,-1,62,48,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:22:21,null,57,14,null,null,null,null,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:05:57,null,6,21,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:19:25,null,-1,-1,null,null,25,93,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:02:11,null,-1,-1,5,81,null,null,7), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:42:42,null,-1,-1,null,null,28,58,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,8,2019-10-10 18:18:45,null,36,18,null,null,null,null,4), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:31:37,null,-1,-1,null,null,27,47,6)))
(3fd2029afa4e4a7a833192864a8c259e,CompactBuffer(UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:51:51,null,-1,-1,null,null,31,88,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:17:44,保温杯,-1,-1,null,null,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:45:43,null,-1,-1,91,29,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:22:06,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:54:25,null,-1,-1,70,86,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:48:15,null,43,12,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:23:27,null,61,63,null,null,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:53:22,null,6,54,null,null,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:31:43,null,68,2,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:18:58,null,-1,-1,null,null,52,56,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:27:02,null,-1,-1,null,null,17,6,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:25:05,null,91,65,null,null,null,null,0), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:09:17,null,51,51,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:11:38,null,36,53,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:53:27,null,0,86,null,null,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:39:33,null,-1,-1,null,null,75,65,0), 
UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:24:51,卫生纸,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:19:22,Lamer,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:15:01,null,-1,-1,null,null,16,15,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:07:44,null,-1,-1,70,5,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:50:33,null,-1,-1,75,5,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:55:21,null,-1,-1,91,67,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:04:06,null,75,0,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:22:38,null,28,88,null,null,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:03:21,null,-1,-1,34,42,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:03:26,null,-1,-1,null,null,34,95,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,9,2019-10-10 20:54:22,null,-1,-1,null,null,65,20,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:35:01,null,-1,-1,null,null,22,33,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:20:53,null,-1,-1,null,null,75,31,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:13:00,null,21,24,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:09:04,华为手机,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:51:07,null,-1,-1,77,28,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,5,2019-10-10 20:00:05,null,-1,-1,null,null,44,22,6), 
UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:12:32,null,19,42,null,null,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:25:31,null,-1,-1,54,94,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:04:24,null,-1,-1,null,null,55,49,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:53:49,null,-1,-1,76,3,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:06:07,null,-1,-1,null,null,64,98,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:57:30,Lamer,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:24:12,小龙虾,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:18:45,null,-1,-1,null,null,88,89,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:50:04,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:36:11,吸尘器,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:51:27,null,54,72,null,null,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:00:02,null,60,52,null,null,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:35:25,null,13,31,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:36:29,null,49,92,null,null,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:01:30,null,-1,-1,null,null,25,89,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,9,2019-10-10 20:50:31,null,-1,-1,41,86,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 
20:53:16,null,22,83,null,null,null,null,9)))
(0dfb1ee48afc4e3cb897e7949acba433,CompactBuffer(UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:04:54,null,-1,-1,41,34,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:29:26,null,-1,-1,57,14,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:11:01,null,-1,-1,null,null,40,25,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:01:38,null,90,12,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:26:09,null,71,86,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:20:05,null,-1,-1,null,null,14,24,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:52:54,null,41,59,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:54:21,null,47,43,null,null,null,null,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:55:29,null,-1,-1,null,null,1,97,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:04:02,null,-1,-1,84,60,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:08:02,null,75,67,null,null,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:16:41,null,-1,-1,98,28,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:22:20,null,37,30,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:53:00,null,-1,-1,null,null,42,2,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:57:49,null,96,93,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:24:46,null,-1,-1,null,null,74,6,1), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:23:00,null,-1,-1,null,null,37,84,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:23:47,Lamer,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:28:12,null,-1,-1,17,47,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:26:52,null,-1,-1,null,null,30,28,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:39:00,null,-1,-1,95,79,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:24:19,null,-1,-1,21,93,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:52:33,null,49,40,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:17:56,null,-1,-1,null,null,13,75,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:14:58,null,-1,-1,null,null,70,76,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:23:38,null,80,64,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:25:25,null,-1,-1,null,null,29,4,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:26:09,null,-1,-1,18,11,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:38:35,null,78,86,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:53:14,null,-1,-1,82,15,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:28:02,小龙虾,-1,-1,null,null,null,null,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:09:44,null,87,36,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:55:39,null,21,65,null,null,null,null,5), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:21:02,吸尘器,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:07:20,null,-1,-1,36,85,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:58:12,null,3,33,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:46:33,null,-1,-1,null,null,72,76,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:01:19,null,-1,-1,null,null,19,36,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:34:21,null,70,66,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:32:33,小龙虾,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:01:54,华为手机,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:56:25,null,-1,-1,79,63,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:30:08,null,-1,-1,53,54,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:58:02,null,79,72,null,null,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:03:30,null,50,3,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:31:45,null,-1,-1,17,7,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:29:31,null,-1,-1,40,49,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:30:15,null,-1,-1,6,13,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:20:40,null,-1,-1,23,74,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:22:31,null,-1,-1,51,84,null,null,2), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:47:15,null,-1,-1,null,null,36,36,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:38:55,null,12,35,null,null,null,null,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:18:31,null,-1,-1,58,70,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:17:50,null,72,4,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:32:23,null,27,90,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:37:42,null,-1,-1,null,null,72,62,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:10:42,null,34,71,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:17:17,null,81,27,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:33:30,null,72,36,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:30:02,null,31,42,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:12:16,null,-1,-1,null,null,49,39,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:38:13,null,57,28,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:09:08,null,-1,-1,9,6,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:17:08,null,-1,-1,45,39,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:02:29,null,-1,-1,null,null,53,88,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:11:26,null,-1,-1,95,54,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 
16:35:43,null,62,54,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:31:24,null,-1,-1,76,34,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:20:41,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:55:03,null,72,79,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:06:03,null,-1,-1,null,null,34,61,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:01:54,卫生纸,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:39:21,null,-1,-1,null,null,41,73,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:24:04,null,97,76,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:47:43,苹果,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:03:13,null,20,8,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:23:10,华为手机,-1,-1,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:13:03,null,49,87,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:17:23,null,83,14,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:47:50,null,-1,-1,60,60,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:30:18,null,-1,-1,null,null,65,21,9)))
step3
* Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time
(13,sessionid=a300de33934d49c4b02c1525879454bd|searchKeywords=吸尘器,保温杯,Lamer,华为手机,机器学习,洗面奶,小龙虾|clickCategoryIds=26,38,30,17,6,83,40,65,41,4,34,97,41,34,60,82,38,11,79,42,99,35,47,80,1,80,99,64,2|visitLength=3491|stepLength=94|startTime=2019-10-10)
(25,sessionid=5ddf7f6d6c9d485db9287b5ef34e077e|searchKeywords=苹果,吸尘器,保温杯|clickCategoryIds=28,59,96,29,91,21|visitLength=3322|stepLength=21|startTime=2019-10-10)
(41,sessionid=838220c6e46445d7bad52f18d3171bd8|searchKeywords=吸尘器,苹果,保温杯,洗面奶,机器学习,Lamer,华为手机|clickCategoryIds=29,80,46,79,6,98,80,34,41,30,41,1,34,64,13,98,1|visitLength=3458|stepLength=52|startTime=2019-10-10)
step4 聚合信息全部完成
* Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time|Age|Professional|Sex|City
(79bfe53461f146cc836f7219351588f0,sessionid=79bfe53461f146cc836f7219351588f0|searchKeywords=卫生纸,联想笔记本,华为手机,小龙虾,洗面奶,吸尘器,苹果,Lamer|clickCategoryIds=8,49,14,13,36,41,27,82,45,47,7,41,32|visitLength=3456|stepLength=67|startTime=2019-10-10|age=22|professional=professional2|sex=male|city=city57)
(228cf3d0777749799436de675ff441a4,sessionid=228cf3d0777749799436de675ff441a4|searchKeywords=洗面奶,卫生纸,联想笔记本,苹果,Lamer,机器学习,小龙虾,吸尘器,保温杯,华为手机|clickCategoryIds=15,90,38,45,66,28,12,15,81,58,62,90,94,90,82,99,11,69|visitLength=3432|stepLength=81|startTime=2019-10-10|age=29|professional=professional70|sex=female|city=city95)
(bd424c0bdfe64cd3af71de31a90c786b,sessionid=bd424c0bdfe64cd3af71de31a90c786b|searchKeywords=苹果,保温杯,洗面奶,Lamer,小龙虾,华为手机|clickCategoryIds=40,51,92,78,94|visitLength=3253|stepLength=42|startTime=2019-10-10|age=49|professional=professional29|sex=female|city=city34)
step5 过滤及累加器更新之后
0000-00-00 00:00:00,740 INFO --- [ Executor task launch worker for task 25] org.apache.spark.storage.ShuffleBlockFetcherIterator (line: 54) : Started 0 remote fetches in 6 ms
(4fe6f3eaf9914578bc03308618bdd7a5,sessionid=4fe6f3eaf9914578bc03308618bdd7a5|searchKeywords=苹果,洗面奶,保温杯,机器学习,Lamer,联想笔记本,华为手机,小龙虾|clickCategoryIds=12,92,10,25,75,28,28,18,51,92,39,63,82,70,60,86|visitLength=3391|stepLength=54|startTime=2019-10-10|age=47|professional=professional40|sex=male|city=city28)
(fa00c11085ec4e8eb409ddc35e351902,sessionid=fa00c11085ec4e8eb409ddc35e351902|searchKeywords=小龙虾,洗面奶,华为手机,苹果,联想笔记本,保温杯,机器学习|clickCategoryIds=51,2,84,89,97,82,79|visitLength=3400|stepLength=37|startTime=2019-10-10|age=29|professional=professional0|sex=male|city=city92)
(8c937c2d61a84937bbd37f03fb401123,sessionid=8c937c2d61a84937bbd37f03fb401123|searchKeywords=苹果,洗面奶,联想笔记本,机器学习,小龙虾,吸尘器,华为手机,Lamer,卫生纸|clickCategoryIds=15,0,38,60,65,7,96,23,13,20,74,69,95,16,9,84,22,33,73,96,70,77|visitLength=3503|stepLength=87|startTime=2019-10-10|age=47|professional=professional40|sex=male|city=city28)
(4a7986dbdb
step6 计算比率存入数据库
然后commerce数据库中就会有一张表session_stat_ratio_0416
*/
SessionAccumulator.scala自定义累加器
import org.apache.spark.util.AccumulatorV2
import scala.collection.mutable
/**
 * Custom Spark accumulator that counts how many times each string key was added.
 *
 * Every call to `add` increments the counter stored under the given key, and
 * `merge` sums the counters of another `SessionAccumulator` into this one.
 * According to the original author's note it is used by `SessionStat`.
 */
class SessionAccumulator extends AccumulatorV2[String, mutable.HashMap[String, Int]] {

  /** Backing store: key -> number of times that key has been added. */
  val countMap = new mutable.HashMap[String, Int]()

  /** @return true when no key has been counted yet */
  override def isZero: Boolean = countMap.isEmpty

  /** @return a new accumulator holding an independent copy of the current counters */
  override def copy(): AccumulatorV2[String, mutable.HashMap[String, Int]] = {
    val acc = new SessionAccumulator
    acc.countMap ++= this.countMap
    acc
  }

  /** Removes every counter, bringing the accumulator back to its zero state. */
  override def reset(): Unit = countMap.clear()

  /**
   * Increments the counter of `v` by one, starting from 0 for an unseen key.
   *
   * @param v key whose counter is incremented
   */
  override def add(v: String): Unit =
    countMap.update(v, countMap.getOrElse(v, 0) + 1)

  /**
   * Adds every counter of `other` to the counter with the same key in this accumulator.
   *
   * @param other accumulator to fold into this one; must be a `SessionAccumulator`
   * @throws UnsupportedOperationException if `other` is not a `SessionAccumulator`
   */
  override def merge(other: AccumulatorV2[String, mutable.HashMap[String, Int]]): Unit =
    other match {
      case acc: SessionAccumulator =>
        acc.countMap.foreach { case (key, count) =>
          countMap.update(key, countMap.getOrElse(key, 0) + count)
        }
      case _ =>
        // Fail with an explicit message instead of an opaque scala.MatchError.
        throw new UnsupportedOperationException(
          s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
    }

  /** @return the live counter map (the internal map itself, not a copy) */
  override def value: mutable.HashMap[String, Int] = countMap
}
SessionAggrStat.scala 样例类
/**
 * Row model holding the aggregated session statistics of one task.
 *
 * Judging by the field names, each `visit_length_*_ratio` / `step_length_*_ratio`
 * value is the share of sessions whose visit duration / step count falls into the
 * named bucket (NOTE(review): bucket boundaries are defined by the caller — confirm).
 *
 * @param taskUUID                   identifier of the task these statistics belong to
 * @param session_count              number of sessions
 * @param visit_length_1s_3s_ratio   ratio for the "1s-3s" visit-length bucket
 * @param visit_length_4s_6s_ratio   ratio for the "4s-6s" visit-length bucket
 * @param visit_length_7s_9s_ratio   ratio for the "7s-9s" visit-length bucket
 * @param visit_length_10s_30s_ratio ratio for the "10s-30s" visit-length bucket
 * @param visit_length_30s_60s_ratio ratio for the "30s-60s" visit-length bucket
 * @param visit_length_1m_3m_ratio   ratio for the "1m-3m" visit-length bucket
 * @param visit_length_3m_10m_ratio  ratio for the "3m-10m" visit-length bucket
 * @param visit_length_10m_30m_ratio ratio for the "10m-30m" visit-length bucket
 * @param visit_length_30m_ratio     ratio for the "30m" visit-length bucket
 * @param step_length_1_3_ratio      ratio for the "1-3" step-length bucket
 * @param step_length_4_6_ratio      ratio for the "4-6" step-length bucket
 * @param step_length_7_9_ratio      ratio for the "7-9" step-length bucket
 * @param step_length_10_30_ratio    ratio for the "10-30" step-length bucket
 * @param step_length_30_60_ratio    ratio for the "30-60" step-length bucket
 * @param step_legth_60_ratio        ratio for the "60" step-length bucket; the name is
 *                                   misspelled ("legth") but is kept because renaming the
 *                                   field would change the public interface
 */
case class SessionAggrStat(
    taskUUID: String,
    session_count: Long,
    // Visit-length buckets.
    visit_length_1s_3s_ratio: Double,
    visit_length_4s_6s_ratio: Double,
    visit_length_7s_9s_ratio: Double,
    visit_length_10s_30s_ratio: Double,
    visit_length_30s_60s_ratio: Double,
    visit_length_1m_3m_ratio: Double,
    visit_length_3m_10m_ratio: Double,
    visit_length_10m_30m_ratio: Double,
    visit_length_30m_ratio: Double,
    // Step-length buckets.
    step_length_1_3_ratio: Double,
    step_length_4_6_ratio: Double,
    step_length_7_9_ratio: Double,
    step_length_10_30_ratio: Double,
    step_length_30_60_ratio: Double,
    step_legth_60_ratio: Double
)
注:本文参考网络资源整理,仅作为个人自学笔记使用。
静悄悄 乱纷纷
都输给了时间
却没有辜负青春
他诚恳
才不让你等
你失落了黄昏
却换来平静夜深
众里寻人
错爱只是为真爱作证
所谓魔鬼留下的伤痕
都是天使的指纹
灯火阑珊
何必急于看到那个人
能睡得安稳都只因为
那盏还没开的灯
亮晶晶 黑沉沉
开了窗 关上门
谁给了你寂寞
寂寞还给你新生
谁的吻
都值得感恩
泪淋熄了欲望
笑却雕琢了皱纹
众里寻人
错爱只是为真爱作证
所谓魔鬼留下的伤痕
都是天使的指纹
灯火阑珊
何必急于看到那个人
能睡得安稳
都只因为
那盏还没开的灯
最初总坚持自以为是的缘分
最后才顺其自然看花开无声
离开你那个人
同时释放了你
你为何不转身
众里寻人
错爱只是为真爱作证
每次告别留下的伤痕
都是天使的指纹
灯火阑珊
你急着要看到那个人
他也在寻找你的身影
你也让别人在等
天使的指纹
孙燕姿