Scala

mac2024-12-02  22

Scala_spark-电商平台离线分析项目-需求一Session访问步长时长占比统计

共十个需求

需求一

代码实现

sessionStat.scala

import java.util.{Date, UUID}

import commons.conf.ConfigurationManager
import commons.constant.Constants
import commons.model.{UserInfo, UserVisitAction}
import commons.utils.{DateUtils, NumberUtils, ParamUtils, StringUtils, ValidUtils}
import net.sf.json.JSONObject
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.collection.mutable

/**
 * SessionStat: entry point of the session statistics job (requirement 1:
 * ratio of sessions per visit-length bucket and per step-length bucket).
 *
 * Project setup note: create a new "session" module, then add the Scala SDK to it
 * (Project Structure -> Global Libraries -> scala 2.11.8 -> right click -> Add to Modules).
 */
object SessionStat {

  /**
   * Runs the whole pipeline:
   *  1. load the raw actions for the configured date range,
   *  2. group them by session id,
   *  3. aggregate every session into one "key=value|key=value" string and join the user info,
   *  4. filter the sessions by the task parameters while updating the accumulator,
   *  5. turn the accumulator counts into ratios and append them to the JDBC table.
   *
   * @param args command line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Filter conditions of the task, stored as a JSON string in the configuration.
    val jsonStr = ConfigurationManager.config.getString(Constants.TASK_PARAMS)
    val taskParam = JSONObject.fromObject(jsonStr)

    // Unique key identifying this run in the result table.
    val taskUUID = UUID.randomUUID().toString

    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("session")
    // The SparkSession wraps the SparkContext.
    val sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    try {
      // actionRDD: RDD[UserVisitAction]
      val actionRDD = getOriActionRDD(sparkSession, taskParam)
      // Debug 1: confirm the data was loaded.
      // actionRDD.foreach(println(_))

      // sessionID2ActionRDD: RDD[(sessionId, UserVisitAction)]
      val sessionID2ActionRDD = actionRDD.map(item => (item.session_id, item))

      // session2GroupActionRDD: RDD[(sessionId, Iterable[UserVisitAction])]
      val session2GroupActionRDD = sessionID2ActionRDD.groupByKey()
      session2GroupActionRDD.cache()
      // Debug 2:
      // session2GroupActionRDD.foreach(println(_))

      // Aggregation: RDD[(sessionId, fullInfo)]
      val sessionId2FullInfoRDD = getSessionFullInfo(sparkSession, session2GroupActionRDD)
      sessionId2FullInfoRDD.foreach(println(_))

      // Filtering: register the custom accumulator, it is updated while filtering.
      val sessionAccumulator = new SessionAccumulator
      sparkSession.sparkContext.register(sessionAccumulator)

      // sessionId2FilteredRDD: RDD[(sessionId, fullInfo)] with every session that passes the filter.
      val sessionId2FilteredRDD =
        getSessionFilteredRDD(taskParam, sessionId2FullInfoRDD, sessionAccumulator)

      // This action is what actually runs the filter and therefore fills the accumulator;
      // it must happen before sessionAccumulator.value is read below.
      sessionId2FilteredRDD.foreach(println(_))

      // Compute the ratios and store them in the database.
      getSessionRatio(sparkSession, taskUUID, sessionAccumulator.value)
    } finally {
      // Release the Spark resources even when one of the steps above fails.
      sparkSession.stop()
    }
  }

  /**
   * Step 6: converts the accumulator counts into ratios and appends one row to the
   * JDBC table "session_stat_ratio_0416".
   *
   * @param sparkSession active Spark session, used to build the one-row DataFrame
   * @param taskUUID     unique id of this run, stored with the row
   * @param value        accumulator content: bucket name -> number of sessions
   */
  def getSessionRatio(sparkSession: SparkSession, taskUUID: String, value: mutable.HashMap[String, Int]): Unit = {
    // Total number of sessions. Falls back to 1 when the key is missing so the divisions
    // below never divide by zero (the stored session count is then 1 as well).
    val sessionCount: Double = value.getOrElse(Constants.SESSION_COUNT, 1).toDouble

    // Share of sessions in the given bucket, formatted by NumberUtils.formatDouble with scale 2.
    def ratioOf(bucket: String) =
      NumberUtils.formatDouble(value.getOrElse(bucket, 0) / sessionCount, 2)

    // Visit-length buckets.
    val visitLength1s3sRatio = ratioOf(Constants.TIME_PERIOD_1s_3s)
    val visitLength4s6sRatio = ratioOf(Constants.TIME_PERIOD_4s_6s)
    val visitLength7s9sRatio = ratioOf(Constants.TIME_PERIOD_7s_9s)
    val visitLength10s30sRatio = ratioOf(Constants.TIME_PERIOD_10s_30s)
    val visitLength30s60sRatio = ratioOf(Constants.TIME_PERIOD_30s_60s)
    val visitLength1m3mRatio = ratioOf(Constants.TIME_PERIOD_1m_3m)
    val visitLength3m10mRatio = ratioOf(Constants.TIME_PERIOD_3m_10m)
    val visitLength10m30mRatio = ratioOf(Constants.TIME_PERIOD_10m_30m)
    val visitLength30mRatio = ratioOf(Constants.TIME_PERIOD_30m)

    // Step-length buckets.
    val stepLength1To3Ratio = ratioOf(Constants.STEP_PERIOD_1_3)
    val stepLength4To6Ratio = ratioOf(Constants.STEP_PERIOD_4_6)
    val stepLength7To9Ratio = ratioOf(Constants.STEP_PERIOD_7_9)
    val stepLength10To30Ratio = ratioOf(Constants.STEP_PERIOD_10_30)
    val stepLength30To60Ratio = ratioOf(Constants.STEP_PERIOD_30_60)
    val stepLength60Ratio = ratioOf(Constants.STEP_PERIOD_60)

    // Wrap the statistics in the domain object.
    val stat = SessionAggrStat(
      taskUUID,
      sessionCount.toInt,
      visitLength1s3sRatio,
      visitLength4s6sRatio,
      visitLength7s9sRatio,
      visitLength10s30sRatio,
      visitLength30s60sRatio,
      visitLength1m3mRatio,
      visitLength3m10mRatio,
      visitLength10m30mRatio,
      visitLength30mRatio,
      stepLength1To3Ratio,
      stepLength4To6Ratio,
      stepLength7To9Ratio,
      stepLength10To30Ratio,
      stepLength30To60Ratio,
      stepLength60Ratio
    )

    val sessionRatioRDD = sparkSession.sparkContext.makeRDD(Array(stat))

    import sparkSession.implicits._
    sessionRatioRDD.toDF().write
      .format("jdbc")
      .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL))
      .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER))
      .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD))
      .option("dbtable", "session_stat_ratio_0416")
      .mode(SaveMode.Append)
      .save()
  }

  /**
   * Step 5 helper: adds the visit-length bucket of one session to the accumulator.
   * A visit length below 1 second matches no bucket and adds nothing.
   *
   * @param visitLength                visit length of the session in seconds
   * @param sessionAggrStatAccumulator accumulator that receives the bucket name
   */
  def calculateVisitLength(visitLength: Long, sessionAggrStatAccumulator: SessionAccumulator): Unit = {
    val bucket: Option[String] = visitLength match {
      case v if v >= 1 && v <= 3      => Some(Constants.TIME_PERIOD_1s_3s)
      case v if v >= 4 && v <= 6      => Some(Constants.TIME_PERIOD_4s_6s)
      case v if v >= 7 && v <= 9      => Some(Constants.TIME_PERIOD_7s_9s)
      case v if v >= 10 && v <= 30    => Some(Constants.TIME_PERIOD_10s_30s)
      case v if v > 30 && v <= 60     => Some(Constants.TIME_PERIOD_30s_60s)
      case v if v > 60 && v <= 180    => Some(Constants.TIME_PERIOD_1m_3m)
      case v if v > 180 && v <= 600   => Some(Constants.TIME_PERIOD_3m_10m)
      case v if v > 600 && v <= 1800  => Some(Constants.TIME_PERIOD_10m_30m)
      case v if v > 1800              => Some(Constants.TIME_PERIOD_30m)
      case _                          => None
    }
    bucket.foreach(name => sessionAggrStatAccumulator.add(name))
  }

  /**
   * Step 5 helper: adds the step-length bucket of one session to the accumulator.
   * A step length below 1 matches no bucket and adds nothing.
   *
   * @param stepLength                 number of actions in the session
   * @param sessionAggrStatAccumulator accumulator that receives the bucket name
   */
  def calculateStepLength(stepLength: Long, sessionAggrStatAccumulator: SessionAccumulator): Unit = {
    val bucket: Option[String] = stepLength match {
      case s if s >= 1 && s <= 3    => Some(Constants.STEP_PERIOD_1_3)
      case s if s >= 4 && s <= 6    => Some(Constants.STEP_PERIOD_4_6)
      case s if s >= 7 && s <= 9    => Some(Constants.STEP_PERIOD_7_9)
      case s if s >= 10 && s <= 30  => Some(Constants.STEP_PERIOD_10_30)
      case s if s > 30 && s <= 60   => Some(Constants.STEP_PERIOD_30_60)
      case s if s > 60              => Some(Constants.STEP_PERIOD_60)
      case _                        => None
    }
    bucket.foreach(name => sessionAggrStatAccumulator.add(name))
  }

  /**
   * Step 5: keeps the sessions that satisfy the task parameters and, for every kept
   * session, adds the session count and its visit-length / step-length bucket to the
   * accumulator.
   *
   * The accumulator is updated inside the `filter` transformation, so it is only filled
   * once an action runs on the returned RDD. NOTE(review): running several actions on the
   * returned RDD without caching it would presumably re-run the filter and count twice.
   *
   * @param taskParam             task parameters (age range, professions, cities, sex, keywords, category ids)
   * @param sessionId2FullInfoRDD RDD[(sessionId, fullInfo)] produced by getSessionFullInfo
   * @param sessionAccumulator    registered accumulator updated for every session that passes
   * @return RDD[(sessionId, fullInfo)] containing only the sessions that pass every check
   */
  def getSessionFilteredRDD(taskParam: JSONObject,
                            sessionId2FullInfoRDD: RDD[(String, String)],
                            sessionAccumulator: SessionAccumulator): RDD[(String, String)] = {
    val startAge = ParamUtils.getParam(taskParam, Constants.PARAM_START_AGE)
    val endAge = ParamUtils.getParam(taskParam, Constants.PARAM_END_AGE)
    val professionals = ParamUtils.getParam(taskParam, Constants.PARAM_PROFESSIONALS)
    val cities = ParamUtils.getParam(taskParam, Constants.PARAM_CITIES)
    val sex = ParamUtils.getParam(taskParam, Constants.PARAM_SEX)
    val keywords = ParamUtils.getParam(taskParam, Constants.PARAM_KEYWORDS)
    val categoryIds = ParamUtils.getParam(taskParam, Constants.PARAM_CATEGORY_IDS)

    // Serialize the parameters that are present as "name=value" joined with "|".
    // mkString puts the separator only between fragments, so every fragment (including
    // cities) is delimited and there is no trailing "|" to strip afterwards.
    val filterInfo = Seq(
      Constants.PARAM_START_AGE -> startAge,
      Constants.PARAM_END_AGE -> endAge,
      Constants.PARAM_PROFESSIONALS -> professionals,
      Constants.PARAM_CITIES -> cities,
      Constants.PARAM_SEX -> sex,
      Constants.PARAM_KEYWORDS -> keywords,
      Constants.PARAM_CATEGORY_IDS -> categoryIds
    ).collect { case (name, paramValue) if paramValue != null => name + "=" + paramValue }
      .mkString("|")

    sessionId2FullInfoRDD.filter { case (_, fullInfo) =>
      // Arguments are (data, data field, parameters, parameter field). The parameter field
      // must be the PARAM_* name that was used to build filterInfo above.
      val success =
        ValidUtils.between(fullInfo, Constants.FIELD_AGE, filterInfo, Constants.PARAM_START_AGE, Constants.PARAM_END_AGE) &&
          ValidUtils.in(fullInfo, Constants.FIELD_PROFESSIONAL, filterInfo, Constants.PARAM_PROFESSIONALS) &&
          // cities was read and serialized but never checked before; apply it like the other list filters.
          ValidUtils.in(fullInfo, Constants.FIELD_CITY, filterInfo, Constants.PARAM_CITIES) &&
          ValidUtils.equal(fullInfo, Constants.FIELD_SEX, filterInfo, Constants.PARAM_SEX) &&
          ValidUtils.in(fullInfo, Constants.FIELD_SEARCH_KEYWORDS, filterInfo, Constants.PARAM_KEYWORDS) &&
          ValidUtils.in(fullInfo, Constants.FIELD_CLICK_CATEGORY_IDS, filterInfo, Constants.PARAM_CATEGORY_IDS)

      // On top of filtering, update the accumulator for every session that passes.
      if (success) {
        sessionAccumulator.add(Constants.SESSION_COUNT)
        val visitLength = StringUtils.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_VISIT_LENGTH).toLong
        val stepLength = StringUtils.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_STEP_LENGTH).toLong
        calculateVisitLength(visitLength, sessionAccumulator)
        calculateStepLength(stepLength, sessionAccumulator)
      }

      // filter needs a Boolean result.
      success
    }
  }

  /**
   * Steps 2-4: aggregates every session into one string and joins the user information.
   *
   * Input element (abbreviated sample):
   * {{{
   * (4bc33302668f4331aba52c8328a781c7, CompactBuffer(
   *   UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:45:35,联想笔记本,-1,-1,null,null,null,null,0),
   *   UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:58:49,null,-1,-1,59,20,null,null,3),
   *   ...))
   * }}}
   *
   * Step 3 aggregated info, keyed by user id:
   * Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time
   *
   * Step 4 full info, keyed by session id again:
   * Session_Id|Search_Keywords|Click_Category_Ids|Visit_Length|Step_Length|Start_Time|Age|Professional|Sex|City
   *
   * The join with user_info is an inner join, so a session whose user id has no row in
   * user_info does not appear in the result.
   *
   * @param sparkSession           active Spark session, used to read the user_info table
   * @param session2GroupActionRDD RDD[(sessionId, actions of that session)]
   * @return RDD[(sessionId, fullInfo)]
   */
  def getSessionFullInfo(sparkSession: SparkSession,
                         session2GroupActionRDD: RDD[(String, Iterable[UserVisitAction])]): RDD[(String, String)] = {
    // Step 3 - userId2AggrInfoRDD: RDD[(userId, aggrInfo)]
    val userId2AggrInfoRDD = session2GroupActionRDD.map { case (sessionId, iterableAction) =>
      var userId = -1L
      var startTime: Date = null
      var endTime: Date = null
      var stepLength = 0
      // Insertion-ordered sets de-duplicate on the whole value. The previous
      // String.contains checks were substring tests for keywords and, for the numeric
      // category id, not a membership test on the collected ids at all.
      val searchKeywords = mutable.LinkedHashSet.empty[String]
      val clickCategories = mutable.LinkedHashSet.empty[String]

      for (action <- iterableAction) {
        if (userId == -1L) {
          userId = action.user_id
        }

        // Widen the [startTime, endTime] interval so it covers every action of the session.
        val actionTime = DateUtils.parseTime(action.action_time)
        if (startTime == null || startTime.after(actionTime)) {
          startTime = actionTime
        }
        if (endTime == null || endTime.before(actionTime)) {
          endTime = actionTime
        }

        val searchKeyword = action.search_keyword
        if (StringUtils.isNotEmpty(searchKeyword)) {
          searchKeywords += searchKeyword
        }

        // -1 marks an action that is not a category click.
        val clickCategoryId = action.click_category_id
        if (clickCategoryId != -1) {
          clickCategories += clickCategoryId.toString
        }

        stepLength += 1
      }

      val searchKw = searchKeywords.mkString(",")
      val clickCg = clickCategories.mkString(",")
      // Visit length in seconds.
      val visitLength = (endTime.getTime - startTime.getTime) / 1000

      val aggrInfo = Constants.FIELD_SESSION_ID + "=" + sessionId + "|" +
        Constants.FIELD_SEARCH_KEYWORDS + "=" + searchKw + "|" +
        Constants.FIELD_CLICK_CATEGORY_IDS + "=" + clickCg + "|" +
        Constants.FIELD_VISIT_LENGTH + "=" + visitLength + "|" +
        Constants.FIELD_STEP_LENGTH + "=" + stepLength + "|" +
        Constants.FIELD_START_TIME + "=" + DateUtils.formatDate(startTime)

      // Keyed by userId because the user table joined next has no session id column.
      (userId, aggrInfo)
    }

    // Step 4 - join with the user_info table to get the complete information.
    val sql = "select * from user_info"

    import sparkSession.implicits._
    // userId2InfoIdRDD: RDD[(userId, UserInfo)]
    val userId2InfoIdRDD: RDD[(Long, UserInfo)] =
      sparkSession.sql(sql).as[UserInfo].rdd.map(item => (item.user_id, item))

    val sessionId2FullInfoRDD = userId2AggrInfoRDD.join(userId2InfoIdRDD).map {
      case (_, (aggrInfo, userInfo)) =>
        val fullInfo = aggrInfo + "|" +
          Constants.FIELD_AGE + "=" + userInfo.age + "|" +
          Constants.FIELD_PROFESSIONAL + "=" + userInfo.professional + "|" +
          Constants.FIELD_SEX + "=" + userInfo.sex + "|" +
          Constants.FIELD_CITY + "=" + userInfo.city

        // userId was only needed for the join; switch the key back to the session id.
        val sessionId = StringUtils.getFieldFromConcatString(aggrInfo, "\\|", Constants.FIELD_SESSION_ID)

        (sessionId, fullInfo)
    }

    sessionId2FullInfoRDD
  }

  /**
   * Step 1: loads the user actions whose date lies in the configured [startDate, endDate] range.
   *
   * @param sparkSession active Spark session, used to query the user_visit_action table
   * @param taskParam    task parameters holding the start and end date
   * @return RDD[UserVisitAction] with the matching rows
   */
  def getOriActionRDD(sparkSession: SparkSession, taskParam: JSONObject): RDD[UserVisitAction] = {
    val startDate = ParamUtils.getParam(taskParam, Constants.PARAM_START_DATE)
    val endDate = ParamUtils.getParam(taskParam, Constants.PARAM_END_DATE)

    val sql = s"select * from user_visit_action where date>='$startDate' and date<='$endDate'"

    // The implicits provide the encoder needed by as[UserVisitAction].
    import sparkSession.implicits._
    sparkSession.sql(sql).as[UserVisitAction].rdd
  }
}

/*
step1: confirm the data was loaded successfully
actionRDD.foreach(println(_))
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,3,2019-10-10 16:13:29,null,-1,-1,55,62,null,null,2)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,5,2019-10-10 16:57:26,null,3,93,null,null,null,null,8)
UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,2,2019-10-10
16:34:28,null,-1,-1,null,null,48,35,1) UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,4,2019-10-10 16:07:18,洗面奶,-1,-1,null,null,null,null,8) UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,6,2019-10-10 16:46:07,null,-1,-1,54,14,null,null,6) UserVisitAction(2019-10-10,21,d95c6c3cd7164e45ad483525b2132577,2,2019-10-10 16:07:43,苹果,-1,-1,null,null,null,null,0) UserVisitAction(2019-10-10,21,a69c2dc66dfb47a796c9dd0ac15bd623,7,2019-10-10 17:18:43,null,-1,-1,null,null,35,51,6) UserVisitAction(2019-10-10,21,a69c2dc66dfb47a7 step2 斧子形的 以session为key 以CompactBuffer(一条条action)为value session2GroupActionRDD.foreach(println(_)) (4bc33302668f4331aba52c8328a781c7,CompactBuffer(UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:45:35,联想笔记本,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:46:24,吸尘器,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:18:10,保温杯,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:58:49,null,-1,-1,59,20,null,null,3), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:03:04,null,-1,-1,59,78,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:02:11,null,-1,-1,22,56,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:31:47,null,-1,-1,98,64,null,null,6), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:40:32,null,-1,-1,14,89,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:47:58,null,-1,-1,null,null,36,75,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:28:19,null,-1,-1,96,79,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:31:50,null,83,3,null,null,null,null,3), 
UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:11:05,机器学习,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:58:18,null,-1,-1,null,null,66,25,0), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:14:06,null,24,97,null,null,null,null,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:56:48,null,-1,-1,32,46,null,null,9), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:50:12,null,33,84,null,null,null,null,2), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:32:32,null,-1,-1,4,15,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:02:53,吸尘器,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 12:04:02,null,-1,-1,44,80,null,null,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:36:43,null,-1,-1,null,null,60,54,3), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:58:52,华为手机,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:54:11,洗面奶,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:09:19,null,42,67,null,null,null,null,2), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:44:23,null,44,17,null,null,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 12:09:38,卫生纸,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,1,2019-10-10 12:05:18,null,18,74,null,null,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,2,2019-10-10 12:24:06,卫生纸,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,6,2019-10-10 
12:32:27,null,74,85,null,null,null,null,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:18:19,null,-1,-1,null,null,12,89,4), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:26:21,保温杯,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,5,2019-10-10 12:41:48,联想笔记本,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:04:17,null,12,79,null,null,null,null,5), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 12:17:24,null,-1,-1,93,98,null,null,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,7,2019-10-10 12:27:52,null,-1,-1,null,null,58,44,8), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,9,2019-10-10 12:34:27,null,-1,-1,null,null,52,69,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:16:49,null,-1,-1,null,null,13,90,1), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,4,2019-10-10 12:37:47,null,-1,-1,86,67,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,8,2019-10-10 12:10:56,null,-1,-1,null,null,62,31,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,0,2019-10-10 12:01:41,null,4,0,null,null,null,null,7), UserVisitAction(2019-10-10,95,4bc33302668f4331aba52c8328a781c7,3,2019-10-10 12:48:00,null,-1,-1,87,86,null,null,6))) (209cff604b4a4d5db0e53ab7adf80946,CompactBuffer(UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:21:41,null,61,73,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:10:55,保温杯,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:26:47,机器学习,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:07:52,null,-1,-1,56,96,null,null,1), 
UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:46:01,null,36,56,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:45:58,null,55,62,null,null,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:46:12,null,41,4,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:48:00,null,-1,-1,18,85,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:36:57,null,69,16,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:32:34,null,-1,-1,null,null,12,62,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:53:19,null,-1,-1,19,44,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:07:54,null,-1,-1,26,16,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:10:14,null,-1,-1,null,null,85,36,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:35:18,null,92,79,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:09:46,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:52:57,null,35,53,null,null,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:31:08,null,34,58,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:21:42,null,-1,-1,14,97,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:46:30,机器学习,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:38:57,null,-1,-1,null,null,63,7,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 
20:34:19,洗面奶,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:36:24,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:38:10,null,-1,-1,59,0,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:50:45,null,0,13,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:33:28,null,-1,-1,null,null,13,37,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:33:56,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:10:03,null,58,72,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:32:36,卫生纸,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:26:17,null,-1,-1,12,67,null,null,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:54:41,null,-1,-1,93,17,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:07:23,洗面奶,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:10:33,Lamer,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:51:04,null,-1,-1,null,null,64,13,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:00:53,洗面奶,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:18:09,null,65,57,null,null,null,null,3), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:25:05,null,-1,-1,58,73,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:27:06,null,-1,-1,null,null,73,73,0), 
UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:36:19,null,-1,-1,null,null,66,43,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,4,2019-10-10 20:56:23,null,34,60,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:09:31,null,12,14,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:29:15,null,55,56,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:07:22,null,27,28,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:37:16,null,-1,-1,null,null,71,2,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:04:34,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:36:35,null,91,36,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:25:52,null,68,92,null,null,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:43:40,null,-1,-1,25,27,null,null,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:38:49,null,-1,-1,null,null,84,72,1), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:31:15,卫生纸,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:35:02,null,-1,-1,1,0,null,null,7), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:38:16,苹果,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:33:43,null,-1,-1,18,50,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:32:28,null,-1,-1,null,null,43,28,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 
20:29:36,苹果,-1,-1,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,8,2019-10-10 20:29:37,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:10:40,卫生纸,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:17:40,null,-1,-1,19,16,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,2,2019-10-10 20:44:17,保温杯,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,6,2019-10-10 20:14:53,吸尘器,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:39:44,null,35,81,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,5,2019-10-10 20:55:45,null,-1,-1,null,null,54,95,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:08:52,null,19,37,null,null,null,null,6), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:03:50,null,-1,-1,null,null,94,37,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:09:21,null,25,77,null,null,null,null,8), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,7,2019-10-10 20:27:20,null,-1,-1,63,58,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,0,2019-10-10 20:09:35,保温杯,-1,-1,null,null,null,null,2), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:17:39,null,3,75,null,null,null,null,9), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,1,2019-10-10 20:15:48,null,74,63,null,null,null,null,0), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,3,2019-10-10 20:22:16,null,-1,-1,31,68,null,null,4), UserVisitAction(2019-10-10,76,209cff604b4a4d5db0e53ab7adf80946,9,2019-10-10 20:28:45,卫生纸,-1,-1,null,null,null,null,7))) 
(51fb751de8fe466798e28f3b5f650570,CompactBuffer(UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:34:30,null,50,99,null,null,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,6,2019-10-10 18:45:27,null,81,37,null,null,null,null,4), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:12:23,null,-1,-1,null,null,44,16,8), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:47:48,Lamer,-1,-1,null,null,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:04:21,null,55,49,null,null,null,null,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,4,2019-10-10 18:43:34,null,53,82,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:20:56,null,83,53,null,null,null,null,1), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:11:11,null,-1,-1,null,null,23,65,2), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:41:37,保温杯,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:37:34,null,-1,-1,52,69,null,null,9), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:08:44,null,71,24,null,null,null,null,1), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,9,2019-10-10 18:01:18,null,89,84,null,null,null,null,8), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:31:10,null,-1,-1,7,14,null,null,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,8,2019-10-10 18:29:04,null,69,63,null,null,null,null,3), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:58:29,null,91,29,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,6,2019-10-10 18:01:36,机器学习,-1,-1,null,null,null,null,1), 
UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:01:12,null,-1,-1,62,48,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,3,2019-10-10 18:22:21,null,57,14,null,null,null,null,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,7,2019-10-10 18:05:57,null,6,21,null,null,null,null,0), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,0,2019-10-10 18:19:25,null,-1,-1,null,null,25,93,6), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:02:11,null,-1,-1,5,81,null,null,7), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,2,2019-10-10 18:42:42,null,-1,-1,null,null,28,58,5), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,8,2019-10-10 18:18:45,null,36,18,null,null,null,null,4), UserVisitAction(2019-10-10,76,51fb751de8fe466798e28f3b5f650570,1,2019-10-10 18:31:37,null,-1,-1,null,null,27,47,6))) (3fd2029afa4e4a7a833192864a8c259e,CompactBuffer(UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:51:51,null,-1,-1,null,null,31,88,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:17:44,保温杯,-1,-1,null,null,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:45:43,null,-1,-1,91,29,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:22:06,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:54:25,null,-1,-1,70,86,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:48:15,null,43,12,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:23:27,null,61,63,null,null,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:53:22,null,6,54,null,null,null,null,9), 
UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:31:43,null,68,2,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:18:58,null,-1,-1,null,null,52,56,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:27:02,null,-1,-1,null,null,17,6,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:25:05,null,91,65,null,null,null,null,0), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:09:17,null,51,51,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:11:38,null,36,53,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:53:27,null,0,86,null,null,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:39:33,null,-1,-1,null,null,75,65,0), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:24:51,卫生纸,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:19:22,Lamer,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:15:01,null,-1,-1,null,null,16,15,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:07:44,null,-1,-1,70,5,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:50:33,null,-1,-1,75,5,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:55:21,null,-1,-1,91,67,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:04:06,null,75,0,null,null,null,null,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:22:38,null,28,88,null,null,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 
20:03:21,null,-1,-1,34,42,null,null,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:03:26,null,-1,-1,null,null,34,95,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,9,2019-10-10 20:54:22,null,-1,-1,null,null,65,20,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:35:01,null,-1,-1,null,null,22,33,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:20:53,null,-1,-1,null,null,75,31,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:13:00,null,21,24,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:09:04,华为手机,-1,-1,null,null,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:51:07,null,-1,-1,77,28,null,null,3), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,5,2019-10-10 20:00:05,null,-1,-1,null,null,44,22,6), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:12:32,null,19,42,null,null,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:25:31,null,-1,-1,54,94,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,3,2019-10-10 20:04:24,null,-1,-1,null,null,55,49,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 20:53:49,null,-1,-1,76,3,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:06:07,null,-1,-1,null,null,64,98,1), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:57:30,Lamer,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:24:12,小龙虾,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,1,2019-10-10 20:18:45,null,-1,-1,null,null,88,89,5), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,2,2019-10-10 
20:50:04,小龙虾,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:36:11,吸尘器,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,6,2019-10-10 20:51:27,null,54,72,null,null,null,null,9), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,4,2019-10-10 20:00:02,null,60,52,null,null,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,8,2019-10-10 20:35:25,null,13,31,null,null,null,null,4), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:36:29,null,49,92,null,null,null,null,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,7,2019-10-10 20:01:30,null,-1,-1,null,null,25,89,2), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,9,2019-10-10 20:50:31,null,-1,-1,41,86,null,null,7), UserVisitAction(2019-10-10,40,3fd2029afa4e4a7a833192864a8c259e,0,2019-10-10 20:53:16,null,22,83,null,null,null,null,9))) (0dfb1ee48afc4e3cb897e7949acba433,CompactBuffer(UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:04:54,null,-1,-1,41,34,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:29:26,null,-1,-1,57,14,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:11:01,null,-1,-1,null,null,40,25,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:01:38,null,90,12,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:26:09,null,71,86,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:20:05,null,-1,-1,null,null,14,24,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:52:54,null,41,59,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:54:21,null,47,43,null,null,null,null,7), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:55:29,null,-1,-1,null,null,1,97,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:04:02,null,-1,-1,84,60,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:08:02,null,75,67,null,null,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:16:41,null,-1,-1,98,28,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:22:20,null,37,30,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:53:00,null,-1,-1,null,null,42,2,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:57:49,null,96,93,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:24:46,null,-1,-1,null,null,74,6,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:23:00,null,-1,-1,null,null,37,84,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:23:47,Lamer,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:28:12,null,-1,-1,17,47,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:26:52,null,-1,-1,null,null,30,28,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:39:00,null,-1,-1,95,79,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:24:19,null,-1,-1,21,93,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:52:33,null,49,40,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:17:56,null,-1,-1,null,null,13,75,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:14:58,null,-1,-1,null,null,70,76,5), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:23:38,null,80,64,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,0,2019-10-10 16:25:25,null,-1,-1,null,null,29,4,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:26:09,null,-1,-1,18,11,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:38:35,null,78,86,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:53:14,null,-1,-1,82,15,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:28:02,小龙虾,-1,-1,null,null,null,null,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:09:44,null,87,36,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:55:39,null,21,65,null,null,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:21:02,吸尘器,-1,-1,null,null,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:07:20,null,-1,-1,36,85,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:58:12,null,3,33,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:46:33,null,-1,-1,null,null,72,76,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:01:19,null,-1,-1,null,null,19,36,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:34:21,null,70,66,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:32:33,小龙虾,-1,-1,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:01:54,华为手机,-1,-1,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 
16:56:25,null,-1,-1,79,63,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:30:08,null,-1,-1,53,54,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:58:02,null,79,72,null,null,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:03:30,null,50,3,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:31:45,null,-1,-1,17,7,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:29:31,null,-1,-1,40,49,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:30:15,null,-1,-1,6,13,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:20:40,null,-1,-1,23,74,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:22:31,null,-1,-1,51,84,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:47:15,null,-1,-1,null,null,36,36,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:38:55,null,12,35,null,null,null,null,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:18:31,null,-1,-1,58,70,null,null,4), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:17:50,null,72,4,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:32:23,null,27,90,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:37:42,null,-1,-1,null,null,72,62,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:10:42,null,34,71,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:17:17,null,81,27,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 
16:33:30,null,72,36,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:30:02,null,31,42,null,null,null,null,3), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:12:16,null,-1,-1,null,null,49,39,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:38:13,null,57,28,null,null,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:09:08,null,-1,-1,9,6,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:17:08,null,-1,-1,45,39,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,5,2019-10-10 16:02:29,null,-1,-1,null,null,53,88,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:11:26,null,-1,-1,95,54,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:35:43,null,62,54,null,null,null,null,9), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:31:24,null,-1,-1,76,34,null,null,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:20:41,Lamer,-1,-1,null,null,null,null,5), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,4,2019-10-10 16:55:03,null,72,79,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:06:03,null,-1,-1,null,null,34,61,7), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:01:54,卫生纸,-1,-1,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:39:21,null,-1,-1,null,null,41,73,0), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,9,2019-10-10 16:24:04,null,97,76,null,null,null,null,8), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,2,2019-10-10 16:47:43,苹果,-1,-1,null,null,null,null,3), 
UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,7,2019-10-10 16:03:13,null,20,8,null,null,null,null,2), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:23:10,华为手机,-1,-1,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,8,2019-10-10 16:13:03,null,49,87,null,null,null,null,6), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,1,2019-10-10 16:17:23,null,83,14,null,null,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,3,2019-10-10 16:47:50,null,-1,-1,60,60,null,null,1), UserVisitAction(2019-10-10,97,0dfb1ee48afc4e3cb897e7949acba433,6,2019-10-10 16:30:18,null,-1,-1,null,null,65,21,9))) step3 * Session_Id|Search_Keywords|Click_Categary_Id|Visit_Length|Step_Length|Start_Time (13,sessionid=a300de33934d49c4b02c1525879454bd|searchKeywords=吸尘器,保温杯,Lamer,华为手机,机器学习,洗面奶,小龙虾|clickCategoryIds=26,38,30,17,6,83,40,65,41,4,34,97,41,34,60,82,38,11,79,42,99,35,47,80,1,80,99,64,2|visitLength=3491|stepLength=94|startTime=2019-10-10) (25,sessionid=5ddf7f6d6c9d485db9287b5ef34e077e|searchKeywords=苹果,吸尘器,保温杯|clickCategoryIds=28,59,96,29,91,21|visitLength=3322|stepLength=21|startTime=2019-10-10) (41,sessionid=838220c6e46445d7bad52f18d3171bd8|searchKeywords=吸尘器,苹果,保温杯,洗面奶,机器学习,Lamer,华为手机|clickCategoryIds=29,80,46,79,6,98,80,34,41,30,41,1,34,64,13,98,1|visitLength=3458|stepLength=52|startTime=2019-10-10) step4 聚合信息全部完成 * Session_Id|Search_Keywords|Click_Categary_Id|Visit_Length|Step_Length|Start_Time|Age|Professional|Sex|City (79bfe53461f146cc836f7219351588f0,sessionid=79bfe53461f146cc836f7219351588f0|searchKeywords=卫生纸,联想笔记本,华为手机,小龙虾,洗面奶,吸尘器,苹果,Lamer|clickCategoryIds=8,49,14,13,36,41,27,82,45,47,7,41,32|visitLength=3456|stepLength=67|startTime=2019-10-10|age=22|professional=professional2|sex=male|city=city57) 
(228cf3d0777749799436de675ff441a4,sessionid=228cf3d0777749799436de675ff441a4|searchKeywords=洗面奶,卫生纸,联想笔记本,苹果,Lamer,机器学习,小龙虾,吸尘器,保温杯,华为手机|clickCategoryIds=15,90,38,45,66,28,12,15,81,58,62,90,94,90,82,99,11,69|visitLength=3432|stepLength=81|startTime=2019-10-10|age=29|professional=professional70|sex=female|city=city95) (bd424c0bdfe64cd3af71de31a90c786b,sessionid=bd424c0bdfe64cd3af71de31a90c786b|searchKeywords=苹果,保温杯,洗面奶,Lamer,小龙虾,华为手机|clickCategoryIds=40,51,92,78,94|visitLength=3253|stepLength=42|startTime=2019-10-10|age=49|professional=professional29|sex=female|city=city34) step5 过滤及累加器更新之后 0000-00-00 00:00:00,740 INFO --- [ Executor task launch worker for task 25] org.apache.spark.storage.ShuffleBlockFetcherIterator (line: 54) : Started 0 remote fetches in 6 ms (4fe6f3eaf9914578bc03308618bdd7a5,sessionid=4fe6f3eaf9914578bc03308618bdd7a5|searchKeywords=苹果,洗面奶,保温杯,机器学习,Lamer,联想笔记本,华为手机,小龙虾|clickCategoryIds=12,92,10,25,75,28,28,18,51,92,39,63,82,70,60,86|visitLength=3391|stepLength=54|startTime=2019-10-10|age=47|professional=professional40|sex=male|city=city28) (fa00c11085ec4e8eb409ddc35e351902,sessionid=fa00c11085ec4e8eb409ddc35e351902|searchKeywords=小龙虾,洗面奶,华为手机,苹果,联想笔记本,保温杯,机器学习|clickCategoryIds=51,2,84,89,97,82,79|visitLength=3400|stepLength=37|startTime=2019-10-10|age=29|professional=professional0|sex=male|city=city92) (8c937c2d61a84937bbd37f03fb401123,sessionid=8c937c2d61a84937bbd37f03fb401123|searchKeywords=苹果,洗面奶,联想笔记本,机器学习,小龙虾,吸尘器,华为手机,Lamer,卫生纸|clickCategoryIds=15,0,38,60,65,7,96,23,13,20,74,69,95,16,9,84,22,33,73,96,70,77|visitLength=3503|stepLength=87|startTime=2019-10-10|age=47|professional=professional40|sex=male|city=city28) (4a7986dbdb step6 计算比率存入数据库 然后commerce数据库中就会有一张表session_stat_ratio_0416 */

SessionAccumulator.scala 自定义累加器

import org.apache.spark.util.AccumulatorV2

import scala.collection.mutable

/**
 * Custom Spark accumulator that counts how many times each string key was added.
 *
 * Used by SessionStat. Input values are keys; the accumulated value is a
 * mutable map from key to its occurrence count.
 */
class SessionAccumulator extends AccumulatorV2[String, mutable.HashMap[String, Int]] {

  /** Occurrence count per key accumulated so far. */
  val countMap: mutable.HashMap[String, Int] = new mutable.HashMap[String, Int]()

  /** The accumulator is at its zero value when no key has been counted. */
  override def isZero: Boolean = countMap.isEmpty

  /** Creates a new accumulator holding an independent copy of the current counts. */
  override def copy(): AccumulatorV2[String, mutable.HashMap[String, Int]] = {
    val acc = new SessionAccumulator
    acc.countMap ++= this.countMap
    acc
  }

  /** Drops every count, returning the accumulator to its zero value. */
  override def reset(): Unit = countMap.clear()

  /**
   * Increments the count of `v` by one; a key seen for the first time starts at 1.
   *
   * @param v key whose counter is incremented
   */
  override def add(v: String): Unit =
    countMap.update(v, countMap.getOrElse(v, 0) + 1)

  /**
   * Adds the counts of `other` into this accumulator, key by key.
   *
   * @param other accumulator to merge in; must be a [[SessionAccumulator]]
   * @throws UnsupportedOperationException if `other` is not a [[SessionAccumulator]]
   */
  override def merge(other: AccumulatorV2[String, mutable.HashMap[String, Int]]): Unit =
    other match {
      case acc: SessionAccumulator =>
        acc.countMap.foreach { case (k, v) =>
          countMap.update(k, countMap.getOrElse(k, 0) + v)
        }
      case _ =>
        // Fail with a descriptive error instead of an opaque scala.MatchError.
        throw new UnsupportedOperationException(
          s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
    }

  /** Returns the live internal map (not a copy), so later updates are visible through it. */
  override def value: mutable.HashMap[String, Int] = countMap
}

SessionAggrStat.scala 样例类

/**
 * Aggregated session statistics produced for one task.
 *
 * Besides the task id and the session count, every field is a ratio named
 * after one bucket. Judging by the field names, the `visit_length_*` buckets
 * are visit durations (`s` = seconds, `m` = minutes) and the `step_length_*`
 * buckets are step counts -- NOTE(review): confirm bucket boundaries against
 * the code that fills this class.
 *
 * @param taskUUID                   identifier of the task these statistics belong to
 * @param session_count              number of sessions
 * @param visit_length_1s_3s_ratio   ratio for the visit-length bucket "1s_3s"
 * @param visit_length_4s_6s_ratio   ratio for the visit-length bucket "4s_6s"
 * @param visit_length_7s_9s_ratio   ratio for the visit-length bucket "7s_9s"
 * @param visit_length_10s_30s_ratio ratio for the visit-length bucket "10s_30s"
 * @param visit_length_30s_60s_ratio ratio for the visit-length bucket "30s_60s"
 * @param visit_length_1m_3m_ratio   ratio for the visit-length bucket "1m_3m"
 * @param visit_length_3m_10m_ratio  ratio for the visit-length bucket "3m_10m"
 * @param visit_length_10m_30m_ratio ratio for the visit-length bucket "10m_30m"
 * @param visit_length_30m_ratio     ratio for the visit-length bucket "30m"
 * @param step_length_1_3_ratio      ratio for the step-length bucket "1_3"
 * @param step_length_4_6_ratio      ratio for the step-length bucket "4_6"
 * @param step_length_7_9_ratio      ratio for the step-length bucket "7_9"
 * @param step_length_10_30_ratio    ratio for the step-length bucket "10_30"
 * @param step_length_30_60_ratio    ratio for the step-length bucket "30_60"
 * @param step_legth_60_ratio        ratio for the step-length bucket "60"; the
 *                                   misspelt name ("legth") is kept because
 *                                   renaming it would break existing callers
 */
case class SessionAggrStat(
  taskUUID: String,
  session_count: Long,
  visit_length_1s_3s_ratio: Double,
  visit_length_4s_6s_ratio: Double,
  visit_length_7s_9s_ratio: Double,
  visit_length_10s_30s_ratio: Double,
  visit_length_30s_60s_ratio: Double,
  visit_length_1m_3m_ratio: Double,
  visit_length_3m_10m_ratio: Double,
  visit_length_10m_30m_ratio: Double,
  visit_length_30m_ratio: Double,
  step_length_1_3_ratio: Double,
  step_length_4_6_ratio: Double,
  step_length_7_9_ratio: Double,
  step_length_10_30_ratio: Double,
  step_length_30_60_ratio: Double,
  step_legth_60_ratio: Double
)

注:参考网络资源,自学当笔记使用。

静悄悄 乱纷纷 都输给了时间 却没有辜负青春   他诚恳   才不让你等   你失落了黄昏   却换来平静夜深   众里寻人   错爱只是为真爱作证   所谓魔鬼留下的伤痕   都是天使的指纹   灯火阑珊   何必急于看到那个人   能睡得安稳都只因为   那盏还没开的灯      亮晶晶 黑沉沉   开了窗 关上门   谁给了你寂寞   寂寞还给你新生   谁的吻   都值得感恩   泪淋熄了欲望   笑却雕琢了皱纹   众里寻人 错爱只是为真爱作证   所谓魔鬼留下的伤痕   都是天使的指纹   灯火阑珊   何必急于看到那个人   能睡的安稳   都只因为   那盏还没开的灯   最初总坚持自以为是的缘分   最后才顺其自然看花开无声   离开你那个人   同时释放了你   你为何不转身   众里寻人   错爱只是为真爱作证   每次告别留下的伤痕   都是天使的指纹   灯火阑珊   你急着要看到那个人   他也在寻找你的身影   你也让别人在等      天使的指纹   孙燕姿
最新回复(0)