2019年安徽省大数据网络赛数据预处理(二)

mac2022-06-30  20

数据

{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436920","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"NEWLIVEVIEW_QUIT_TAB","value":"0","du":""}} {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436923","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"EVENT_ZIP_UPLOAD","value":"1","du":""},"properties":{"property1":"1"}} {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844841","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"AMAP_LOCATION_UPDATE","value":"0","du":"446"},"properties":{"property1":"0","property3":"1"}} 
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844865","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_START","value":"http:\/\/weather.api.moji.com\/data\/detail","du":""},"properties":{"property1":"1","property2":"0","property3":"{\"common\":{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"},\"params\":{\"city\":[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]}}","property4":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}} {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845076","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_UPDATE","value":"http:\/\/weather.api.moji.com\/data\/detail","du":"243"},"properties":{"property1":0,"property4":"1","property5":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424","property6":"weather.api.moji.com\/111.13.70.18:80"}} 
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845226","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"WEATHER_UPDATE","value":"1","du":"327"},"properties":{"property1":1,"property2":-1,"property3":"http:\/\/weather.api.moji.com\/data\/detail","property4":"2","property5":"RequestParams:[city=[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]], commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"}","property6":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}} {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845304","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.28037,104.452387"}} 
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845312","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"LOCATION_UPDATE","value":"0","du":"1096"},"properties":{"property1":"0","property3":"1"}}

题目要求

将原始数据中用户的 "uid"、"platform"、"app_version"、"pid"、"cityid" 五个字段及其对应的值提取出来。

代码

package com.mr2; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class preTwo { public static class MyMapper extends Mapper<LongWritable,Text,Text,NullWritable> { /* * 数据 * {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android", * "app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503", * "iccid":"89860077221897301901","snsid":"","ts":"1557282063721","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.280233,104.452469"}} */ protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException { StringBuffer k = new StringBuffer(); String s = String.valueOf(value); //将原始数据进行切分 String[] split = s.split(","); for(int i=0;i<split.length;i++) { //利用条件"uid\":"等可以过滤数据commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\": if(split[i].contains("uid\":")||split[i].contains("platform\":")||split[i].contains("app_version\":")||split[i].contains("pid\":")||split[i].contains("cityid\":")) { //利用这个if()过滤掉数据{"common":{"uid":"417705234" "platform":"Android" "app_version":"1001010000" "pid":"4025" "cityid":"" if(split[i].contains("uid\":")) { int m = split[i].indexOf("uid"); //从"uid"开始截取split[i]而不是从{"common": k.append(split[i].substring(m-1)+" "); } else { k.append(split[i]+" "); } } } 
String k1 = k.substring(0,k.length()-1); context.write(new Text(k1), NullWritable.get()); } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub Configuration conf = new Configuration(); Job job = Job.getInstance(conf,preTwo.class.getSimpleName()); job.setJarByClass(preTwo.class); job.setMapperClass(MyMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); job.waitForCompletion(true); } }

结果

最新回复(0)