第2节 网站点击流项目(下):5、访客分析

mac2022-06-30  62

-- 独立访客--需求:按照时间维度来统计独立访客及其产生的pv量

按照时间维度比如小时来统计独立访客及其产生的 pv 。

时间维度:时drop table dw_user_dstc_ip_h;create table dw_user_dstc_ip_h(remote_addr string,pvs bigint,hour string);

insert into table dw_user_dstc_ip_h select remote_addr,count(1) as pvs,concat(month,day,hour) as hour from ods_weblog_detailWhere datestr='20130918'group by concat(month,day,hour),remote_addr;

--在上述基础之上,可以继续分析,比如每小时独立访客总数select count(1) as dstc_ip_cnts,hour from dw_user_dstc_ip_h group by hour;

时间维度:日select remote_addr,count(1) as counts,concat(month,day) as dayfrom ods_weblog_detailWhere datestr='20130918'group by concat(month,day),remote_addr;

时间维度: 月select remote_addr,count(1) as counts,month from ods_weblog_detailgroup by month,remote_addr;

------------------------------------------------------------------------------------------ 每日新访客-- 需求:将每天的新访客统计出来。

--历史去重访客累积表drop table dw_user_dsct_history;create table dw_user_dsct_history(day string,ip string)partitioned by(datestr string);

--每日新访客表drop table dw_user_new_d;create table dw_user_new_d (day string,ip string) partitioned by(datestr string);

 

select a.remote_addr ,a.dayfrom (select remote_addr,'20130918' as day from ods_weblog_detail newIpwhere datestr ='20130918'group by remote_addr) a left join dw_user_dsct_history histon a.remote_addr = hist.ipwhere hist.ip is null;

 

--每日新用户插入新访客表(写的太麻烦,有空优化之)insert into table dw_user_new_d partition(datestr='20130918')select tmp.day as day,tmp.today_addr as new_ip from(select today.day as day,today.remote_addr as today_addr,old.ip as old_addr from (select distinct remote_addr as remote_addr,"20130918" as day from ods_weblog_detail where datestr="20130918") todayleft outer join dw_user_dsct_history oldon today.remote_addr=old.ip) tmpwhere tmp.old_addr is null;

--每日新用户追加到历史累计表insert into table dw_user_dsct_history partition(datestr='20130918')select day,ip from dw_user_new_d where datestr='20130918';

验证:select count(distinct remote_addr) from ods_weblog_detail;

select count(1) from dw_user_dsct_history where datestr='20130918';

select count(1) from dw_user_new_d where datestr='20130918';

 

转载于:https://www.cnblogs.com/mediocreWorld/p/11107461.html

相关资源:JAVA上百实例源码以及开源项目
最新回复(0)