【hive建表】
create table CharacterLogin( AppID string, GameID string, ChildId string, IP string, ServerID string, AccountID string, CharacterID string, LogType string, LogTime int, PlatformChannelId string, IsLogin int, OnlineTime int, Level int, VIPLevel int ) partitioned by(year string,month string,day string) row format delimited fields terminated by '|';将数据根据时间分区导入Hive,分区格式为,年/月/日(将文件用xshell上传到linux过程略)
shell脚本如下
path="/home/li/Desktop/CharacterLogin" files=$(ls $path) for filename in $files do filename=${filename%.*} var1=`echo "$filename"|awk -F '-' '{print $1}'` var2=`echo "$filename"|awk -F '-' '{print $2}'` var3=`echo "$filename"|awk -F '-' '{print $3}'` $HIVE_HOME/bin/hive -e "load data local inpath '$path/$filename.txt' into table bigtrouble.CharacterLogin partition (year='$var1',month='$var2',day='$var3');" done【查询思路】
新建HIve数据库为统计库,新建一张表为统计结果表,表中两个字段date,count
查询已分区的角色登录登出表,根据用户ID去重,统计单张表的ID总数,取单表的日期和ID总数插入统计表
shell脚本如下
path="/home/li/Desktop/CharacterLogin" files=$(ls $path) for filename in $files do filename=${filename%.*} var1=`echo "$filename"|awk -F '-' '{print $1}'` var2=`echo "$filename"|awk -F '-' '{print $2}'` var3=`echo "$filename"|awk -F '-' '{print $3}'` count=`$HIVE_HOME/bin/hive -e "select count(distinct AccountID) from bigtrouble.CharacterLogin where year='$var1' and month='$var2' and day='$var3';"` #不打印查询字段名 $HIVE_HOME/bin/hive -e "set hive.cli.print.header=false;" $HIVE_HOME/bin/hive -e "insert into table day01.count1 values('$var1/$var2/$var3','$count');" done【坑】
注意把Hsql语句执行的结果赋值给shell变量时,那个符号不是单引号
