Impala是用于处理存储在Hadoop集群中的大量数据的大规模并行处理(MPP)SQL查询引擎,具有高性能、低延迟的特点,提供了访问存储在Hadoop分布式文件系统(HDFS)中的数据的最快方法。
http://www.dba.cn/book/impala/IMPALAJiaoCheng/IMPALAHuanJing.html
使用impala-shell命令进入命令行界面
创建数据库create database if not exists databaseName;
删除数据库drop database if exists databaseName;
选择数据库use databaseName;
显示所有数据库show databases;
创建表create table if not exists databaseName.tableName ( column1 dataType, column2 dataType, column3 dataType, … columnN dataType );
插入记录insert into tableName (column1, column2, column3, …columnN) values (value1, value2, value3, …valueN);
查询记录select column1, column2, columnN from tableName;
查看表的信息describe tableName;
更改表名alter table [oldDBName.]oldTableName rename to [newDBName.]newTableName;
表中添加列alter table tableName add columns ( column1 dataType, column2 dataType, … columnN dataType );
表中删除列alter table tableName drop columnName;
更改表中列的名称和类型alter table tableName change oldColumnName newColumnName newDataType;
删除表drop table if exists databaseName.tableName;
截断表(删除所有表数据)truncate tableName;
显示所有表show tables;
创建视图create view if not exists viewName as select statement;
更改视图alter view databaseName.viewName as select statement;
删除视图drop view databaseName.viewName;
排序查询select * from tableName order by colName [ASC|DESC] [NULLS FIRST|NULLS LAST];
组查询select data from tableName group by colName;
过滤查询(having常与group by一起使用)select data from tableName group by colName having 过滤条件;
结果集行数限制(numExpression为限制行数)select * from tableName limit numExpression;
结果集偏移(offset 3表示跳过前3行,即结果集从第4行开始输出,最多输出5行;Impala中offset必须与order by一起使用)select * from tableName order by colName limit 5 offset 3;
结果集组合(union会去除重复行,union all保留重复行)query1 union [all] query2;
结果集定义别名(将select 1的结果集定义为别名x,with子句之后必须跟随使用该别名的查询)with x as (select 1) select * from x;
去除重复记录select distinct data from tableName;