impala与hbase整合

张映 发表于 2020-01-08

分类目录: hadoop/spark/scala

标签:, ,

1,创建hbase表

[root@bigserver2 ~]# hbase shell
HBase Shell
Use "help" to get list of supported commands.
Use "exit" to quit this interactive shell.
For Reference, please visit: http://hbase.apache.org/2.0/book.html#shell
Version 2.1.0-cdh6.3.1, rUnknown, Thu Sep 26 02:56:37 PDT 2019
Took 0.0015 seconds    

hbase(main):002:0> create_namespace 'tank_ns' //创建命名空间
Took 1.0714 seconds

hbase(main):004:0> create 'tank_ns:user','login','info'  //创建列族
Created table tank_ns:user
Took 4.5354 seconds
=> Hbase::Table - tank_ns:user

hbase(main):005:0> list_namespace_tables 'tank_ns'
TABLE
user
1 row(s)
Took 0.0832 seconds
=> ["user"]

//插入数据
hbase(main):023:0> put 'tank_ns:user','1000120190925','login:id','1';
hbase(main):024:0* put 'tank_ns:user','1000120190925','login:username','Tank';
hbase(main):025:0* put 'tank_ns:user','1000120190925','info:sex','man';
hbase(main):026:0* scan 'tank_ns:user'  //扫描全表
Took 0.0686 seconds
Took 0.0081 seconds
Took 0.0076 seconds
ROW        								COLUMN+CELL
 1000120190925                          column=info:sex, timestamp=1578455088679, value=man
 1000120190925                          column=login:id, timestamp=1578455088617, value=1
 1000120190925                          column=login:username, timestamp=1578455088670, value=Tank
1 row(s)
Took 0.0733 seconds

2,创建hive外部表,映射hbase表

[root@bigserver2 ~]# hive

hive> show databases; //显示数据库
OK
default
parquet_test
tanktest
Time taken: 1.763 seconds, Fetched: 3 row(s)

hive> use parquet_test; //切换数据库
OK
Time taken: 0.094 seconds

//创建外部表
hive> create external table hive_hbase_test(key bigint,id bigint,name string,sex string)
    > STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,login:id,login:username,info:sex")
    > TBLPROPERTIES ("hbase.table.name" = "tank_ns:user");
OK
Time taken: 2.066 seconds

hive> desc formatted hive_hbase_test;  //查看表
OK
# col_name            	data_type           	comment             

key                 	bigint
id                  	bigint
name                	string
sex                 	string              	                    

# Detailed Table Information
Database:           	parquet_test
OwnerType:          	USER
Owner:              	root
CreateTime:         	Wed Jan 08 11:48:55 CST 2020
LastAccessTime:     	UNKNOWN
Retention:          	0
Location:           	hdfs://bigdata1/home/cdh6/hive/warehouse/parquet_test.db/hive_hbase_test
Table Type:         	EXTERNAL_TABLE
Table Parameters:
	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
	EXTERNAL            	TRUE
	hbase.table.name    	tank_ns:user
	numFiles            	0
	numRows             	0
	rawDataSize         	0
	storage_handler     	org.apache.hadoop.hive.hbase.HBaseStorageHandler
	totalSize           	0
	transient_lastDdlTime	1578455335          

# Storage Information
SerDe Library:      	org.apache.hadoop.hive.hbase.HBaseSerDe
InputFormat:        	null
OutputFormat:       	null
Compressed:         	No
Num Buckets:        	-1
Bucket Columns:     	[]
Sort Columns:       	[]
Storage Desc Params:
	hbase.columns.mapping	:key,login:id,login:username,info:sex
	serialization.format	1
Time taken: 0.615 seconds, Fetched: 38 row(s)

3,同步metadata,并测试

[root@bigserver2 ~]# impala-shell

[bigserver2:21000] default> use parquet_test; //切换数据库
Query: use parquet_test
[bigserver2:21000] parquet_test> show tables; //显示表
Query: show tables
+-----------------------+
| name                  |
+-----------------------+
| tank_test_1           |
| tank_test_2           |
| tank_test_3           |
| tank_test_4           |
| tank_test_5           |
| tank_test_6           |
| toutiao_creative      |
| toutiao_creative_test |
+-----------------------+
Fetched 9 row(s) in 0.01s

[bigserver2:21000] parquet_test> invalidate metadata;  //同步数据
Query: invalidate metadata
Query submitted at: 2020-01-08 11:53:58 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=4d48b720aecf70c4:f243892e00000000
Fetched 0 row(s) in 3.77s

[bigserver2:21000] parquet_test> show tables;
Query: show tables
+-----------------------+
| name                  |
+-----------------------+
| hive_hbase_test       |  //表出来了
| tank_test_1           |
| tank_test_2           |
| tank_test_3           |
| tank_test_4           |
| tank_test_5           |
| tank_test_6           |
| toutiao_creative      |
| toutiao_creative_test |
+-----------------------+
Fetched 9 row(s) in 0.01s

[bigserver2:21000] parquet_test> desc formatted hive_hbase_test;  //impala 查看 表结构
Query: describe formatted hive_hbase_test
+------------------------------+--------------------------------------------------------------------------+--------------------------------------------------+
| name                         | type                                                                     | comment                                          |
+------------------------------+--------------------------------------------------------------------------+--------------------------------------------------+
| # col_name                   | data_type                                                                | comment                                          |
|                              | NULL                                                                     | NULL                                             |
| sex                          | string                                                                   | NULL                                             |
| id                           | bigint                                                                   | NULL                                             |
| name                         | string                                                                   | NULL                                             |
|                              | NULL                                                                     | NULL                                             |
| # Partition Information      | NULL                                                                     | NULL                                             |
| # col_name                   | data_type                                                                | comment                                          |
|                              | NULL                                                                     | NULL                                             |
| key                          | bigint                                                                   | NULL                                             |
|                              | NULL                                                                     | NULL                                             |
| # Detailed Table Information | NULL                                                                     | NULL                                             |
| Database:                    | parquet_test                                                             | NULL                                             |
| OwnerType:                   | USER                                                                     | NULL                                             |
| Owner:                       | root                                                                     | NULL                                             |
| CreateTime:                  | Wed Jan 08 11:48:55 CST 2020                                             | NULL                                             |
| LastAccessTime:              | UNKNOWN                                                                  | NULL                                             |
| Retention:                   | 0                                                                        | NULL                                             |
| Location:                    | hdfs://bigdata1/home/cdh6/hive/warehouse/parquet_test.db/hive_hbase_test | NULL                                             |
| Table Type:                  | EXTERNAL_TABLE                                                           | NULL                                             |
| Table Parameters:            | NULL                                                                     | NULL                                             |
|                              | COLUMN_STATS_ACCURATE                                                    | {\"BASIC_STATS\":\"true\"}                       |
|                              | EXTERNAL                                                                 | TRUE                                             |
|                              | hbase.table.name                                                         | tank_ns:user                                     |
|                              | numFiles                                                                 | 0                                                |
|                              | numRows                                                                  | 0                                                |
|                              | rawDataSize                                                              | 0                                                |
|                              | storage_handler                                                          | org.apache.hadoop.hive.hbase.HBaseStorageHandler |
|                              | totalSize                                                                | 0                                                |
|                              | transient_lastDdlTime                                                    | 1578455335                                       |
|                              | NULL                                                                     | NULL                                             |
| # Storage Information        | NULL                                                                     | NULL                                             |
| SerDe Library:               | org.apache.hadoop.hive.hbase.HBaseSerDe                                  | NULL                                             |
| InputFormat:                 | null                                                                     | NULL                                             |
| OutputFormat:                | null                                                                     | NULL                                             |
| Compressed:                  | No                                                                       | NULL                                             |
| Num Buckets:                 | -1                                                                       | NULL                                             |
| Bucket Columns:              | []                                                                       | NULL                                             |
| Sort Columns:                | []                                                                       | NULL                                             |
| Storage Desc Params:         | NULL                                                                     | NULL                                             |
|                              | hbase.columns.mapping                                                    | :key,login:id,login:username,info:sex            |
|                              | serialization.format                                                     | 1                                                |
+------------------------------+--------------------------------------------------------------------------+--------------------------------------------------+

[bigserver2:21000] parquet_test> select * from hive_hbase_test; //取数据
Query: select * from hive_hbase_test
Query submitted at: 2020-01-08 11:59:22 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=5a4100f5e64eb9c6:47cae39a00000000
+---------------+-----+----+------+
| key           | sex | id | name |
+---------------+-----+----+------+
| 1000120190925 | man | 1  | Tank |
| 121           | men | 1  | Tank |
+---------------+-----+----+------+
Fetched 2 row(s) in 0.18s

impala根hbase整合后,不支持upsert,update,delete。

[bigserver2:21000] parquet_test> upsert into hive_hbase_test values (121,'men',1,'Tank');
Query: upsert into hive_hbase_test values (121,'men',1,'Tank')
Query submitted at: 2020-01-08 11:58:28 (Coordinator: http://bigserver2:25000)
ERROR: AnalysisException: UPSERT is only supported for Kudu tables

[bigserver2:21000] parquet_test> update hive_hbase_test set name='tankzhang' where key=121;
Query: update hive_hbase_test set name='tankzhang' where key=121
Query submitted at: 2020-01-08 12:00:31 (Coordinator: http://bigserver2:25000)
ERROR: AnalysisException: Impala does not support modifying a non-Kudu table: parquet_test.hive_hbase_test

[bigserver2:21000] parquet_test> delete from hive_hbase_test where key=121;
Query: delete from hive_hbase_test where key=121
Query submitted at: 2020-01-08 12:01:53 (Coordinator: http://bigserver2:25000)
ERROR: AnalysisException: Impala does not support modifying a non-Kudu table: parquet_test.hive_hbase_test

但是插入可以实现update的功能:

[bigserver2:21000] parquet_test> insert into hive_hbase_test values (123,'men',1,'Tank'); //插入数据
Query: insert into hive_hbase_test values (123,'men',1,'Tank')
Query submitted at: 2020-01-08 11:59:16 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=ed4aed94961692f4:665a077d00000000
Modified 1 row(s) in 0.22s

[bigserver2:21000] parquet_test> select * from hive_hbase_test;
Query: select * from hive_hbase_test
Query submitted at: 2020-01-08 14:50:29 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=5545e4ffd5bb3ae4:aad8149c00000000
+---------------+------+------+------+
| key           | sex  | id   | name |
+---------------+------+------+------+
| 1000120190925 | man  | 1    | Tank |
| 121           | men  | 1    | Tank |
| 123           | men  | 1    | Tank |
+---------------+------+------+------+
Fetched 3 row(s) in 0.18s

[bigserver2:21000] parquet_test> insert into hive_hbase_test values (123,'men',2,'Tank');  //更新数据
Query: insert into hive_hbase_test values (123,'men',2,'Tank')
Query submitted at: 2020-01-08 14:50:55 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=af496361f93665bc:6b84d98d00000000
Modified 1 row(s) in 0.12s

[bigserver2:21000] parquet_test> select * from hive_hbase_test;
Query: select * from hive_hbase_test
Query submitted at: 2020-01-08 14:51:02 (Coordinator: http://bigserver2:25000)
Query progress can be monitored at: http://bigserver2:25000/query_plan?query_id=774ac1b309cce732:6f42953400000000
+---------------+------+------+------+
| key           | sex  | id   | name |
+---------------+------+------+------+
| 1000120190925 | man  | 1    | Tank |
| 121           | men  | 1    | Tank |
| 123           | men  | 2    | Tank |
+---------------+------+------+------+
Fetched 3 row(s) in 0.20s

 



转载请注明
作者:海底苍鹰
地址:http://blog.51yip.com/hadoop/2347.html