Goal: parse a JSON array string into individual rows in Hive.
[{"product_id":220003038067,"timestamps":"1340321132000"},{"product_id":300003861266,"timestamps":"1340271857000"}]
The array above is the kind of value we need to split apart.
Hive's built-in get_json_object and json_tuple can extract fields from a single JSON object, but neither can split a JSON array into one row per element.
Searching for an existing solution turned up Pythian's JsonSplit UDF, so we use that instead of writing our own.
There is no prebuilt jar download, so build it with Maven; if Maven is unfamiliar, getting a working build can take a few tries.
Once built:
Copy the two jars to /usr/hdp/current/hive-client/lib/ on the Hive host (e.g. with WinSCP/scp), then run the following in the hive CLI:
-- Register the Pythian JSON UDFs from the local Hive client lib directory.
-- NOTE: "add jar" + "create function" done this way is effectively
-- session-scoped setup; see the permanent-function variant further below.
add jar /usr/hdp/current/hive-client/lib/JsonSplit-1.3-SNAPSHOT.jar;
-- json_split: explodes a JSON array string into per-element structs
create function json_split as 'com.pythian.hive.udf.JsonSplitUDF';
-- json_map: parses a JSON object string into a map (indexed by field name below)
create function json_map as 'com.pythian.hive.udf.JsonMapUDF';
Quick test with sample JSON data:
-- Sample table holding the raw JSON array string.
-- "if not exists" makes this setup script safe to re-run.
create table if not exists json_test(json_string string);
-- One row containing a two-element JSON array; after splitting, each
-- element should become its own row.
-- NOTE: re-running this insert appends a duplicate row.
insert into json_test values('[{"product_id":220003038067,"timestamps":"1340321132000"},{"product_id":300003861266,"timestamps":"1340271857000"}]');
-- Print column headers in CLI output.
set hive.cli.print.header=true;

-- json_split explodes the array into one struct per element (row_id, json_string)
-- — column names confirmed by the CLI transcript below; json_tuple then pulls
-- the named fields out of each element's JSON text.
select
    b.col.row_id,
    b.col.json_string,
    c.*
from json_test a
lateral view explode(json_split(json_string)) b
lateral view json_tuple(b.col.json_string, 'product_id', 'timestamps') c as product_id, timestamps;
[root@sandbox ~]# hive
log4j:WARN No such property [maxFileSize] in org.apache.log4j.DailyRollingFileAppender.
Logging initialized using configuration in file:/etc/hive/2.6.1.0-129/0/hive-log4j.properties
hive> set hive.cli.print.header=true;
hive> select
> b.col.row_id
> , b.col.json_string
> , c.*
> from json_test a
> lateral view outer explode(json_split(json_string)) b
> lateral view json_tuple(b.col.json_string, 'product_id', 'timestamps') c as product_id, timestamps;
OK
row_id json_string c.product_id c.timestamps
0 {"product_id":220003038067,"timestamps":"1340321132000"} 220003038067 1340321132000
1 {"product_id":300003861266,"timestamps":"1340271857000"} 300003861266 1340271857000
Time taken: 2.067 seconds, Fetched: 2 row(s)
hive>
Finding a working UDF took longer than expected, so here is the important caveat: functions registered via "add jar" only last for the current session.
Since every new session would otherwise need to re-run "add jar /usr/hdp/current/hive-client/lib/JsonSplit-1.3-SNAPSHOT.jar;", creating permanent functions backed by an HDFS jar is the better approach.
First upload the jar file to HDFS:
# Upload the jar to the exact HDFS path referenced by the
# "create function ... using JAR 'hdfs://localhost:8020/user/cloudera/...'"
# statements below. A bare "put" with no destination lands in the invoking
# user's HDFS home dir (e.g. /user/root), which would not match that URI.
hadoop fs -put JsonSplit-1.3-SNAPSHOT.jar /user/cloudera/
Then, in the hive CLI:
-- Permanent functions: the metastore records the HDFS jar URI, so new
-- sessions can use them without a per-session "add jar".
create function json_split as 'com.pythian.hive.udf.JsonSplitUDF' using JAR 'hdfs://localhost:8020/user/cloudera/JsonSplit-1.3-SNAPSHOT.jar';
create function json_map as 'com.pythian.hive.udf.JsonMapUDF' using JAR 'hdfs://localhost:8020/user/cloudera/JsonSplit-1.3-SNAPSHOT.jar';
Then, at the OS level:
# Restart HiveServer2 — per this note, needed so existing server sessions
# pick up the newly registered permanent functions.
sudo service hive-server2 restart
json_map usage examples (conceptually similar to SQL Server's OPENJSON):
-- json_map parses each JSON object string into a map; exploding that map
-- emits one (key, value) row per JSON field.
-- (Semicolons added: the original two statements had no terminators and
-- could not be pasted into the CLI as written.)
select explode(json_map(json_string))
from json_test;

-- Or index the map directly to pull a single field out of every array
-- element produced by json_split.
select json_map(b.col.json_string)['product_id']
from json_test a
lateral view outer explode(json_split(json_string)) b;
Alternative approach (no UDF)
This only needs built-in functions, so setup is faster — but the queries are uglier:
-- UDF-free split: substr strips the outer [ and ], then the element
-- boundary '},{' is widened to '},,,,{' so that splitting on ',,,,'
-- cannot fire on the commas *inside* each JSON object.
-- posexplode keeps each element's position alongside its text (pos, val).
-- (Terminating semicolon added so the statement runs in the CLI.)
select b.*
from json_test a
lateral view outer posexplode(split(regexp_replace(substr(json_string, 2, length(json_string)-2), '},\\{', '},,,,{'), ',,,,')) b;
-- Same idea with regex: regexp_extract captures everything between the
-- outer [ and ], the element boundary '},{' is rewritten to '}||{', and
-- splitting on '||' yields one array element per row.
-- (Terminating semicolon added so the statement runs in the CLI.)
select b.*
from json_test a
lateral view explode(split(regexp_replace(regexp_extract(a.json_string,'^\\[(.+)\\]$',1),'\\}\\,\\{', '\\}\\|\\|\\{'),'\\|\\|')) b;