_覓 | 覦覈襦 | 豕蠏手 | 殊螳 | 譯殊碁 |
FrontPage › Impala
|
|
[edit]
1 get_json_object - java #json 讌 り 襷蠍磯 譬 蠏碁..
udf襯 襷れ 磯 . impala..
企蟆 覃
create function get_json_object(string, string) returns string location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson'; 企蟆 覃 蟲
create function get_json_object location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson';: 焔 る 企瓦. 襴り . create table test1(payload string); insert into test1 values('{"product_id":220003038067,"timestamps":"1340321132000"}'); select payload , get_json_object(payload, '$.product_id') product_id from test1; 蟆郁骸
[quickstart.cloudera:21000] > select > payload > , get_json_object(payload, '$.product_id') product_id > from test1; Query: select payload , get_json_object(payload, '$.product_id') product_id from test1 Query submitted at: 2018-02-28 07:32:40 (Coordinator: http://quickstart.cloudera:25000) Query progress can be monitored at: http://quickstart.cloudera:25000/query_plan?query_id=b04f737db6c69282:5e9d56c300000000 +----------------------------------------------------------+--------------+ | payload | product_id | +----------------------------------------------------------+--------------+ | {"product_id":220003038067,"timestamps":"1340321132000"} | 220003038067 | +----------------------------------------------------------+--------------+ Fetched 1 row(s) in 0.30s [quickstart.cloudera:21000] > [edit]
2 get_json_object - c++ #https://github.com/cloudera/impala-udf-samples
root 襦 sudo yum install gcc-c++ cmake boost-devel sudo yum install impala-udf-devel mkdir json_udf cd json_udf cmake /root/impala-get-json-object-udf-master/ make cd build mv libjsonUdf.so /home/hdfs/libjsonUdf.so cd /home/hdfs chown hdfs:hdfs libjsonUdf.so su hdfs hadoop fs -mkdir /user/hive/udfs hadoop fs -put -f libjsonUdf.so /user/hive/udfs create function lostark.get_json_object2(string, string) returns string location '/user/hive/udfs/libjsonUdf.so' symbol='JsonGetObject';覯蠏瑚 . 螳 貎朱Μ襯
[edit]
3 json split #覦一伎 蠍語企ゼ 企 所 蟒(?)襯 願屋. 蠍磯るΜる慨覃 螳 udf 襷り讌 覘..
test 企 .
drop table test; create table test(id int, payload string); insert into test values(1, '[{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}]'); insert into test values(2, '[{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}]'); dummy 企 .
覓朱 dummy 企 1,2,3, ..... 10000 蟾讌 . 蠍一 蠏谿朱 2螳襷.. create table dummy(seq int); insert into dummy values (1),(2); udf螳 .
create function get_json_object(string, string) returns string location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson'; れ螻 螳 覦一 蠍語企ゼ 碁.
select a.id , a.payload , (length(payload) - length(regexp_replace(payload, '\}\\s*,\\s*\{', '')))/3 + 1 array_size , b.seq from test a inner join dummy b on b.seq <= (length(a.payload) - length(regexp_replace(a.payload, '\}\\s*,\\s*\{', '')))/3 + 1 where b.seq <= 100 --願姥 蠎 . ;2018-11-10 れ
+----+---------------------------------------------------+------------+-----+ | id | payload | array_size | seq | +----+---------------------------------------------------+------------+-----+ | 1 | [{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}] | 2 | 1 | | 1 | [{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}] | 2 | 2 | | 2 | [{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}] | 2 | 1 | | 2 | [{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}] | 2 | 2 | +----+---------------------------------------------------+------------+-----+ れ螻 螳 襷覓企Μ..
select id , get_json_object(payload, '$.key1') key1_value , get_json_object(payload, '$.key2') key2_value from ( select id , seq , get_json_object(payload, concat('$.[', cast(seq-1 as string), ']')) payload from ( select a.id , a.payload , b.seq from test a inner join dummy b on b.seq <= (regexp_replace(a.payload, '\}\\s*,\\s*\{', '},{') - length(regexp_replace(regexp_replace(a.payload, '\}\\s*,\\s*\{', '},{'), '},{', '')))/3 + 1 ) t ) t; 螳 蠏朱 蟆 .
select regexp_replace('}, {', '\}\\s*,\\s*\{', '')譯殊伎 蟆 \s螳 \\s朱 蟆.. 蟆郁骸
+----+------------+------------+ | id | key1_value | key2_value | +----+------------+------------+ | 2 | 3 | ccc | | 2 | 4 | ddd | | 1 | 1 | aaa | | 1 | 2 | bbb | +----+------------+------------+ 蠏 Online: https://regexr.com/
鏤
|
殊 貍螳 螻 蟆 . (碁狩一一) |