1 get_json_object - java #
json 讌 り 襷蠍磯 譬 蠏碁..
udf襯 襷れ 磯 .
CDH5.12 quckstart 襯 蠍一朱..
os..
hadoop fs -put /usr/lib/hive/lib/hive-exec-1.1.0-cdh5.12.0.jar /tmp
impala..
企蟆 覃
create function get_json_object(string, string) returns string
location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson';
企蟆 覃 蟲
create function get_json_object
location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson';
: 焔 る 企瓦. 襴り .
create table test1(payload string);
insert into test1 values('{"product_id":220003038067,"timestamps":"1340321132000"}');
select
payload
, get_json_object(payload, '$.product_id') product_id
from test1;
蟆郁骸
[quickstart.cloudera:21000] > select
> payload
> , get_json_object(payload, '$.product_id') product_id
> from test1;
Query: select payload
, get_json_object(payload, '$.product_id') product_id
from test1
Query submitted at: 2018-02-28 07:32:40 (Coordinator: http://quickstart.cloudera:25000)
Query progress can be monitored at: http://quickstart.cloudera:25000/query_plan?query_id=b04f737db6c69282:5e9d56c300000000
+----------------------------------------------------------+--------------+
| payload | product_id |
+----------------------------------------------------------+--------------+
| {"product_id":220003038067,"timestamps":"1340321132000"} | 220003038067 |
+----------------------------------------------------------+--------------+
Fetched 1 row(s) in 0.30s
[quickstart.cloudera:21000] >
3 json split #
覦一伎 蠍語企ゼ 企 所 蟒(?)襯 願屋. 蠍磯るΜる慨覃 螳 udf 襷り讌 覘..
test 企 .
drop table test;
create table test(id int, payload string);
insert into test values(1, '[{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}]');
insert into test values(2, '[{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}]');
dummy 企 .
覓朱 dummy 企 1,2,3, ..... 10000 蟾讌 . 蠍一 蠏谿朱 2螳襷..
create table dummy(seq int);
insert into dummy values (1),(2);
udf螳 .
create function get_json_object(string, string) returns string location '/tmp/hive-exec-1.1.0-cdh5.12.0.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFJson';
れ螻 螳 覦一 蠍語企ゼ 碁.
select
a.id
, a.payload
, (length(payload) - length(regexp_replace(payload, '\}\\s*,\\s*\{', '')))/3 + 1 array_size
, b.seq
from test a
inner join dummy b
on b.seq <= (length(a.payload) - length(regexp_replace(a.payload, '\}\\s*,\\s*\{', '')))/3 + 1
where b.seq <= 100 --願姥 蠎 .
;
2018-11-10 れ
- row螳 企覃 襷 碁.
- dummy企 1000螳 row襯 襷れ.
- 蠏碁 企*1000螳 row襯 filtering 蟇一. 伎蟲 蟆..
- where 麹(?) dummy row襯 譴 . seq <= 100 豌..
蟆郁骸
+----+---------------------------------------------------+------------+-----+
| id | payload | array_size | seq |
+----+---------------------------------------------------+------------+-----+
| 1 | [{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}] | 2 | 1 |
| 1 | [{"key1":1,"key2":"aaa"},{"key1":2,"key2":"bbb"}] | 2 | 2 |
| 2 | [{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}] | 2 | 1 |
| 2 | [{"key1":3,"key2":"ccc"},{"key1":4,"key2":"ddd"}] | 2 | 2 |
+----+---------------------------------------------------+------------+-----+
れ螻 螳 襷覓企Μ..
select
id
, get_json_object(payload, '$.key1') key1_value
, get_json_object(payload, '$.key2') key2_value
from (
select
id
, seq
, get_json_object(payload, concat('$.[', cast(seq-1 as string), ']')) payload
from (
select
a.id
, a.payload
, b.seq
from test a
inner join dummy b
on b.seq <= (regexp_replace(a.payload, '\}\\s*,\\s*\{', '},{') - length(regexp_replace(regexp_replace(a.payload, '\}\\s*,\\s*\{', '},{'), '},{', '')))/3 + 1
) t
) t;
螳 蠏朱 蟆 .
select regexp_replace('}, {', '\}\\s*,\\s*\{', '')
譯殊伎 蟆 \s螳 \\s朱 蟆..
蟆郁骸
+----+------------+------------+
| id | key1_value | key2_value |
+----+------------+------------+
| 2 | 3 | ccc |
| 2 | 4 | ddd |
| 1 | 1 | aaa |
| 1 | 2 | bbb |
+----+------------+------------+