Contents

1 Monitoring
2 Running a command over ssh
3 Uploading via webhdfs
4 Fetching strings longer than 8000 characters through the Windows OS ODBC driver
5 debug
6 alter column
7 Dates
8 Dropping a partitioned table (drop)
9 Compression
10 string to array
11 set command
12 Out Of Memory
13 beeline: the new hive CLI
14 Showing column names in select results
15 hive server(hive thrift)
16 hive web interface
17 .hiverc
18 Variables
19 oracle dual table
20 repair
21 Dynamic partition insert
22 Caveats for add columns on a partitioned table
23 Error: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in fetcher#15, Caused by: java.lang.OutOfMemoryError: Java heap space


1 Monitoring #

watch -n5 "yarn application -appStates RUNNING -list|sed '1d'|cut -f1,2,4,8|column -ts$'\t'"


yarn logs -applicationId application_1552564637687_16592

2 Running a command over ssh #

plink.exe -ssh -batch -l loginID -pw password host(IP) command

3 Uploading via webhdfs #

"c:\curl\bin\" curl.exe -i -X PUT -L -T "d:\log\test.log" "http://1.1.1.10:50070/webhdfs/v1/tmp/100000.log?op=CREATE&user.name=hdfs&overwrite=true"

To run it in the background instead:
start /d "c:\curl\bin\" curl.exe -i -X PUT -L -T "d:\log\test.log" "http://1.1.1.10:50070/webhdfs/v1/tmp/100000.log?op=CREATE&user.name=hdfs&overwrite=true"

4 Fetching strings longer than 8000 characters through the Windows OS ODBC driver #

--Split the string into chunks on the Hive side like this, then concatenate them.
select concat(convert(nvarchar(max), s1), s2, s3) s
from openquery(hive_odbc, 'select substring(s, 1, 4000) s1, substring(s, 4001, 4000) s2, substring(s, 8001, 4000) s3 from tbl')

5 debug #

hive -hiveconf hive.root.logger=DEBUG,console

6 alter column #

ALTER TABLE table_name CHANGE old_col_name new_col_name new_data_type

To place the column after the id column:
ALTER TABLE table_name CHANGE old_col_name new_col_name new_data_type AFTER id
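A concrete instance of the template above (table and column names are hypothetical):

-- rename sal to salary, change its type, and move it after id (hypothetical names)
ALTER TABLE emp CHANGE sal salary decimal(10,2) AFTER id;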

7 Dates #

select
    from_unixtime(unix_timestamp(), 'yyyy-MM-dd') today  
,   from_unixtime(unix_timestamp()-1*60*60, 'yyyy-MM-dd hh:mm:ss.SSS') as one_hour_ago_12h
,   from_unixtime(unix_timestamp()-1*60*60, 'yyyy-MM-dd HH:mm:ss.SSS') as one_hour_ago_24h
,   from_unixtime(unix_timestamp()-1*24*60*60, 'yyyy-MM-dd') yesterday
,   from_unixtime(unix_timestamp()+1*24*60*60, 'yyyy-MM-dd') tomorrow
,   cast(from_unixtime(unix_timestamp(), 'yyyyMMdd') as int) today_int
,   cast(cast(current_timestamp as decimal(38,6)) + 0.000001 as timestamp) plus_1_microsecond
from dual
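Hive's date builtins cover the common cases without unix_timestamp arithmetic; a minimal sketch (current_date needs Hive 1.2+):

select
    date_add(current_date, 1) tomorrow
,   date_sub(current_date, 1) yesterday
,   datediff('2019-01-02', '2019-01-01') diff_days
from dual;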

8 Dropping a partitioned table (drop) #

Dropping a table with many partitions from Hive takes a long time.
The slow part is deleting the partition metadata.
To drop it quickly, delete the metadata directly first. If the Hive metastore lives in MySQL, it goes like this..

1. Delete the partition metadata
--find the TBL_ID of the table
select * from hive.TBLS where TBL_ID = 27992
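If only the table name is known, the TBL_ID can be looked up first; a sketch assuming the default metastore schema ('xxxxx' stands for the table being dropped):

select t.TBL_ID, d.NAME db_name, t.TBL_NAME
from hive.TBLS t
join hive.DBS d on d.DB_ID = t.DB_ID
where t.TBL_NAME = 'xxxxx';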
 
 
--delete the partition rows.
delete from hive.PARTITION_KEY_VALS where PART_ID in (select part_id from hive.PARTITIONS where TBL_ID = 27992);
delete from hive.PARTITION_PARAMS   where PART_ID in (select part_id from hive.PARTITIONS where TBL_ID = 27992);
delete from hive.PART_COL_PRIVS     where PART_ID in (select part_id from hive.PARTITIONS where TBL_ID = 27992);
delete from hive.PART_COL_STATS     where PART_ID in (select part_id from hive.PARTITIONS where TBL_ID = 27992);
delete from hive.PART_PRIVS         where PART_ID in (select part_id from hive.PARTITIONS where TBL_ID = 27992);
delete from hive.PARTITIONS                                                               where TBL_ID = 27992 ;

select * from hive.PARTITIONS where TBL_ID = 27992 -- verify nothing is left

2. drop table xxxxx

9 Compression #

--tail end of a CREATE TABLE statement: ORC with LZ4
STORED AS ORC
LOCATION 'maprfs:/mapr/demo.jbates.mapr/data/hive/orc2/trips'
TBLPROPERTIES ( "orc.compress"="LZ4" );
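For reference, a complete statement in the same shape; the table, columns, and location below are made up, and SNAPPY or ZLIB can be substituted for LZ4 the same way:

-- hypothetical table and path, shown only to make the fragment above whole
CREATE TABLE trips_orc (id int, fare double)
STORED AS ORC
LOCATION '/data/hive/orc2/trips'
TBLPROPERTIES ("orc.compress"="LZ4");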



CREATE TABLE student_text_lz4 (id STRING, name STRING) 
ROW FORMAT DELIMITED 
    FIELDS TERMINATED BY ',' 
    LINES TERMINATED BY '\n' 
STORED AS TEXTFILE; 

SET hive.exec.compress.output=true; 
SET mapred.output.compress=true; 
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.Lz4Codec; 

INSERT OVERWRITE TABLE student_text_lz4 SELECT * FROM student; 

10 string to array #

--https://stackoverflow.com/questions/45965889/how-do-we-convert-a-string-into-array-in-hive
hive> with releases as (select '["us","ca","fr"]' as country)
    > select  split(regexp_extract(country,'^\\["(.*)\\"]$',1),'","')
    > from    releases
    > ;
OK
_c0
["us","ca","fr"]

11 set command #

set hive.execution.engine=mr;
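Running set with just the key prints the current value, which is handy for checking what is in effect:

set hive.execution.engine;
-- prints something like hive.execution.engine=mr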

12 Out Of Memory #

13 beeline: the new hive CLI #


[root@sandbox ~]# beeline
Beeline version 1.2.1000.2.6.1.0-129 by Apache Hive
beeline> !connect jdbc:hive2://localhost:10000 scott tiger
Connecting to jdbc:hive2://localhost:10000
Connected to: Apache Hive (version 1.2.1000.2.6.1.0-129)
Driver: Hive JDBC (version 1.2.1000.2.6.1.0-129)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://localhost:10000> show tables;
+------------+--+
|  tab_name  |
+------------+--+
| sample_07  |
| sample_08  |
+------------+--+
2 rows selected (1.895 seconds)
0: jdbc:hive2://localhost:10000> select * from sample_07 limit 10;
+-----------------+--------------------------------------+----------------------+-------------------+--+
| sample_07.code  |        sample_07.description         | sample_07.total_emp  | sample_07.salary  |
+-----------------+--------------------------------------+----------------------+-------------------+--+
| 00-0000         | All Occupations                      | 134354250            | 40690             |
| 11-0000         | Management occupations               | 6003930              | 96150             |
| 11-1011         | Chief executives                     | 299160               | 151370            |
| 11-1021         | General and operations managers      | 1655410              | 103780            |
| 11-1031         | Legislators                          | 61110                | 33880             |
| 11-2011         | Advertising and promotions managers  | 36300                | 91100             |
| 11-2021         | Marketing managers                   | 165240               | 113400            |
| 11-2022         | Sales managers                       | 322170               | 106790            |
| 11-2031         | Public relations managers            | 47210                | 97170             |
| 11-3011         | Administrative services managers     | 239360               | 76370             |
+-----------------+--------------------------------------+----------------------+-------------------+--+
10 rows selected (0.607 seconds)
0: jdbc:hive2://localhost:10000>

14 Showing column names in select results #

set hive.cli.print.header=true;

15 hive server(hive thrift) #


hive --service hiveserver

It listens on port 10000 by default.

16 hive web interface #


hive --service hwi
It uses port 9999 by default; to change it, set the port in hive-site.xml.

http://192.168.136.100:9999/hwi/ --> open it like this in a browser. (Connecting to http://192.168.136.100:9999 alone, without /hwi/, does not work.)

17 .hiverc #

$HOME/.hiverc
set hive.cli.print.current.db=true;
set hive.exec.mode.local.auto=true;
set hive.cli.print.header=true;

Alternatively, load an init file explicitly when starting hive:
hive -i $HOME/.hiverc

18 Variables #

set hivevar:p1 = 'SCOTT';
select * from emp where ename = ${hivevar:p1};
An example run:
hive (default)> set hivevar:p1 = 'SCOTT';
hive (default)> select * from emp where ename = ${hivevar:p1};
Automatically selecting local only mode for query
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/huser/.log
Job running in-process (local Hadoop)
Hadoop job information for null: number of mappers: 0; number of reducers: 0
2013-08-06 23:20:35,671 null map = 0%,  reduce = 0%
Ended Job = job_local419394988_0001
Execution completed successfully
Mapred Local Task Succeeded . Convert the Join into MapJoin
OK
empno   ename   job     mgr     hiredate        sal     comm    deptno
7788    SCOTT   ANALYST 7566    1982-12-09      3000    NULL    20
Time taken: 2.894 seconds, Fetched: 1 row(s)
hive (default)>

19 oracle dual table #

echo 1 > dual.txt
create table dual (dummy STRING);
load data local inpath 'dual.txt' overwrite into table dual;
select date_add('2008-12-31', 1) from dual;
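On Hive 0.13 and later, a FROM-less select also works, so the dual table is optional there:

select date_add('2008-12-31', 1);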


20 repair #

--https://stackoverflow.com/questions/44931305/partition-in-metastore-but-path-doesnt-exist-in-hdfs
hive> create table mytable (i int) partitioned by (p int);
OK
Time taken: 0.539 seconds

hive> !mkdir mytable/p=1;
hive> !mkdir mytable/p=2;
hive> !mkdir mytable/p=3;

hive> msck repair table mytable;
OK
Partitions not in metastore:    mytable:p=1 mytable:p=2 mytable:p=3
Repair: Added partition to metastore mytable:p=1
Repair: Added partition to metastore mytable:p=2
Repair: Added partition to metastore mytable:p=3
Time taken: 0.918 seconds, Fetched: 4 row(s)

hive> show partitions mytable;
OK
p=1
p=2
p=3
Time taken: 0.331 seconds, Fetched: 3 row(s)

hive> !rmdir mytable/p=1;
hive> !rmdir mytable/p=2;
hive> !rmdir mytable/p=3;

hive> msck repair table mytable;
OK
Partitions missing from filesystem: mytable:p=1 mytable:p=2 mytable:p=3
Time taken: 0.425 seconds, Fetched: 1 row(s)

hive> show partitions mytable;
OK
p=1
p=2
p=3
Time taken: 0.56 seconds, Fetched: 3 row(s)
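Hive 3.0+ also has explicit options for the reverse direction, dropping partitions whose directories are gone (a sketch; not available on older versions):

msck repair table mytable drop partitions;
-- or add and drop in one pass:
msck repair table mytable sync partitions;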

21 Dynamic partition insert #

set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapred.output.compression.type=BLOCK;
set io.seqfile.compression.type=BLOCK;
set hive.exec.parallel=true;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.max.dynamic.partitions=1000000;
set hive.exec.max.created.files=10000000;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.execution.engine=spark;
  
insert overwrite table gamelog partition(date_key, channel_id)
select log, date_key, channel_id --the dynamic partition columns must come last, in partition-spec order..
from gamelog;
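Static and dynamic partition keys can be mixed, as long as the static keys come first in the partition spec; a sketch with a hypothetical source table gamelog_src:

insert overwrite table gamelog partition(date_key=20190101, channel_id)
select log, channel_id  -- only the still-dynamic key stays in the select list
from gamelog_src;       -- hypothetical source table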

22 Caveats for add columns on a partitioned table #

set hive.cli.print.header=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;

drop table test3;
create external table test3
(
    a string
)
partitioned by (date_key int, hh int)
stored as sequencefile
;

insert overwrite table test3 partition(date_key, hh)
select 'a', 20190101, 1
from (select 1) t;

select * from test3;

hive> 
    > select * from test3;
OK
test3.a test3.date_key  test3.hh
a       20190101        1
Time taken: 0.064 seconds, Fetched: 1 row(s)
hive> 

Now, if you just add columns...
alter table test3 add columns(b string, c string);

insert overwrite table test3 partition(date_key, hh)
select 'a', 'b', 'c', 20190101, 1
from (select 1) t;

insert overwrite table test3 partition(date_key, hh)
select 'a', 'b', 'c', 20190102, 1
from (select 1) t;

select * from test3;

The partition that existed before the ALTER still comes back NULL for the new columns, even though values were inserted:
hive> 
    > select * from test3;
OK
test3.a test3.b test3.c test3.date_key  test3.hh
a       NULL    NULL    20190101        1
a       b       c       20190102        1
Time taken: 0.082 seconds, Fetched: 2 row(s)
hive> 

In that case, adding the cascade option fixes it.
alter table test3 add columns(b string, c string) cascade;

insert overwrite table test3 partition(date_key, hh)
select 'a', 'b', 'c', 20190101, 1
from (select 1) t;

select * from test3;

The result:
hive> 
    > select * from test3;
OK
test3.a test3.b test3.c test3.date_key  test3.hh
a       b       c       20190101        1
a       b       c       20190102        1
Time taken: 0.083 seconds, Fetched: 2 row(s)

23 Error: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in fetcher#15, Caused by: java.lang.OutOfMemoryError: Java heap space #


task_1616538534439_302311_r_000000      2-Sep-2021 04:25:08     2-Sep-2021 04:30:03 (4mins, 54sec)      , Error: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in fetcher#6
        at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.OutOfMemoryError: Java heap space
        at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:56)
        at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:46)
        at org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput.<init>(InMemoryMapOutput.java:63)
        at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.unconditionalReserve(MergeManagerImpl.java:309)
        at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.reserve(MergeManagerImpl.java:299)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyMapOutput(Fetcher.java:539)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyFromHost(Fetcher.java:348)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.run(Fetcher.java:198)

Resolved by adding the following settings:
set mapreduce.reduce.memory.total.bytes=2048MB;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
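Other knobs commonly tuned for this shuffle OOM, as an assumption-level sketch (stock MapReduce property names, not part of the original fix above):

-- hold less of each map output in reducer memory
set mapreduce.reduce.shuffle.memory.limit.percent=0.15;
-- fewer parallel fetchers lowers peak shuffle memory
set mapreduce.reduce.shuffle.parallelcopies=5;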