# Download and unpack the sample data archive (presumably it contains the
# three CSV files uploaded below -- confirm against the archive contents).
wget http://databaser.net/moniwiki/pds/Hive_ec_98_88_ec_a0_9c_ed_8c_8c_ec_9d_bc/data.zip
unzip data.zip
# Create the HDFS directory `scott` (a relative path) and copy the CSVs into it.
hadoop fs -mkdir scott
hadoop fs -put dept.csv emp.csv salgrade.csv scott
# Launch Pig; the Pig Latin statements that follow are entered in its shell.
pig
-- Total salary per department, left-joined to the department table.
-- Only deptno carries a declared type; every other column (including sal)
-- is loaded untyped.
emp = LOAD 'scott/emp.csv' USING PigStorage(',')
      AS (empno, ename, job, mgr, hiredate, sal, comm, deptno:int);
grouped = GROUP emp BY deptno;
-- In a grouped relation the key is exposed as `group` and the matching rows
-- as the bag `emp`; project the key through `group`, because `emp.deptno`
-- would yield a bag of values rather than a single department number.
total = FOREACH grouped GENERATE group, SUM(emp.sal) AS total_sal;
-- NOTE(review): the column order (dname, loc, deptno) is taken as declared
-- here -- confirm it matches the actual layout of dept.csv.
dept = LOAD 'scott/dept.csv' USING PigStorage(',')
       AS (dname, loc, deptno:int);
-- LEFT OUTER keeps every department total even without a matching dept row.
join_data = JOIN total BY group LEFT OUTER, dept BY deptno;
-- Joined columns in order: group, total_sal, dname, loc, deptno.
-- group, loc and total_sal are unique across both inputs, so they need no
-- relation prefix.
view = FOREACH join_data GENERATE group, loc, total_sal;
DUMP view;
-- Top three earners among employees whose salary is at least 2000.
-- sal is declared as double so that ORDER compares it numerically. Left
-- untyped it would be a bytearray, which is compared byte by byte, so a
-- five-digit salary such as 10000 would sort below 9000.
emp = load 'scott/emp.csv' using PigStorage(',') as (empno, ename, job, mgr, hiredate, sal:double, comm, deptno:int);
-- Keep only the two columns this query reports.
emp = foreach emp generate ename, sal;
filtered_set = filter emp by sal >= 2000;
sorted_set = order filtered_set by sal desc;
-- Applied to the sorted relation, so the three highest salaries are kept.
top3 = limit sorted_set 3;
dump top3;