Nested collections in Hive

1, 2 & 3 ... Let's go!

1. SHELL

# Build a one-line sample file. Line layout: <id>,<col1>:<col2>|<col1>:<col2>
#   ','  separates the id from the nested collection
#   '|'  separates the elements of the collection
#   ':'  separates the two fields inside each element
echo "1345653,110909316904:1341894546|221065796761:1341887508" > /tmp/20170317_array_inputfile.txt

# Create the HDFS directory that the external Hive table will point at.
hdfs dfs -mkdir -p /tmp/20170317/array_test/input

# -f overwrites an existing copy, so the walkthrough can be re-run without
# failing with "File exists".
hdfs dfs -put -f /tmp/20170317_array_inputfile.txt /tmp/20170317/array_test/input

# The local copy is no longer needed once the file is in HDFS.
rm /tmp/20170317_array_inputfile.txt

2. HIVE

-- External table over the raw text file uploaded in step 1.
-- Each input line looks like: <id>,<col1>:<col2>|<col1>:<col2>
-- Hive assigns the declared delimiters by nesting level:
--   ','  (fields)            separates the top-level columns id and record
--   '|'  (collection items)  separates the elements of the array
--   ':'  (map keys)          is the next level down, so here it separates
--                            the fields of each struct inside the array
DROP TABLE IF EXISTS SAMPLE;
CREATE EXTERNAL TABLE SAMPLE (
  id     BIGINT,
  record ARRAY<STRUCT<col1:STRING, col2:STRING>>
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  COLLECTION ITEMS TERMINATED BY '|'
  MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION '/tmp/20170317/array_test/input';
 
 
-- Managed ORC table with the same schema as SAMPLE.
-- ORC stores complex types in its own binary layout, so the text delimiters
-- (fields / collection items / map keys) do not apply and are not declared.
DROP TABLE IF EXISTS SAMPLE_ORC;
CREATE TABLE SAMPLE_ORC (
  id     BIGINT,
  record ARRAY<STRUCT<col1:STRING, col2:STRING>>
)
STORED AS ORC;


-- Copy the parsed rows from the text-backed table into the ORC table.
-- Columns are listed explicitly so the load does not depend on column order.
INSERT INTO TABLE SAMPLE_ORC
SELECT
  id,
  record
FROM SAMPLE;

-- Both tables should return the same single row.
SELECT id, record FROM SAMPLE;
SELECT id, record FROM SAMPLE_ORC;

-- Selecting a struct field through the array returns an array holding that
-- field from every element; for the sample row this should be
-- ["110909316904","221065796761"].
SELECT record.col1 FROM SAMPLE;
SELECT record.col1 FROM SAMPLE_ORC;
 
 
-- Clean up both tables. SAMPLE is external, so dropping it removes only the
-- table definition; its files stay in HDFS and are deleted in step 3.
DROP TABLE IF EXISTS SAMPLE;
DROP TABLE IF EXISTS SAMPLE_ORC;

3. SHELL

# Remove the test directory (including the external table's data file),
# bypassing the HDFS trash. Note the space between -skipTrash and the path.
hdfs dfs -rm -r -skipTrash /tmp/20170317/array_test/

 

References

  1. http://stackoverflow.com/questions/18011252/how-to-define-nested-collection-items-in-hive

You may also like...

Leave a Reply

Your email address will not be published. Required fields are marked *