大数据全系列 教程
1869个小节阅读:465.1k
JAVA全系列 教程
面向对象的程序设计语言
Python全系列 教程
Python3.x版本,未来主流的版本
人工智能 教程
顺势而为,AI创新未来
大厂算法 教程
算法,程序员自我提升必经之路
C++ 教程
一门通用计算机编程语言
微服务 教程
目前业界流行的框架组合
web前端全系列 教程
通向WEB技术世界的钥匙
大数据全系列 教程
站在云端操控万千数据
AIGC全能工具班
A A
White Night
建表语句:
xxxxxxxxxx
-- Demo table mixing primitive columns with an array column and a map column.
-- No ROW FORMAT clause is given, so Hive's default delimiters apply.
CREATE TABLE person3 (
    id      INT                COMMENT "唯一标识id",
    name    STRING             COMMENT "名称",
    likes   ARRAY<STRING>      COMMENT "爱好",
    address MAP<STRING,STRING> COMMENT "地址"
);
操作:
xxxxxxxxxx
hive> create table person3(
> id int comment "唯一标识id",
> name string comment "名称",
> likes array<string> comment "爱好",
> address map<string,string> comment "地址"
> );
OK
Time taken: 0.173 seconds
hive> show tables;
OK
person
person2
person2_1
person2_2
person3
psn
Time taken: 0.063 seconds, Fetched: 6 row(s)
准备数据:
xxxxxxxxxx
[root@node4 data]# cp person.txt person3.txt
[root@node4 data]# vim person3.txt
1^A小明1^Alol^Bbook^Bmovie^Abeijing^Cxisanqi^Bshanghai^Cpudong
注意:在vim的插入模式下,^A的输入方法是先按Ctrl+V,再按Ctrl+A;^B和^C同理(先按Ctrl+V,再分别按Ctrl+B、Ctrl+C)。
将数据load到person3表中:
xxxxxxxxxx
hive> load data local inpath '/root/data/person3.txt' into table person3;
Loading data to table default.person3
OK
Time taken: 0.59 seconds
hive> select * from person3;
OK
1 小明1 ["lol","book","movie"] {"beijing":"xisanqi","shanghai":"pudong"}
Time taken: 0.298 seconds, Fetched: 1 row(s)
hive> desc formatted person3;
OK
# col_name data_type comment
id int 唯一标识id
name string 名称
likes array<string> 爱好
address map<string,string> 地址
# Detailed Table Information
Database: default
OwnerType: USER
Owner: root
CreateTime: Thu Nov 11 15:36:10 CST 2021
LastAccessTime: UNKNOWN
Retention: 0
Location: hdfs://mycluster/user/hive_remote/warehouse/person3
Table Type: MANAGED_TABLE
Table Parameters:
bucketing_version 2
numFiles 1
numRows 0
rawDataSize 0
totalSize 57
transient_lastDdlTime 1636616468
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1 #建表时没有自定义分隔符,使用的是Hive默认的分隔符。
用自定义的方式显式指定默认的分隔符(了解即可):
xxxxxxxxxx
-- Same columns as person3, but with the delimiters spelled out explicitly.
-- \001, \002 and \003 are the control characters ^A, ^B and ^C.
CREATE TABLE person3_1 (
    id      INT                COMMENT "唯一标识id",
    name    STRING             COMMENT "名称",
    likes   ARRAY<STRING>      COMMENT "爱好",
    address MAP<STRING,STRING> COMMENT "地址"
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY "\001"
    COLLECTION ITEMS TERMINATED BY "\002"
    MAP KEYS TERMINATED BY "\003";
演示:
xxxxxxxxxx
hive> create table person3_1(
> id int comment "唯一标识id",
> name string comment "名称",
> likes array<string> comment "爱好",
> address map<string,string> comment "地址"
> )
> row format delimited
> fields terminated by "\001"
> collection items terminated by "\002"
> map keys terminated by "\003";
OK
Time taken: 0.16 seconds
hive> desc formatted person3_1;
OK
# col_name data_type comment
id int 唯一标识id
name string 名称
likes array<string> 爱好
address map<string,string> 地址
# Detailed Table Information
Database: default
OwnerType: USER
Owner: root
CreateTime: Thu Nov 11 15:44:46 CST 2021
LastAccessTime: UNKNOWN
Retention: 0
Location: hdfs://mycluster/user/hive_remote/warehouse/person3_1
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"id\":\"true\",\"likes\":\"true\",\"name\":\"true\"}}
bucketing_version 2
numFiles 0
numRows 0
rawDataSize 0
totalSize 0
transient_lastDdlTime 1636616686
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params: #这里显式指定的分隔符与Hive的默认分隔符相同,因此效果和person3一样。
collection.delim \u0002
field.delim \u0001
mapkey.delim \u0003
serialization.format \u0001