一、目的
在海豚调度HQL的脚本任务时报错,Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1
二、原本HiveSQL
with t1 as(
select
get_json_object(queue_json,'$.deviceNo') device_no,
get_json_object(queue_json,'$.createTime') create_time,
get_json_object(queue_json,'$.laneNum') lane_num,
get_json_object(queue_json,'$.queueList') queue_list,
day
from hurys_dc_ods.ods_queue
)
insert overwrite table hurys_dc_dwd.dwd_queue partition(day)
select
t1.device_no,
t1.lane_num,
t1.create_time,
get_json_object(list_json,'$.laneNo') lane_no,
get_json_object(list_json,'$.queueCount') queue_count,
cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)) queue_len,
cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)) queue_head,
cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)) queue_tail,
date(t1.create_time) day
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\[|\\]','') , --将json数组两边的中括号去掉
'\\}\\,\\{','\\}\\;\\{'), --将json数组元素之间的逗号换成分号
'\\;') --以分号作为分隔符(split函数以分号作为分隔)
)list_queue as list_json
where device_no is not null and get_json_object(list_json,'$.queueLen') >=0
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)), date(t1.create_time)
;
原本的HiveSQL在Hive中执行正常!!!
三、原因分析
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\[|\\]','') ,
'\\}\\,\\{','\\}\\;\\{'),
'\\;')
)list_queue as list_json
因为+、*、|、\等符号在正则表达示中有相应的不同意义,所以在海豚调度HiveSQL时要进行转义处理。
一般采用加双斜杠进行转义,例如 '\\;') 变成 '\\\\;'),或者 将需要转义的字符放在 []
中。
不过,还是采用在\前面再添加\这种方法居多
四、修改海豚调度HQL的脚本
#! /bin/bash
source /etc/profile
nowdate=`date --date='0 days ago' "+%Y%m%d"`
yesdate=`date -d yesterday +%Y-%m-%d`
hive -e "
use hurys_dc_dwd;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.exec.max.dynamic.partitions=1500;
with t1 as(
select
get_json_object(queue_json,'$.deviceNo') device_no,
get_json_object(queue_json,'$.createTime') create_time,
get_json_object(queue_json,'$.laneNum') lane_num,
get_json_object(queue_json,'$.queueList') queue_list,
day
from hurys_dc_ods.ods_queue
)
insert overwrite table hurys_dc_dwd.dwd_queue partition(day='$yesdate')
select
t1.device_no,
t1.lane_num,
t1.create_time,
get_json_object(list_json,'$.laneNo') lane_no,
get_json_object(list_json,'$.queueCount') queue_count,
cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)) queue_len,
cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)) queue_head,
cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)) queue_tail
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\\\[|\\\\]','') , --将json数组两边的中括号去掉
'\\\\}\\\\,\\\\{','\\\\}\\\\;\\\\{'), --将json数组元素之间的逗号换成分号
'\\\\;') --以分号作为分隔符(split函数以分号作为分隔)
)list_queue as list_json
where device_no is not null and get_json_object(list_json,'$.queueLen') >=0 and date(t1.create_time) = '$yesdate'
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)), date(t1.create_time)
"
五、海豚脚本改好后执行任务
修改脚本后,海豚任务执行成功!又学会一招
六、在Hive的计算引擎为MapReduce时,脚本里最好不要添加Hive调优语句,否则可能会报错
(一)Hive的计算引擎为MapReduce
set hive.execution.engine; --hive.execution.engine=mr
(二)在海豚脚本里添加Hive调优的SQL语句
set hive.vectorized.execution.enabled=false;
set hive.auto.convert.join=false;
set hive.exec.parallel=true;
set hive.support.concurrency=false;
(三)海豚任务运行报错
七、在Hive中执行的HiveSQL不能使用 \\\\; 只能使用 \\;,否则会报错
(一)原先执行成功的HQL 使用 \\;
with t1 as(
select
get_json_object(queue_json,'$.deviceNo') device_no,
get_json_object(queue_json,'$.createTime') create_time,
get_json_object(queue_json,'$.laneNum') lane_num,
get_json_object(queue_json,'$.queueList') queue_list,
day
from hurys_dc_ods.ods_queue
)
insert overwrite table hurys_dc_dwd.dwd_queue partition(day)
select
t1.device_no,
t1.lane_num,
t1.create_time,
get_json_object(list_json,'$.laneNo') lane_no,
get_json_object(list_json,'$.queueCount') queue_count,
cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)) queue_len,
cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)) queue_head,
cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)) queue_tail,
date(t1.create_time) day
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\[|\\]','') , --将json数组两边的中括号去掉
'\\}\\,\\{','\\}\\;\\{'), --将json数组元素之间的逗号换成分号
'\\;') --以分号作为分隔符(split函数以分号作为分隔)
)list_queue as list_json
where device_no is not null and get_json_object(list_json,'$.queueLen') >=0
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)), date(t1.create_time)
;
(二)如果使用 \\\\; 则运行报错
with t1 as(
select
get_json_object(queue_json,'$.deviceNo') device_no,
get_json_object(queue_json,'$.createTime') create_time,
get_json_object(queue_json,'$.laneNum') lane_num,
get_json_object(queue_json,'$.queueList') queue_list,
day
from hurys_dc_ods.ods_queue
)
insert overwrite table hurys_dc_dwd.dwd_queue partition(day)
select
t1.device_no,
t1.lane_num,
t1.create_time,
get_json_object(list_json,'$.laneNo') lane_no,
get_json_object(list_json,'$.queueCount') queue_count,
cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)) queue_len,
cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)) queue_head,
cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)) queue_tail,
date(t1.create_time) day
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\\\[|\\\\]','') , --将json数组两边的中括号去掉
'\\\\}\\\\,\\\\{','\\\\}\\\\;\\\\{'), --将json数组元素之间的逗号换成分号
'\\\\;') --以分号作为分隔符(split函数以分号作为分隔)
)list_queue as list_json
where device_no is not null and get_json_object(list_json,'$.queueLen') >=0
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen') as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead') as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail') as decimal(10,2)), date(t1.create_time)
;
运行报错:[08S01][2] Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
Hive中不能再转义,否则会报错!!!
八、总结一下
(一)在Hive的执行的HQL,不需要转义处理,例如使用 \\;
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\[|\\]','') , --将json数组两边的中括号去掉
'\\}\\,\\{','\\}\\;\\{'), --将json数组元素之间的逗号换成分号
'\\;') --以分号作为分隔符(split函数以分号作为分隔)
(二)在海豚调度执行HiveSQL脚本任务时,需要使用\\进行转义处理,例如\\\\;
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
'\\\\[|\\\\]','') , --将json数组两边的中括号去掉
'\\\\}\\\\,\\\\{','\\\\}\\\\;\\\\{'), --将json数组元素之间的逗号换成分号
'\\\\;') --以分号作为分隔符(split函数以分号作为分隔)文章来源:https://www.toymoban.com/news/detail-789007.html
总是会遇到一些奇奇怪怪的问题,不过九九八十一难,也算是又经历一难吧,这个问题基本搞了一天。终于搞定了!文章来源地址https://www.toymoban.com/news/detail-789007.html
到了这里,关于二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!