遇到 pyspark.sql.utils.ParseException:PySpark SQL 报告在 'in' 处缺少 ')'
Getting pyspark.sql.utils.ParseException: missing ')' at 'in' in pyspark sql
您好,我在 PySpark SQL 中运行下面的查询时出现了错误。请帮我看看是哪里遗漏了 ')'。
查询-
-- Query exactly as submitted; kept verbatim because it is the statement the ParseException quoted below refers to.
-- Points to notice: `groupby` is written as one word in the final subquery, and the first join
-- condition references cte1.indicatio although cte1 only selects a column named indication.
`with cte1 as (select `Project Number`, indication,rank() over (partition by `Project Number`,REGEXP_REPLACE(indication,'[^a-zA-Z0-9]+', '') order by `Project Number`,indication) as rnk from (select distinct `Project Number`, indication from vw_onco_pharma onco_pharma union select distinct `Project Number`, indication from vw_onco_cell_gene cell_gene union select distinct `Project Number`, indication from vw_non_onco_cell_gene onco_cell_gene union select distinct `Project Number`, indication from vw_non_onco_pharma non_onco_pharma union select distinct `Project Number`, indication from vw_plasma_protein plasma_protein)),y as (select max(cast(project_id as integer)) as max_prj_id from vw_project_id) select nvl(max_prj_id,0)+ROW_NUMBER () OVER (ORDER BY `Project Number`,indication) as project_id,`Project Number`,indication,date_format(current_timestamp(),'yyyy-MM-dd hh:mm:ss') as HTA_INSERT_DT from (select cte1.`Project Number`, cte1.indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` and REGEXP_REPLACE(cte1.indicatio,'[^a-zA-Z0-9]+', '') = REGEXP_REPLACE(prj.indication,'[^a-zA-Z0-9]+', '') left join y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 group by `project number` having count(*) > 1) union select cte1.`Project Number`, null as indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` left join y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 groupby `project number` having count(*) = 1))`
错误-
pyspark.sql.utils.ParseException:
missing ')' at 'in'(line 1, pos 1575)
最后一个子查询中的 `groupby` 应该是 `group by`(中间缺少空格)。
另外,建议把查询格式化一下,使其更易读:
-- Generates candidate project_id values for (`Project Number`, indication) rows
-- that have no matching row in vw_project_id.
-- Backtick-quoted identifiers are written without padding spaces: the text between
-- the backticks is the column name, so ` Project Number ` would name a different column.
with cte1 as (
    -- One row per (`Project Number`, indication) found in any of the five source views.
    -- rnk = 1 marks the first indication (in sort order) within each group of
    -- indications that are equal once non-alphanumeric characters are removed.
    select
        `Project Number`,
        indication,
        rank() over (
            partition by
                `Project Number`,
                regexp_replace(indication, '[^a-zA-Z0-9]+', '')
            order by
                `Project Number`,
                indication
        ) as rnk
    from (
        -- union removes duplicate rows itself, so no per-branch distinct is needed
        select `Project Number`, indication from vw_onco_pharma
        union
        select `Project Number`, indication from vw_onco_cell_gene
        union
        select `Project Number`, indication from vw_non_onco_cell_gene
        union
        select `Project Number`, indication from vw_non_onco_pharma
        union
        select `Project Number`, indication from vw_plasma_protein
    ) as src
),
y as (
    -- Single-row aggregate: the largest project_id present in vw_project_id
    -- (null when the aggregate has nothing to work with).
    select
        max(cast(project_id as integer)) as max_prj_id
    from vw_project_id
)
select
    -- new ids continue after the current maximum; nvl covers a null maximum
    nvl(max_prj_id, 0) + row_number() over (
        order by
            `Project Number`,
            indication
    ) as project_id,
    `Project Number`,
    indication,
    -- HH = hour of day (00-23); the 12-hour pattern hh without an AM/PM marker
    -- would make morning and afternoon timestamps indistinguishable
    date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss') as HTA_INSERT_DT
from (
    -- Projects with more than one row in cte1: keep the indication and match
    -- existing ids on project number plus normalised indication.
    select
        cte1.`Project Number`,
        cte1.indication,
        y.max_prj_id
    from cte1
    left join vw_project_id as prj
        on cte1.`Project Number` = prj.`Project Number`
        and regexp_replace(cte1.indication, '[^a-zA-Z0-9]+', '')
            = regexp_replace(prj.indication, '[^a-zA-Z0-9]+', '')
    left join y
        on 1 = 1
    where
        cte1.rnk = 1
        and prj.project_id is null  -- anti-join: keep only rows without a match
        and cte1.`Project Number` in (
            select `Project Number`
            from cte1
            group by `Project Number`
            having count(*) > 1
        )

    union

    -- Projects with exactly one row in cte1: indication is emitted as null and
    -- existing ids are matched on project number alone.
    select
        cte1.`Project Number`,
        null as indication,
        y.max_prj_id
    from cte1
    left join vw_project_id as prj
        on cte1.`Project Number` = prj.`Project Number`
    left join y
        on 1 = 1
    where
        cte1.rnk = 1
        and prj.project_id is null
        and cte1.`Project Number` in (
            select `Project Number`
            from cte1
            group by `Project Number`  -- two words; `groupby` is parsed as a table alias
            having count(*) = 1
        )
) as new_rows
您好,我在 PySpark SQL 中运行下面的查询时出现了错误。请帮我看看是哪里遗漏了 ')'。
查询-
-- Query exactly as submitted; kept verbatim because it is the statement the ParseException quoted below refers to.
-- Points to notice: `groupby` is written as one word in the final subquery, and the first join
-- condition references cte1.indicatio although cte1 only selects a column named indication.
`with cte1 as (select `Project Number`, indication,rank() over (partition by `Project Number`,REGEXP_REPLACE(indication,'[^a-zA-Z0-9]+', '') order by `Project Number`,indication) as rnk from (select distinct `Project Number`, indication from vw_onco_pharma onco_pharma union select distinct `Project Number`, indication from vw_onco_cell_gene cell_gene union select distinct `Project Number`, indication from vw_non_onco_cell_gene onco_cell_gene union select distinct `Project Number`, indication from vw_non_onco_pharma non_onco_pharma union select distinct `Project Number`, indication from vw_plasma_protein plasma_protein)),y as (select max(cast(project_id as integer)) as max_prj_id from vw_project_id) select nvl(max_prj_id,0)+ROW_NUMBER () OVER (ORDER BY `Project Number`,indication) as project_id,`Project Number`,indication,date_format(current_timestamp(),'yyyy-MM-dd hh:mm:ss') as HTA_INSERT_DT from (select cte1.`Project Number`, cte1.indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` and REGEXP_REPLACE(cte1.indicatio,'[^a-zA-Z0-9]+', '') = REGEXP_REPLACE(prj.indication,'[^a-zA-Z0-9]+', '') left join y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 group by `project number` having count(*) > 1) union select cte1.`Project Number`, null as indication,max_prj_id from cte1 left join vw_project_id prj on cte1.`Project Number` = prj.`Project Number` left join y on 1 = 1 where rnk = 1 and prj.project_id is null and cte1.`project number` in (select `project number` from cte1 groupby `project number` having count(*) = 1))`
错误-
pyspark.sql.utils.ParseException:
missing ')' at 'in'(line 1, pos 1575)
最后一个子查询中的 `groupby` 应该是 `group by`(中间缺少空格)。
另外,建议把查询格式化一下,使其更易读:
-- Generates candidate project_id values for (`Project Number`, indication) rows
-- that have no matching row in vw_project_id.
-- Backtick-quoted identifiers are written without padding spaces: the text between
-- the backticks is the column name, so ` Project Number ` would name a different column.
with cte1 as (
    -- One row per (`Project Number`, indication) found in any of the five source views.
    -- rnk = 1 marks the first indication (in sort order) within each group of
    -- indications that are equal once non-alphanumeric characters are removed.
    select
        `Project Number`,
        indication,
        rank() over (
            partition by
                `Project Number`,
                regexp_replace(indication, '[^a-zA-Z0-9]+', '')
            order by
                `Project Number`,
                indication
        ) as rnk
    from (
        -- union removes duplicate rows itself, so no per-branch distinct is needed
        select `Project Number`, indication from vw_onco_pharma
        union
        select `Project Number`, indication from vw_onco_cell_gene
        union
        select `Project Number`, indication from vw_non_onco_cell_gene
        union
        select `Project Number`, indication from vw_non_onco_pharma
        union
        select `Project Number`, indication from vw_plasma_protein
    ) as src
),
y as (
    -- Single-row aggregate: the largest project_id present in vw_project_id
    -- (null when the aggregate has nothing to work with).
    select
        max(cast(project_id as integer)) as max_prj_id
    from vw_project_id
)
select
    -- new ids continue after the current maximum; nvl covers a null maximum
    nvl(max_prj_id, 0) + row_number() over (
        order by
            `Project Number`,
            indication
    ) as project_id,
    `Project Number`,
    indication,
    -- HH = hour of day (00-23); the 12-hour pattern hh without an AM/PM marker
    -- would make morning and afternoon timestamps indistinguishable
    date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss') as HTA_INSERT_DT
from (
    -- Projects with more than one row in cte1: keep the indication and match
    -- existing ids on project number plus normalised indication.
    select
        cte1.`Project Number`,
        cte1.indication,
        y.max_prj_id
    from cte1
    left join vw_project_id as prj
        on cte1.`Project Number` = prj.`Project Number`
        and regexp_replace(cte1.indication, '[^a-zA-Z0-9]+', '')
            = regexp_replace(prj.indication, '[^a-zA-Z0-9]+', '')
    left join y
        on 1 = 1
    where
        cte1.rnk = 1
        and prj.project_id is null  -- anti-join: keep only rows without a match
        and cte1.`Project Number` in (
            select `Project Number`
            from cte1
            group by `Project Number`
            having count(*) > 1
        )

    union

    -- Projects with exactly one row in cte1: indication is emitted as null and
    -- existing ids are matched on project number alone.
    select
        cte1.`Project Number`,
        null as indication,
        y.max_prj_id
    from cte1
    left join vw_project_id as prj
        on cte1.`Project Number` = prj.`Project Number`
    left join y
        on 1 = 1
    where
        cte1.rnk = 1
        and prj.project_id is null
        and cte1.`Project Number` in (
            select `Project Number`
            from cte1
            group by `Project Number`  -- two words; `groupby` is parsed as a table alias
            having count(*) = 1
        )
) as new_rows