在完成 left join 之后,我得到了重复的条目。需要知道如何删除重复条目
After doing a left join I am getting duplicate entries. I need to know how to remove the duplicate entries.
// Script as posted in the question; it is reported to produce duplicate rows
// after the left joins. The code is left untouched and only annotated.
// NOTE(review): `Database` looks like a placeholder for the real source table
// in every SELECT below - confirm.

// Leave balances per employee plus derived measures, computed in a preceding
// LOAD over the SQL result. 8760 = 365 * 24, presumably the hours in a
// non-leap year; the derived fields divide by 24 and then by 30.
// NOTE(review): no `fact:` label is visible in front of this LOAD although the
// joins further down target a table named `fact` - the label may have been
// lost; confirm.
LOAD EmployeeID,
SickLeaveHours,
(8760-SickLeaveHours-VacationHours)as QualityTimeHours,
(8760-SickLeaveHours-VacationHours)/24 as QualityDays,
((8760-SickLeaveHours-VacationHours)/24)/30 as QualityMonths,
VacationHours;
SQL SELECT EmployeeID,
SickLeaveHours,
VacationHours
FROM Database;
// Bare Join: no target table is named, so this attaches to the table loaded
// just above. The inline header row is `F1, F2`, which makes `ShiftID, Shift`
// an ordinary data row (4 data rows in total). F1 and F2 are not fields of the
// table above, so there is no common key and every existing row is combined
// with every inline row.
Join
LOAD * INLINE [
F1, F2
ShiftID, Shift
1, DAY
2, EVENING
3, NIGHT
];
// Adds AddressID to fact; EmployeeID is the field shared with fact.
// NOTE(review): the `b:` label sits after the join prefix - verify that the
// script engine accepts a label in this position.
left join(fact)
b:
LOAD
AddressID,
EmployeeID;
SQL SELECT
AddressID,
EmployeeID
FROM Database;
// Adds DepartmentID to fact via EmployeeID. If an employee has more than one
// row in this source, the matching fact rows are repeated once per match.
left join(fact)
c:
LOAD
DepartmentID,
EmployeeID;
SQL SELECT
DepartmentID,
EmployeeID
FROM Database;
// ShiftID is the only field loaded and it is not a field of fact, so this
// join has no key: every fact row is paired with every ShiftID row.
left join(fact)
LOAD ShiftID;
SQL SELECT ShiftID
FROM Database;
// Adds Rate to fact via EmployeeID.
left Join (fact)
d:
LOAD EmployeeID,
Rate;
SQL SELECT
EmployeeID,
Rate
FROM Database ;
// Stand-alone table; shares the field name EmployeeID with fact.
empDetails:
LOAD BirthDate,
EmployeeID,
Gender,
Title;
SQL SELECT BirthDate,
EmployeeID,
Gender,
Title
FROM Database ;
// Stand-alone table; shares the field name DepartmentID with fact once the
// DepartmentID join above has run.
Department:
LOAD DepartmentID,
GroupName,
Name;
SQL SELECT
DepartmentID,
GroupName,
Name
FROM Database;
// Stand-alone table; shares the field name AddressID with fact once the
// AddressID join above has run.
Address:
LOAD AddressID,
ModifiedDate,
rowguid;
SQL SELECT
AddressID,
ModifiedDate,
rowguid
FROM Database;
// Stand-alone shift table. Name is loaded as lower-case `name`, presumably so
// it does not share a field name with Department's Name - confirm.
shift:
LOAD EndTime,
Name as name,
ShiftID,
StartTime;
SQL SELECT
EndTime,
Name,
ShiftID,
StartTime
FROM Database;
预期结果:table 中没有重复条目。
之所以出现重复项,是因为几乎所有 table 都被 join 到了 fact
table,而其中一些 table 与它没有公共键,这会导致交叉连接(cross join)。例如:
// ShiftID is the only field loaded here, so this join shares no key with fact.
left join(fact)
LOAD ShiftID;
SQL SELECT ShiftID
FROM Database;
上面的脚本只加载了一个字段 ShiftID,而该字段在 fact table 中并不存在,因此这次 join 实际上会执行交叉连接(all-to-all)。
Qlik 按公共字段名称来 join table。如果需要 join 两个 table,那么它们至少应该有一个公共字段。在您的示例中:fact 和 EmpAddress 两个 table 将通过 EmployeeID 字段进行 join/link。
另一点:不要总是试图把所有 table 合并成一个。有时只让它们保持 link(关联)会更好,否则可能得到错误或重复的结果。
例如:fact table 中每个 EmployeeID 可能有多行;如果把 EmpDetails table join(而不是 link)到 fact,然后对 Gender 字段计数,就会得到错误或重复的结果。在一对多(one to many)的情况下,只需 link 这些 table(两个 table 应该有公共字段,但不要 join 它们,Qlik 会自动把它们 link 起来)。
下面的脚本是您脚本的修改版,去掉了到 fact 的 "hard" join(我认为您并不需要这些 join)。另外,shift 数据不会被 link 到任何 table,因为它与员工相关的 table 没有任何公共字段。
// Employee-keyed tables of the reworked script. None of them is joined into
// another table; each stays separate and they share only the EmployeeID field
// name, which is what the data model associates them on.
// NOTE(review): `Database` looks like a placeholder for the real source table
// in every SELECT below - confirm before running.

// Leave balances per employee plus derived measures, computed in a preceding
// LOAD over the SQL result. 8760 = 365 * 24, presumably the hours in a
// non-leap year; the derived fields divide by 24 and then by 30.
fact:
LOAD
    EmployeeID,
    SickLeaveHours,
    (8760 - SickLeaveHours - VacationHours)             AS QualityTimeHours,
    (8760 - SickLeaveHours - VacationHours) / 24        AS QualityDays,
    ((8760 - SickLeaveHours - VacationHours) / 24) / 30 AS QualityMonths,
    VacationHours
;
SQL SELECT
    EmployeeID,
    SickLeaveHours,
    VacationHours
FROM Database;

// Employee -> address bridge; also carries AddressID, the key of Address.
EmpAddress:
LOAD
    EmployeeID,
    AddressID
;
SQL SELECT
    AddressID,
    EmployeeID
FROM Database;

// Employee -> department bridge; also carries DepartmentID, the key of
// Department.
EmpDepartment:
LOAD
    EmployeeID,
    DepartmentID
;
SQL SELECT
    DepartmentID,
    EmployeeID
FROM Database;

// Rate rows per employee.
EmpRate:
LOAD
    EmployeeID,
    Rate
;
SQL SELECT
    EmployeeID,
    Rate
FROM Database;

// Descriptive employee attributes.
EmpDetails:
LOAD
    EmployeeID,
    BirthDate,
    Gender,
    Title
;
SQL SELECT
    BirthDate,
    EmployeeID,
    Gender,
    Title
FROM Database;
// Department lookup, associated with EmpDepartment through DepartmentID.
// The generic source column Name is renamed so that it cannot share a field
// name with a Name column loaded from another source (the shift load renames
// its own Name column as well). The alias was misspelled `DeprtmentName`;
// it is corrected to `DepartmentName` here.
Department:
LOAD
    DepartmentID,
    GroupName,
    Name AS DepartmentName
;
SQL SELECT
    DepartmentID,
    GroupName,
    Name
FROM Database;

// Address lookup, associated with EmpAddress through AddressID.
// ModifiedDate is prefixed with the table name, presumably because other
// source tables expose a ModifiedDate column too - TODO confirm.
Address:
LOAD
    AddressID,
    ModifiedDate AS Address_ModifiedDate,
    rowguid
;
SQL SELECT
    AddressID,
    ModifiedDate,
    rowguid
FROM Database;
// This part of the script stays isolated from the rest of the data model:
// ShiftID does not occur in any of the employee tables, so there is no key
// to associate or join on.
shift:
LOAD
    Name AS ShiftName,
    ShiftID,
    EndTime,
    StartTime
;
SQL SELECT
    EndTime,
    Name,
    ShiftID,
    StartTime
FROM Database;

// Attach the shift labels to `shift` on the common field ShiftID. The target
// table is named explicitly: a bare Join attaches to whichever table was
// loaded last, so its meaning changes silently if statements are reordered.
Join (shift)
LOAD * INLINE [
ShiftID, Shift
1, DAY
2, EVENING
3, NIGHT
];

// Is this table needed at all? It only re-reads ShiftID, a field that
// `shift` already carries.
LOAD
    ShiftID
;
SQL SELECT
    ShiftID
FROM Database;
// Script as posted in the question; it is reported to produce duplicate rows
// after the left joins. The code is left untouched and only annotated.
// NOTE(review): `Database` looks like a placeholder for the real source table
// in every SELECT below - confirm.

// Leave balances per employee plus derived measures, computed in a preceding
// LOAD over the SQL result. 8760 = 365 * 24, presumably the hours in a
// non-leap year; the derived fields divide by 24 and then by 30.
// NOTE(review): no `fact:` label is visible in front of this LOAD although the
// joins further down target a table named `fact` - the label may have been
// lost; confirm.
LOAD EmployeeID,
SickLeaveHours,
(8760-SickLeaveHours-VacationHours)as QualityTimeHours,
(8760-SickLeaveHours-VacationHours)/24 as QualityDays,
((8760-SickLeaveHours-VacationHours)/24)/30 as QualityMonths,
VacationHours;
SQL SELECT EmployeeID,
SickLeaveHours,
VacationHours
FROM Database;
// Bare Join: no target table is named, so this attaches to the table loaded
// just above. The inline header row is `F1, F2`, which makes `ShiftID, Shift`
// an ordinary data row (4 data rows in total). F1 and F2 are not fields of the
// table above, so there is no common key and every existing row is combined
// with every inline row.
Join
LOAD * INLINE [
F1, F2
ShiftID, Shift
1, DAY
2, EVENING
3, NIGHT
];
// Adds AddressID to fact; EmployeeID is the field shared with fact.
// NOTE(review): the `b:` label sits after the join prefix - verify that the
// script engine accepts a label in this position.
left join(fact)
b:
LOAD
AddressID,
EmployeeID;
SQL SELECT
AddressID,
EmployeeID
FROM Database;
// Adds DepartmentID to fact via EmployeeID. If an employee has more than one
// row in this source, the matching fact rows are repeated once per match.
left join(fact)
c:
LOAD
DepartmentID,
EmployeeID;
SQL SELECT
DepartmentID,
EmployeeID
FROM Database;
// ShiftID is the only field loaded and it is not a field of fact, so this
// join has no key: every fact row is paired with every ShiftID row.
left join(fact)
LOAD ShiftID;
SQL SELECT ShiftID
FROM Database;
// Adds Rate to fact via EmployeeID.
left Join (fact)
d:
LOAD EmployeeID,
Rate;
SQL SELECT
EmployeeID,
Rate
FROM Database ;
// Stand-alone table; shares the field name EmployeeID with fact.
empDetails:
LOAD BirthDate,
EmployeeID,
Gender,
Title;
SQL SELECT BirthDate,
EmployeeID,
Gender,
Title
FROM Database ;
// Stand-alone table; shares the field name DepartmentID with fact once the
// DepartmentID join above has run.
Department:
LOAD DepartmentID,
GroupName,
Name;
SQL SELECT
DepartmentID,
GroupName,
Name
FROM Database;
// Stand-alone table; shares the field name AddressID with fact once the
// AddressID join above has run.
Address:
LOAD AddressID,
ModifiedDate,
rowguid;
SQL SELECT
AddressID,
ModifiedDate,
rowguid
FROM Database;
// Stand-alone shift table. Name is loaded as lower-case `name`, presumably so
// it does not share a field name with Department's Name - confirm.
shift:
LOAD EndTime,
Name as name,
ShiftID,
StartTime;
SQL SELECT
EndTime,
Name,
ShiftID,
StartTime
FROM Database;
预期结果:table 中没有重复条目。
之所以出现重复项,是因为几乎所有 table 都被 join 到了 fact
table,而其中一些 table 与它没有公共键,这会导致交叉连接(cross join)。例如:
// ShiftID is the only field loaded here, so this join shares no key with fact.
left join(fact)
LOAD ShiftID;
SQL SELECT ShiftID
FROM Database;
上面的脚本只加载了一个字段 ShiftID,而该字段在 fact table 中并不存在,因此这次 join 实际上会执行交叉连接(all-to-all)。
Qlik 按公共字段名称来 join table。如果需要 join 两个 table,那么它们至少应该有一个公共字段。在您的示例中:fact 和 EmpAddress 两个 table 将通过 EmployeeID 字段进行 join/link。
另一点:不要总是试图把所有 table 合并成一个。有时只让它们保持 link(关联)会更好,否则可能得到错误或重复的结果。
例如:fact table 中每个 EmployeeID 可能有多行;如果把 EmpDetails table join(而不是 link)到 fact,然后对 Gender 字段计数,就会得到错误或重复的结果。在一对多(one to many)的情况下,只需 link 这些 table(两个 table 应该有公共字段,但不要 join 它们,Qlik 会自动把它们 link 起来)。
下面的脚本是您脚本的修改版,去掉了到 fact 的 "hard" join(我认为您并不需要这些 join)。另外,shift 数据不会被 link 到任何 table,因为它与员工相关的 table 没有任何公共字段。
// Employee-keyed tables of the reworked script. None of them is joined into
// another table; each stays separate and they share only the EmployeeID field
// name, which is what the data model associates them on.
// NOTE(review): `Database` looks like a placeholder for the real source table
// in every SELECT below - confirm before running.

// Leave balances per employee plus derived measures, computed in a preceding
// LOAD over the SQL result. 8760 = 365 * 24, presumably the hours in a
// non-leap year; the derived fields divide by 24 and then by 30.
fact:
LOAD
    EmployeeID,
    SickLeaveHours,
    (8760 - SickLeaveHours - VacationHours)             AS QualityTimeHours,
    (8760 - SickLeaveHours - VacationHours) / 24        AS QualityDays,
    ((8760 - SickLeaveHours - VacationHours) / 24) / 30 AS QualityMonths,
    VacationHours
;
SQL SELECT
    EmployeeID,
    SickLeaveHours,
    VacationHours
FROM Database;

// Employee -> address bridge; also carries AddressID, the key of Address.
EmpAddress:
LOAD
    EmployeeID,
    AddressID
;
SQL SELECT
    AddressID,
    EmployeeID
FROM Database;

// Employee -> department bridge; also carries DepartmentID, the key of
// Department.
EmpDepartment:
LOAD
    EmployeeID,
    DepartmentID
;
SQL SELECT
    DepartmentID,
    EmployeeID
FROM Database;

// Rate rows per employee.
EmpRate:
LOAD
    EmployeeID,
    Rate
;
SQL SELECT
    EmployeeID,
    Rate
FROM Database;

// Descriptive employee attributes.
EmpDetails:
LOAD
    EmployeeID,
    BirthDate,
    Gender,
    Title
;
SQL SELECT
    BirthDate,
    EmployeeID,
    Gender,
    Title
FROM Database;
// Department lookup, associated with EmpDepartment through DepartmentID.
// The generic source column Name is renamed so that it cannot share a field
// name with a Name column loaded from another source (the shift load renames
// its own Name column as well). The alias was misspelled `DeprtmentName`;
// it is corrected to `DepartmentName` here.
Department:
LOAD
    DepartmentID,
    GroupName,
    Name AS DepartmentName
;
SQL SELECT
    DepartmentID,
    GroupName,
    Name
FROM Database;

// Address lookup, associated with EmpAddress through AddressID.
// ModifiedDate is prefixed with the table name, presumably because other
// source tables expose a ModifiedDate column too - TODO confirm.
Address:
LOAD
    AddressID,
    ModifiedDate AS Address_ModifiedDate,
    rowguid
;
SQL SELECT
    AddressID,
    ModifiedDate,
    rowguid
FROM Database;
// This part of the script stays isolated from the rest of the data model:
// ShiftID does not occur in any of the employee tables, so there is no key
// to associate or join on.
shift:
LOAD
    Name AS ShiftName,
    ShiftID,
    EndTime,
    StartTime
;
SQL SELECT
    EndTime,
    Name,
    ShiftID,
    StartTime
FROM Database;

// Attach the shift labels to `shift` on the common field ShiftID. The target
// table is named explicitly: a bare Join attaches to whichever table was
// loaded last, so its meaning changes silently if statements are reordered.
Join (shift)
LOAD * INLINE [
ShiftID, Shift
1, DAY
2, EVENING
3, NIGHT
];

// Is this table needed at all? It only re-reads ShiftID, a field that
// `shift` already carries.
LOAD
    ShiftID
;
SQL SELECT
    ShiftID
FROM Database;