Mondrian 试图优化 Segment.load 奇怪地导致部分或完全空的结果
Mondrian tries to optimize Segment.load weirdly resulting in partial or fully empty results
我觉得我遗漏了一些东西:我有简单的 MDX 查询和相当简单的 Mondrian 代码,但是,它仍然表现得很奇怪——查询的结果会有所不同,具体取决于 XML 模式是否包含不相关的查询维度。
此时我正在使用这段代码:
import org.olap4j.CellSet;
import org.olap4j.OlapConnection;
import org.olap4j.OlapWrapper;
import org.olap4j.layout.RectangularCellSetFormatter;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class MainTest {
public static void main(String[] args) throws ClassNotFoundException, SQLException {
Class.forName("mondrian.olap4j.MondrianOlap4jDriver");
String driver = "org.postgresql.Driver";
String jdbcString = "jdbc:postgresql://localhost:5432/postgres";
String username = "postgres";
String password = "password";
String xmlLoc = "...";
Connection connection = DriverManager.getConnection(
"jdbc:mondrian:"
+ "JdbcDrivers=" + driver + ";"
+ "Jdbc=" + jdbcString + ";"
+ "Catalog=" + xmlLoc + ";JdbcUser=" + username + ";JdbcPassword=" + password + ";PoolNeeded=true;"
);
OlapWrapper wrapper = (OlapWrapper) connection;
OlapConnection connection1 = wrapper.unwrap(OlapConnection.class);
CellSet cellSet = connection1.createStatement().executeOlapQuery(
"select {[name1dim].Members} on 0, {[Measures].Members} on 1 from [testcube]"
);
PrintWriter pw = new PrintWriter(System.out);
new RectangularCellSetFormatter(false).format(cellSet, pw);
pw.flush();
}
}
而这个 XML 架构:
<Schema name="sth">
<Cube name="testcube">
<Table name="test_table_2"/>
<Dimension name="name1dim" >
<Hierarchy hasAll="false">
<Level name="name1dimlevel" column="name1"/>
</Hierarchy>
</Dimension>
<Dimension name="name2dim" >
<Hierarchy hasAll="false">
<Level name="name2dimlevel" column="name2"/>
</Hierarchy>
</Dimension>
<Dimension name="name3dim" >
<Hierarchy hasAll="false">
<Level name="name3dimlevel" column="name3"/>
</Hierarchy>
</Dimension>
<Measure name="type1measure_sum" column="type1" aggregator="sum"/>
<Measure name="type1measure_avg" column="type1" aggregator="avg"/>
<Measure name="type1measure_count" column="type1" aggregator="count"/>
<Measure name="type2measure_sum" column="type2" aggregator="sum"/>
<Measure name="type3measure_avg" column="type2" aggregator="avg"/>
<Measure name="type3measure_count" column="type2" aggregator="count"/>
</Cube>
</Schema>
测试 table 包含 id、type1、type2(整数)和 name1、name2、name3(varchars)。我得到以下结果:
| | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum | 1 590 | 1 940 | 1 832 | 1 750 | 1 350 | 1 619 | 1 742 | 1 521 | 2 015 | 2 152 | 1 725 | 1 945 | 1 812 |
| type1measure_avg | 4,804 | 5,119 | 5,357 | 5,014 | 4,193 | 4,51 | 5,109 | 4,798 | 5,14 | 5,249 | 4,901 | 4,642 | 4,611 |
| type1measure_count | 331 | 379 | 342 | 349 | 322 | 359 | 341 | 317 | 392 | 410 | 352 | 419 | 393 |
| type2measure_sum | 2 719 | 2 740 | 2 865 | 2 894 | 2 616 | 3 000 | 2 869 | 2 634 | 3 204 | 3 178 | 2 708 | 3 335 | 3 166 |
| type3measure_avg | 8,366 | 7,268 | 8,304 | 8,152 | 7,856 | 8,152 | 8,513 | 8,257 | 8,215 | 7,905 | 7,715 | 7,884 | 7,935 |
| type3measure_count | 325 | 377 | 345 | 355 | 333 | 368 | 337 | 319 | 390 | 402 | 351 | 423 | 399 |
但是,如果我从 XML 架构中丢弃“name3dim”,结果会有所不同:
| | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum | 1 655 | 1 970 | 1 845 | 1 802 | 1 376 | 1 687 | 1 809 | 1 546 | 2 064 | 2 170 | 1 772 | 2 007 | 1 842 |
| type1measure_avg | 4,825 | 5,117 | 5,287 | 5,019 | 4,157 | 4,523 | 5,139 | 4,728 | 5,199 | 5,242 | 4,922 | 4,667 | 4,628 |
| type1measure_count | 343 | 385 | 349 | 359 | 331 | 373 | 352 | 327 | 397 | 414 | 360 | 430 | 398 |
| type2measure_sum | 2 793 | 2 781 | 2 964 | 2 982 | 2 712 | 3 130 | 2 986 | 2 751 | 3 250 | 3 206 | 2 771 | 3 383 | 3 202 |
| type3measure_avg | 8,312 | 7,261 | 8,42 | 8,17 | 7,93 | 8,194 | 8,58 | 8,362 | 8,228 | 7,897 | 7,719 | 7,795 | 7,926 |
| type3measure_count | 336 | 383 | 352 | 365 | 342 | 382 | 348 | 329 | 395 | 406 | 359 | 434 | 404 |
因为蒙德里安试图通过添加不必要的 where 子句来优化查询:
select
"test_table_2"."name1" as "c0",
"test_table_2"."name2" as "c1",
"test_table_2"."name3" as "c2",
sum("test_table_2"."type1") as "m0",
avg("test_table_2"."type1") as "m1",
count("test_table_2"."type1") as "m2",
sum("test_table_2"."type2") as "m3",
avg("test_table_2"."type2") as "m4",
count("test_table_2"."type2") as "m5"
from
"test_table_2" as "test_table_2"
where
"test_table_2"."name2" = 'name2_0'
and
"test_table_2"."name3" is null
group by
"test_table_2"."name1",
"test_table_2"."name2",
"test_table_2"."name3"
这限制了包含的结果数量。我不确定这是错误还是某些计划的行为。我还有另一个非常相似的问题,但蒙德里安没有使用“为空”作为过滤器,而是仅使用第一维成员。
在架构中使用 <View><SQL>SomeSql</SQL></View>
而不是 <Table/>
时也会出现此问题。
测试 MDX 查询未指定切片器(WHERE 子句)。在这种情况下,切片器是自动创建的,并包括其他轴上未提及的所有维度。此外,切片器是使用默认维度元素构建的。由于您在定义维度时没有指定任何默认成员,蒙德里安只采用“第一个”元素。什么是第一个元素可以通过维度序号(或维度元素的排序)来确定。所以我猜蒙德里安只是将“空”元素作为维度中的第一个元素。我建议调整您的 Mondrian 模式,使每个维度都有一个默认顺序,并可能添加一些过滤器以排除 NULL 元素值。
我觉得我遗漏了一些东西:我有简单的 MDX 查询和相当简单的 Mondrian 代码,但是,它仍然表现得很奇怪——查询的结果会有所不同,具体取决于 XML 模式是否包含不相关的查询维度。
此时我正在使用这段代码:
import org.olap4j.CellSet;
import org.olap4j.OlapConnection;
import org.olap4j.OlapWrapper;
import org.olap4j.layout.RectangularCellSetFormatter;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class MainTest {
public static void main(String[] args) throws ClassNotFoundException, SQLException {
Class.forName("mondrian.olap4j.MondrianOlap4jDriver");
String driver = "org.postgresql.Driver";
String jdbcString = "jdbc:postgresql://localhost:5432/postgres";
String username = "postgres";
String password = "password";
String xmlLoc = "...";
Connection connection = DriverManager.getConnection(
"jdbc:mondrian:"
+ "JdbcDrivers=" + driver + ";"
+ "Jdbc=" + jdbcString + ";"
+ "Catalog=" + xmlLoc + ";JdbcUser=" + username + ";JdbcPassword=" + password + ";PoolNeeded=true;"
);
OlapWrapper wrapper = (OlapWrapper) connection;
OlapConnection connection1 = wrapper.unwrap(OlapConnection.class);
CellSet cellSet = connection1.createStatement().executeOlapQuery(
"select {[name1dim].Members} on 0, {[Measures].Members} on 1 from [testcube]"
);
PrintWriter pw = new PrintWriter(System.out);
new RectangularCellSetFormatter(false).format(cellSet, pw);
pw.flush();
}
}
而这个 XML 架构:
<Schema name="sth">
<Cube name="testcube">
<Table name="test_table_2"/>
<Dimension name="name1dim" >
<Hierarchy hasAll="false">
<Level name="name1dimlevel" column="name1"/>
</Hierarchy>
</Dimension>
<Dimension name="name2dim" >
<Hierarchy hasAll="false">
<Level name="name2dimlevel" column="name2"/>
</Hierarchy>
</Dimension>
<Dimension name="name3dim" >
<Hierarchy hasAll="false">
<Level name="name3dimlevel" column="name3"/>
</Hierarchy>
</Dimension>
<Measure name="type1measure_sum" column="type1" aggregator="sum"/>
<Measure name="type1measure_avg" column="type1" aggregator="avg"/>
<Measure name="type1measure_count" column="type1" aggregator="count"/>
<Measure name="type2measure_sum" column="type2" aggregator="sum"/>
<Measure name="type3measure_avg" column="type2" aggregator="avg"/>
<Measure name="type3measure_count" column="type2" aggregator="count"/>
</Cube>
</Schema>
测试 table 包含 id、type1、type2(整数)和 name1、name2、name3(varchars)。我得到以下结果:
| | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum | 1 590 | 1 940 | 1 832 | 1 750 | 1 350 | 1 619 | 1 742 | 1 521 | 2 015 | 2 152 | 1 725 | 1 945 | 1 812 |
| type1measure_avg | 4,804 | 5,119 | 5,357 | 5,014 | 4,193 | 4,51 | 5,109 | 4,798 | 5,14 | 5,249 | 4,901 | 4,642 | 4,611 |
| type1measure_count | 331 | 379 | 342 | 349 | 322 | 359 | 341 | 317 | 392 | 410 | 352 | 419 | 393 |
| type2measure_sum | 2 719 | 2 740 | 2 865 | 2 894 | 2 616 | 3 000 | 2 869 | 2 634 | 3 204 | 3 178 | 2 708 | 3 335 | 3 166 |
| type3measure_avg | 8,366 | 7,268 | 8,304 | 8,152 | 7,856 | 8,152 | 8,513 | 8,257 | 8,215 | 7,905 | 7,715 | 7,884 | 7,935 |
| type3measure_count | 325 | 377 | 345 | 355 | 333 | 368 | 337 | 319 | 390 | 402 | 351 | 423 | 399 |
但是,如果我从 XML 架构中丢弃“name3dim”,结果会有所不同:
| | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum | 1 655 | 1 970 | 1 845 | 1 802 | 1 376 | 1 687 | 1 809 | 1 546 | 2 064 | 2 170 | 1 772 | 2 007 | 1 842 |
| type1measure_avg | 4,825 | 5,117 | 5,287 | 5,019 | 4,157 | 4,523 | 5,139 | 4,728 | 5,199 | 5,242 | 4,922 | 4,667 | 4,628 |
| type1measure_count | 343 | 385 | 349 | 359 | 331 | 373 | 352 | 327 | 397 | 414 | 360 | 430 | 398 |
| type2measure_sum | 2 793 | 2 781 | 2 964 | 2 982 | 2 712 | 3 130 | 2 986 | 2 751 | 3 250 | 3 206 | 2 771 | 3 383 | 3 202 |
| type3measure_avg | 8,312 | 7,261 | 8,42 | 8,17 | 7,93 | 8,194 | 8,58 | 8,362 | 8,228 | 7,897 | 7,719 | 7,795 | 7,926 |
| type3measure_count | 336 | 383 | 352 | 365 | 342 | 382 | 348 | 329 | 395 | 406 | 359 | 434 | 404 |
因为蒙德里安试图通过添加不必要的 where 子句来优化查询:
select
"test_table_2"."name1" as "c0",
"test_table_2"."name2" as "c1",
"test_table_2"."name3" as "c2",
sum("test_table_2"."type1") as "m0",
avg("test_table_2"."type1") as "m1",
count("test_table_2"."type1") as "m2",
sum("test_table_2"."type2") as "m3",
avg("test_table_2"."type2") as "m4",
count("test_table_2"."type2") as "m5"
from
"test_table_2" as "test_table_2"
where
"test_table_2"."name2" = 'name2_0'
and
"test_table_2"."name3" is null
group by
"test_table_2"."name1",
"test_table_2"."name2",
"test_table_2"."name3"
这限制了包含的结果数量。我不确定这是错误还是某些计划的行为。我还有另一个非常相似的问题,但蒙德里安没有使用“为空”作为过滤器,而是仅使用第一维成员。
在架构中使用 <View><SQL>SomeSql</SQL></View>
而不是 <Table/>
时也会出现此问题。
测试 MDX 查询未指定切片器(WHERE 子句)。在这种情况下,切片器是自动创建的,并包括其他轴上未提及的所有维度。此外,切片器是使用默认维度元素构建的。由于您在定义维度时没有指定任何默认成员,蒙德里安只采用“第一个”元素。什么是第一个元素可以通过维度序号(或维度元素的排序)来确定。所以我猜蒙德里安只是将“空”元素作为维度中的第一个元素。我建议调整您的 Mondrian 模式,使每个维度都有一个默认顺序,并可能添加一些过滤器以排除 NULL 元素值。