Core Data 中的哪些查询可以从属性的 R-Tree 索引中获益?
What kind of queries in Core Data can profit from R-Tree index on attributes?
阅读了 https://www.sqlite.org/rtree.html 上关于 SQLite 中 R*Tree 的文章之后,我目前正在 Core Data 模型中试验二维 R-Tree。特别是,我(可能有点天真地)期望在对带有索引属性的 Region 实体执行 fetch 时,能在 SQLite 调试跟踪中看到针对索引表的某种 select 语句,但实际上并没有看到(参见下面代码中的 predicateBoundaryIdx)。
我的问题是:为了从 R-Tree 索引中获益,核心数据模型(实体、属性)和 NSPredicate 必须是什么样子?
[XCode v11.4,iOS v13.1,Swift。开启 com.apple.CoreData.SQLDebug 4]
模型
索引
对应的数据库方案
CREATE TABLE ZPERSON ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZLOCATION INTEGER, Z1CONTACTS INTEGER, ZNAME VARCHAR );
CREATE TABLE ZREGION ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZMAXLATITUDE FLOAT, ZMAXLATITUDEIDX FLOAT, ZMAXLONGITUDE FLOAT, ZMAXLONGITUDEIDX FLOAT, ZMINLATITUDE FLOAT, ZMINLATITUDEIDX FLOAT, ZMINLONGITUDE FLOAT, ZMINLONGITUDEIDX FLOAT, ZNAME VARCHAR );
CREATE INDEX ZPERSON_ZLOCATION_INDEX ON ZPERSON (ZLOCATION);
CREATE INDEX ZPERSON_Z1CONTACTS_INDEX ON ZPERSON (Z1CONTACTS);
CREATE VIRTUAL TABLE Z_Region_RegionIndex USING RTREE (Z_PK INTEGER PRIMARY KEY, ZMINLATITUDEIDX_MIN, ZMINLATITUDEIDX_MAX, ZMAXLATITUDEIDX_MIN, ZMAXLATITUDEIDX_MAX, ZMINLONGITUDEIDX_MIN, ZMINLONGITUDEIDX_MAX, ZMAXLONGITUDEIDX_MIN, ZMAXLONGITUDEIDX_MAX)
/* Z_Region_RegionIndex(Z_PK,ZMINLATITUDEIDX_MIN,ZMINLATITUDEIDX_MAX,ZMAXLATITUDEIDX_MIN,ZMAXLATITUDEIDX_MAX,ZMINLONGITUDEIDX_MIN,ZMINLONGITUDEIDX_MAX,ZMAXLONGITUDEIDX_MIN,ZMAXLONGITUDEIDX_MAX) */;
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_rowid"(rowid INTEGER PRIMARY KEY,nodeno);
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_node"(nodeno INTEGER PRIMARY KEY,data);
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_parent"(nodeno INTEGER PRIMARY KEY,parentnode);
测试代码
/// Launch hook used as a benchmark harness.
///
/// Seeds one million `Person`/`Region` pairs with random bounding boxes around (40, 9),
/// then counts the regions lying inside a fixed window twice: once through the `*idx`
/// attributes and once through the plain attributes. Finally logs every store URL.
///
/// - Returns: Always `true`.
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
    // Configure the view context.
    let mainContext = persistentContainer.viewContext
    mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
    mainContext.undoManager = nil
    mainContext.shouldDeleteInaccessibleFaults = true
    mainContext.automaticallyMergesChangesFromParent = true

    // Seed the store.
    let n = 1000000
    // n + 1 shuffled labels, so the 1-based loop index can subscript the array directly.
    let personNr = stride(from: 1, through: n+1, by: 1).map(String.init).shuffled()
    for i in 1...n {
        let personObj = Person(context: mainContext)
        let locationObj = Region(context: mainContext)
        locationObj.name = "Region \(i)"

        // Random box edges, drawn in the order: min latitude, min longitude, max latitude, max longitude.
        let south: Float = 40.000000 - Float.random(in: 0 ..< 5)
        let west: Float = 9.000000 - Float.random(in: 0 ..< 5)
        let north: Float = 40.000000 + Float.random(in: 0 ..< 5)
        let east: Float = 9.000000 + Float.random(in: 0 ..< 5)

        locationObj.minlatitude = south
        locationObj.minlongitude = west
        locationObj.maxlatitude = north
        locationObj.maxlongitude = east
        // The *idx attributes carry the same values as their plain counterparts.
        locationObj.minlatitudeidx = south
        locationObj.minlongitudeidx = west
        locationObj.maxlatitudeidx = north
        locationObj.maxlongitudeidx = east

        personObj.name = "Person \(personNr[i])"
        personObj.location = locationObj

        // Flush to the store every 1000 pairs.
        if i % 1000 == 0 {
            saveContext()
        }
    }
    saveContext()

    // Build the two fetch requests being compared.
    let eps : Float = 1.0
    let predicateBoundaryIdx = NSPredicate(format: "(minlatitudeidx >= %lf and maxlatitudeidx =< %lf) and (minlongitudeidx >= %lf and maxlongitudeidx =< %lf)",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)
    let predicateBoundary = NSPredicate(format: "(minlatitude >= %lf and maxlatitude =< %lf) and (minlongitude >= %lf and maxlongitude =< %lf)",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)

    let requestIdx: NSFetchRequest<Region> = Region.fetchRequest()
    requestIdx.predicate = predicateBoundaryIdx
    let request: NSFetchRequest<Region> = Region.fetchRequest()
    request.predicate = predicateBoundary

    // Prints the heading, then either the match count or the error.
    func printCount(_ heading: String, of fetchRequest: NSFetchRequest<Region>) {
        print(heading)
        do {
            let matches = try mainContext.count(for: fetchRequest)
            print("Count = \(matches)")
        } catch {
            print("Error: \(error)")
        }
    }

    printCount("fetch index:", of: requestIdx)
    printCount("fetch no index:", of: request)

    // Log where each persistent store lives.
    persistentContainer.persistentStoreCoordinator.persistentStores.forEach { store in
        os_log("Store URL: %@", log: Debug.coredata_log, type: .info, store.url?.absoluteString ?? "No Store")
    }
    return true
}
核心数据SQL跟踪
CoreData: sql: SELECT COUNT( DISTINCT t0.Z_PK) FROM ZREGION t0 WHERE ( t0.ZMINLATITUDEIDX >= ? AND t0.ZMAXLATITUDEIDX <= ? AND t0.ZMINLONGITUDEIDX >= ? AND t0.ZMAXLONGITUDEIDX <= ?)
Core Data 于 2017 年引入了对 R-Tree 索引的支持。WWDC 2017 session 210 对此做了介绍并提供了一个示例。正如您将看到的,关键在于需要在谓词格式字符串中使用一个函数,来指明应当使用该索引。WWDC 2018 session 224 中还有另一个示例。
对您的示例进行稍微简单的修改:具有位置(latitude
和 longitude
)属性和 name
属性的实体:
添加名为 "bylocation" 的抓取索引,将其类型指定为 "R-Tree" 并为 latitude
和 longitude
添加抓取索引元素:
稍微修改您的代码,以反映不同的属性等。准备两个单独的谓词,一个使用索引,另一个不使用,运行 两者进行比较:
// Demo snippet: seed a few Region objects with random coordinates around (40, 9), then run
// the same bounding-box fetch twice, once with the `indexed:by:` predicate and once with the
// plain predicate.
let mainContext = persistentContainer.viewContext
mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
mainContext.undoManager = nil
mainContext.shouldDeleteInaccessibleFaults = true
mainContext.automaticallyMergesChangesFromParent = true

let n = 10 // Just for demo purposes
for i in 1...n {
    let locationObj = Region(context: mainContext)
    locationObj.name = "Region \(i)"
    locationObj.latitude = 40.000000 + 5.0 - Float.random(in: 0 ..< 10)
    locationObj.longitude = 9.000000 + 5.0 - Float.random(in: 0 ..< 10)
    // Periodic save; only triggers when n is raised to at least 1000.
    if i % 1000 == 0 {
        saveContext()
    }
}
saveContext()
// Drop the in-memory objects before fetching.
mainContext.reset()

let eps : Float = 1.0
let predicateBoundaryIdx = NSPredicate(format: "indexed:by:(latitude, 'bylocation') between { %lf, %lf } AND indexed:by:(longitude, 'bylocation') between { %lf, %lf }", 40.0-eps, 40.0+eps, 9.0-eps, 9.0+eps)
let predicateBoundary = NSPredicate(format: "latitude between { %lf, %lf } AND longitude between { %lf, %lf} ",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)

let requestIdx: NSFetchRequest<Region> = Region.fetchRequest()
requestIdx.predicate = predicateBoundaryIdx
let request: NSFetchRequest<Region> = Region.fetchRequest()
request.predicate = predicateBoundary

// Prints the heading, runs the fetch, then prints the number of results or the error.
func runFetch(_ heading: String, with fetchRequest: NSFetchRequest<Region>) {
    print(heading)
    do {
        let result = try mainContext.fetch(fetchRequest)
        print("Count = \(result.count)")
    } catch {
        print("Error: \(error)")
    }
}

runFetch("fetch index:", with: requestIdx)
// Reset again between the two fetches.
mainContext.reset()
runFetch("fetch no index:", with: request)
运行 SQLDebug = 4,然后您可以在日志中看到一些正在发生的事情。首先,创建数据库并添加 Region table,然后添加 RTree 索引。每当修改Region table时,创建触发器将相关数据添加到索引中:
CoreData: sql: CREATE TABLE ZREGION ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZLATITUDE FLOAT, ZLONGITUDE FLOAT, ZNAME VARCHAR )
CoreData: sql: CREATE VIRTUAL TABLE IF NOT EXISTS Z_Region_bylocation USING RTREE (Z_PK INTEGER PRIMARY KEY, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX)
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_INSERT AFTER INSERT ON ZREGION FOR EACH ROW BEGIN INSERT OR REPLACE INTO Z_Region_bylocation (Z_PK, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX) VALUES (NEW.Z_PK, NEW.ZLATITUDE, NEW.ZLATITUDE, NEW.ZLONGITUDE, NEW.ZLONGITUDE) ; END
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_UPDATE AFTER UPDATE ON ZREGION FOR EACH ROW BEGIN DELETE FROM Z_Region_bylocation WHERE Z_PK = NEW.Z_PK ; INSERT INTO Z_Region_bylocation (Z_PK, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX) VALUES (NEW.Z_PK, NEW.ZLATITUDE, NEW.ZLATITUDE, NEW.ZLONGITUDE, NEW.ZLONGITUDE) ; END
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_DELETE AFTER DELETE ON ZREGION FOR EACH ROW BEGIN DELETE FROM Z_Region_bylocation WHERE Z_PK = OLD.Z_PK ; END
然后在提取时,您可以看到发送到 SQLite 的两个不同查询:
加上索引:
CoreData: sql: SELECT 0, t0.Z_PK, t0.Z_OPT, t0.ZLATITUDE, t0.ZLONGITUDE, t0.ZNAME FROM ZREGION t0 WHERE ( t0.Z_PK IN (SELECT n1_t0.Z_PK FROM Z_Region_bylocation n1_t0 WHERE (? <= n1_t0.ZLATITUDE_MIN AND n1_t0.ZLATITUDE_MAX <= ?)) AND t0.Z_PK IN (SELECT n1_t0.Z_PK FROM Z_Region_bylocation n1_t0 WHERE (? <= n1_t0.ZLONGITUDE_MIN AND n1_t0.ZLONGITUDE_MAX <= ?)))
并且日志甚至包括 SQLite 使用的查询计划:
2 0 0 SEARCH TABLE ZREGION AS t0 USING INTEGER PRIMARY KEY (rowid=?)
6 0 0 LIST SUBQUERY 1
8 6 0 SCAN TABLE Z_Region_bylocation AS n1_t0 VIRTUAL TABLE INDEX 2:D0B1
26 0 0 LIST SUBQUERY 2
28 26 0 SCAN TABLE Z_Region_bylocation AS n1_t0 VIRTUAL TABLE INDEX 2:D2B3
没有索引:
CoreData: sql: SELECT 0, t0.Z_PK, t0.Z_OPT, t0.ZLATITUDE, t0.ZLONGITUDE, t0.ZNAME FROM ZREGION t0 WHERE (( t0.ZLATITUDE BETWEEN ? AND ?) AND ( t0.ZLONGITUDE BETWEEN ? AND ?))
2 0 0 SCAN TABLE ZREGION AS t0
从这里可以看出,使用索引涉及一些相当繁琐的子查询。我发现的结果是:对于小数据集,索引实际上会减慢速度;结果集很大时也是如此。但如果数据集很大而结果集很小,使用索引就有优势了。是否值得使用,就留给您自己去试验和判断。我不太明白的一点是:使用索引需要两个单独的子查询,一个用于经度,一个用于纬度。在我看来(尽管也许我遗漏了什么),这破坏了 R 树的全部意义,即它的多维性。
我稍微修改了 OP 中的数据库,以测试 indexed:by: 语句并进行一些时间测量:
数据库:
索引:
用例:
统计访问某个地区的人数。
对于区域 R42,结果应为 2(第 1 人和第 3 人):
代码:
/// Launch hook used as a timing harness for plain versus `indexed:by:` Core Data predicates.
///
/// When `createSomeData` is switched on, it seeds a grid of `Region` objects (each with a
/// `Bounds`) and 100000 `Person` objects with 10 random `Bounds` locations each. Then, for a
/// fixed list of region names, it fetches the distinct `person.name` values of all `Bounds`
/// overlapping that region, first with a plain comparison predicate and then with the
/// `indexed:by:` predicate, printing the elapsed whole seconds in front of every log line.
///
/// - Returns: Always `true`.
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
    let mainContext = persistentContainer.viewContext
    mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
    mainContext.undoManager = nil
    mainContext.shouldDeleteInaccessibleFaults = true
    mainContext.automaticallyMergesChangesFromParent = true

    // Grid geometry shared by the seeding code and the indexed predicate.
    let longstep = 2
    let latstep = 2
    let minlong = 0
    let maxlong = 20
    let minlat = 20
    let maxlat = 55

    // Flip to true for one run to populate the store.
    let createSomeData: Bool = false
    if createSomeData {
        // create some regions
        var regionNr: Int = 0
        for long in stride(from: minlong, to: maxlong, by: longstep) {
            for lat in stride(from: minlat, to: maxlat, by: latstep) {
                regionNr += 1
                let region = Region(context: mainContext)
                let bounds = Bounds(context: mainContext)
                bounds.minlongitude = Float(long)
                // Clamp the last cell of each row/column to the overall maximum.
                bounds.maxlongitude = Float(min(long + longstep,maxlong))
                bounds.minlatitude = Float(lat)
                bounds.maxlatitude = Float(min(lat + latstep,maxlat))
                region.bounds = bounds
                region.name = "Region \(regionNr)"
                print("region.name = \(String(describing: region.name))")
                if regionNr % 1000 == 0 {
                    saveContext()
                }
            }
        }
        saveContext()

        // create persons and visited locations
        var k = 0
        let n = 100000
        // n + 1 shuffled labels, so the 1-based loop index can subscript the array directly.
        let personNr = stride(from: 1, through: n+1, by: 1).map(String.init).shuffled()
        for i in 1...n {
            let person = Person(context: mainContext)
            person.name = "Person \(personNr[i])"
            // create locations
            let m = 10
            for _ in 1...m {
                k += 1
                let location = Bounds(context: mainContext)
                location.minlatitude = Float.random(in: Float(minlat + 3 * latstep) ... Float(maxlat)) - Float.random(in: 0 ... Float(3 * latstep))
                location.minlongitude = Float.random(in: Float(minlong + 3 * longstep) ... Float(maxlong)) - Float.random(in: 0 ... Float(3 * longstep))
                location.maxlatitude = min(location.minlatitude + Float.random(in: 0 ... Float(3 * latstep)),Float(maxlat))
                location.maxlongitude = min(location.minlongitude + Float.random(in: 0 ... Float(3 * longstep)),Float(maxlong))
                person.addToLocations(location)
                if k % 1000 == 0 {
                    saveContext()
                }
            }
        }
        saveContext()
    }

    let start = Date()
    // Whole seconds elapsed since `start`; used as the leading number of every log line.
    func elapsedSeconds() -> Int {
        return Calendar.current.dateComponents([Calendar.Component.second], from: start, to: Date()).second ?? 0
    }

    for regionName in ["Region 1","Region 13","Region 43","Region 101","Region 113","Region 145"] {
        print("\(elapsedSeconds()) Region: \(regionName)")
        let requestOnRegion: NSFetchRequest<Region> = Region.fetchRequest()
        requestOnRegion.predicate = NSPredicate(format: "(name = %@)",regionName)
        do {
            // An empty result (e.g. store not seeded yet) is reported instead of trapping on index 0.
            guard let regionRes = try mainContext.fetch(requestOnRegion).first else {
                print("Error: no region named \(regionName)")
                continue
            }
            guard let regionBounds = regionRes.bounds else {
                print("Error: \(regionName) has no bounds")
                continue
            }
            print("\(elapsedSeconds()) Region: L1 = (\(regionBounds.minlongitude),\(regionBounds.minlatitude)) R1 = (\(regionBounds.maxlongitude),\(regionBounds.maxlatitude))")

            // Plain overlap test on the four box edges.
            let someBounds1 = NSPredicate(format: "(minlongitude <= %lf && maxlongitude >= %lf && minlatitude <= %lf && maxlatitude >= %lf)",
                                          regionBounds.maxlongitude,
                                          regionBounds.minlongitude,
                                          regionBounds.maxlatitude,
                                          regionBounds.minlatitude)
            // Same overlap test expressed as closed ranges through the 'BoundsIndex' fetch index;
            // the open side of each comparison is closed with the overall grid limits.
            let someBounds2 = NSPredicate(format: "(indexed:by:(minlongitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(maxlongitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(minlatitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(maxlatitude, 'BoundsIndex') between { %lf, %lf} )",
                                          Float(minlong),
                                          regionBounds.maxlongitude,
                                          regionBounds.minlongitude,
                                          Float(maxlong),
                                          Float(minlat),
                                          regionBounds.maxlatitude,
                                          regionBounds.minlatitude,
                                          Float(maxlat))

            // Dictionary fetch of the distinct person names attached to the matching bounds.
            let requestOnBounds: NSFetchRequest<NSDictionary> = NSFetchRequest<NSDictionary>(entityName:"Bounds")
            requestOnBounds.resultType = NSFetchRequestResultType.dictionaryResultType
            requestOnBounds.propertiesToFetch = ["person.name"]
            requestOnBounds.returnsDistinctResults = true

            requestOnBounds.predicate = someBounds1
            print("\n")
            print("\(elapsedSeconds()) Start - Fetch (no index):")
            var boundsRes = try mainContext.fetch(requestOnBounds)
            var uniquePersons : [String] = boundsRes.compactMap { $0.value(forKey: "person.name") as? String }
            print("\(elapsedSeconds()) Number of Persons in this Region: \(uniquePersons.count)")
            print("\n")

            requestOnBounds.predicate = someBounds2
            print("\(elapsedSeconds()) Start - Fetch (with index):")
            boundsRes = try mainContext.fetch(requestOnBounds)
            uniquePersons = boundsRes.compactMap { $0.value(forKey: "person.name") as? String }
            print("\(elapsedSeconds()) Number of Persons in this Region: \(uniquePersons.count)")
            print("\n")
        } catch {
            print("Error: \(error)")
        }
    }

    // Log where each persistent store lives.
    for store in (persistentContainer.persistentStoreCoordinator.persistentStores) {
        os_log("Store URL: %@", log: Debug.coredata_log, type: .info, store.url?.absoluteString ?? "No Store")
    }
    return true
}
输出:
前导数字是以秒为单位的时间。
0 Region: Region 1
0 Region: L1 = (0.0,20.0) R1 = (2.0,22.0)
0 Start - Fetch (no index):
2 Number of Persons in this Region: 267
2 Start - Fetch (with index):
10 Number of Persons in this Region: 267
10 Region: Region 13
10 Region: L1 = (0.0,44.0) R1 = (2.0,46.0)
10 Start - Fetch (no index):
11 Number of Persons in this Region: 4049
11 Start - Fetch (with index):
13 Number of Persons in this Region: 4049
13 Region: Region 43
13 Region: L1 = (4.0,32.0) R1 = (6.0,34.0)
13 Start - Fetch (no index):
14 Number of Persons in this Region: 28798
14 Start - Fetch (with index):
17 Number of Persons in this Region: 28798
17 Region: Region 101
17 Region: L1 = (10.0,40.0) R1 = (12.0,42.0)
17 Start - Fetch (no index):
18 Number of Persons in this Region: 46753
18 Start - Fetch (with index):
22 Number of Persons in this Region: 46753
22 Region: Region 113
22 Region: L1 = (12.0,28.0) R1 = (14.0,30.0)
22 Start - Fetch (no index):
22 Number of Persons in this Region: 45312
22 Start - Fetch (with index):
28 Number of Persons in this Region: 45312
28 Region: Region 145
28 Region: L1 = (16.0,20.0) R1 = (18.0,22.0)
28 Start - Fetch (no index):
28 Number of Persons in this Region: 3023
28 Start - Fetch (with index):
34 Number of Persons in this Region: 3023
结果:
indexed:by:
导致 Core Data 使用 R*Tree 索引。
- 使用 R*Tree 确实对查询执行时间不利。
未决问题:
什么类型的查询和 Core Data 模型利用了 R*Tree 索引?
阅读了 https://www.sqlite.org/rtree.html 上关于 SQLite 中 R*Tree 的文章之后,我目前正在 Core Data 模型中试验二维 R-Tree。特别是,我(可能有点天真地)期望在对带有索引属性的 Region 实体执行 fetch 时,能在 SQLite 调试跟踪中看到针对索引表的某种 select 语句,但实际上并没有看到(参见下面代码中的 predicateBoundaryIdx)。
我的问题是:为了从 R-Tree 索引中获益,核心数据模型(实体、属性)和 NSPredicate 必须是什么样子?
[XCode v11.4,iOS v13.1,Swift。开启 com.apple.CoreData.SQLDebug 4]
模型
索引
对应的数据库方案
CREATE TABLE ZPERSON ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZLOCATION INTEGER, Z1CONTACTS INTEGER, ZNAME VARCHAR );
CREATE TABLE ZREGION ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZMAXLATITUDE FLOAT, ZMAXLATITUDEIDX FLOAT, ZMAXLONGITUDE FLOAT, ZMAXLONGITUDEIDX FLOAT, ZMINLATITUDE FLOAT, ZMINLATITUDEIDX FLOAT, ZMINLONGITUDE FLOAT, ZMINLONGITUDEIDX FLOAT, ZNAME VARCHAR );
CREATE INDEX ZPERSON_ZLOCATION_INDEX ON ZPERSON (ZLOCATION);
CREATE INDEX ZPERSON_Z1CONTACTS_INDEX ON ZPERSON (Z1CONTACTS);
CREATE VIRTUAL TABLE Z_Region_RegionIndex USING RTREE (Z_PK INTEGER PRIMARY KEY, ZMINLATITUDEIDX_MIN, ZMINLATITUDEIDX_MAX, ZMAXLATITUDEIDX_MIN, ZMAXLATITUDEIDX_MAX, ZMINLONGITUDEIDX_MIN, ZMINLONGITUDEIDX_MAX, ZMAXLONGITUDEIDX_MIN, ZMAXLONGITUDEIDX_MAX)
/* Z_Region_RegionIndex(Z_PK,ZMINLATITUDEIDX_MIN,ZMINLATITUDEIDX_MAX,ZMAXLATITUDEIDX_MIN,ZMAXLATITUDEIDX_MAX,ZMINLONGITUDEIDX_MIN,ZMINLONGITUDEIDX_MAX,ZMAXLONGITUDEIDX_MIN,ZMAXLONGITUDEIDX_MAX) */;
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_rowid"(rowid INTEGER PRIMARY KEY,nodeno);
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_node"(nodeno INTEGER PRIMARY KEY,data);
CREATE TABLE IF NOT EXISTS "Z_Region_RegionIndex_parent"(nodeno INTEGER PRIMARY KEY,parentnode);
测试代码
/// Launch hook used as a benchmark harness.
///
/// Seeds one million `Person`/`Region` pairs with random bounding boxes around (40, 9),
/// then counts the regions lying inside a fixed window twice: once through the `*idx`
/// attributes and once through the plain attributes. Finally logs every store URL.
///
/// - Returns: Always `true`.
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
    // Configure the view context.
    let mainContext = persistentContainer.viewContext
    mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
    mainContext.undoManager = nil
    mainContext.shouldDeleteInaccessibleFaults = true
    mainContext.automaticallyMergesChangesFromParent = true

    // Seed the store.
    let n = 1000000
    // n + 1 shuffled labels, so the 1-based loop index can subscript the array directly.
    let personNr = stride(from: 1, through: n+1, by: 1).map(String.init).shuffled()
    for i in 1...n {
        let personObj = Person(context: mainContext)
        let locationObj = Region(context: mainContext)
        locationObj.name = "Region \(i)"

        // Random box edges, drawn in the order: min latitude, min longitude, max latitude, max longitude.
        let south: Float = 40.000000 - Float.random(in: 0 ..< 5)
        let west: Float = 9.000000 - Float.random(in: 0 ..< 5)
        let north: Float = 40.000000 + Float.random(in: 0 ..< 5)
        let east: Float = 9.000000 + Float.random(in: 0 ..< 5)

        locationObj.minlatitude = south
        locationObj.minlongitude = west
        locationObj.maxlatitude = north
        locationObj.maxlongitude = east
        // The *idx attributes carry the same values as their plain counterparts.
        locationObj.minlatitudeidx = south
        locationObj.minlongitudeidx = west
        locationObj.maxlatitudeidx = north
        locationObj.maxlongitudeidx = east

        personObj.name = "Person \(personNr[i])"
        personObj.location = locationObj

        // Flush to the store every 1000 pairs.
        if i % 1000 == 0 {
            saveContext()
        }
    }
    saveContext()

    // Build the two fetch requests being compared.
    let eps : Float = 1.0
    let predicateBoundaryIdx = NSPredicate(format: "(minlatitudeidx >= %lf and maxlatitudeidx =< %lf) and (minlongitudeidx >= %lf and maxlongitudeidx =< %lf)",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)
    let predicateBoundary = NSPredicate(format: "(minlatitude >= %lf and maxlatitude =< %lf) and (minlongitude >= %lf and maxlongitude =< %lf)",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)

    let requestIdx: NSFetchRequest<Region> = Region.fetchRequest()
    requestIdx.predicate = predicateBoundaryIdx
    let request: NSFetchRequest<Region> = Region.fetchRequest()
    request.predicate = predicateBoundary

    // Prints the heading, then either the match count or the error.
    func printCount(_ heading: String, of fetchRequest: NSFetchRequest<Region>) {
        print(heading)
        do {
            let matches = try mainContext.count(for: fetchRequest)
            print("Count = \(matches)")
        } catch {
            print("Error: \(error)")
        }
    }

    printCount("fetch index:", of: requestIdx)
    printCount("fetch no index:", of: request)

    // Log where each persistent store lives.
    persistentContainer.persistentStoreCoordinator.persistentStores.forEach { store in
        os_log("Store URL: %@", log: Debug.coredata_log, type: .info, store.url?.absoluteString ?? "No Store")
    }
    return true
}
核心数据SQL跟踪
CoreData: sql: SELECT COUNT( DISTINCT t0.Z_PK) FROM ZREGION t0 WHERE ( t0.ZMINLATITUDEIDX >= ? AND t0.ZMAXLATITUDEIDX <= ? AND t0.ZMINLONGITUDEIDX >= ? AND t0.ZMAXLONGITUDEIDX <= ?)
Core Data 于 2017 年引入了对 R-Tree 索引的支持。WWDC 2017 session 210 对此做了介绍并提供了一个示例。正如您将看到的,关键在于需要在谓词格式字符串中使用一个函数,来指明应当使用该索引。WWDC 2018 session 224 中还有另一个示例。
对您的示例进行稍微简单的修改:具有位置(latitude
和 longitude
)属性和 name
属性的实体:
添加名为 "bylocation" 的抓取索引,将其类型指定为 "R-Tree" 并为 latitude
和 longitude
添加抓取索引元素:
稍微修改您的代码,以反映不同的属性等。准备两个单独的谓词,一个使用索引,另一个不使用,运行 两者进行比较:
// Demo snippet: seed a few Region objects with random coordinates around (40, 9), then run
// the same bounding-box fetch twice, once with the `indexed:by:` predicate and once with the
// plain predicate.
let mainContext = persistentContainer.viewContext
mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
mainContext.undoManager = nil
mainContext.shouldDeleteInaccessibleFaults = true
mainContext.automaticallyMergesChangesFromParent = true

let n = 10 // Just for demo purposes
for i in 1...n {
    let locationObj = Region(context: mainContext)
    locationObj.name = "Region \(i)"
    locationObj.latitude = 40.000000 + 5.0 - Float.random(in: 0 ..< 10)
    locationObj.longitude = 9.000000 + 5.0 - Float.random(in: 0 ..< 10)
    // Periodic save; only triggers when n is raised to at least 1000.
    if i % 1000 == 0 {
        saveContext()
    }
}
saveContext()
// Drop the in-memory objects before fetching.
mainContext.reset()

let eps : Float = 1.0
let predicateBoundaryIdx = NSPredicate(format: "indexed:by:(latitude, 'bylocation') between { %lf, %lf } AND indexed:by:(longitude, 'bylocation') between { %lf, %lf }", 40.0-eps, 40.0+eps, 9.0-eps, 9.0+eps)
let predicateBoundary = NSPredicate(format: "latitude between { %lf, %lf } AND longitude between { %lf, %lf} ",40.000000-eps,40.000000+eps,9.000000-eps,9.000000+eps)

let requestIdx: NSFetchRequest<Region> = Region.fetchRequest()
requestIdx.predicate = predicateBoundaryIdx
let request: NSFetchRequest<Region> = Region.fetchRequest()
request.predicate = predicateBoundary

// Prints the heading, runs the fetch, then prints the number of results or the error.
func runFetch(_ heading: String, with fetchRequest: NSFetchRequest<Region>) {
    print(heading)
    do {
        let result = try mainContext.fetch(fetchRequest)
        print("Count = \(result.count)")
    } catch {
        print("Error: \(error)")
    }
}

runFetch("fetch index:", with: requestIdx)
// Reset again between the two fetches.
mainContext.reset()
runFetch("fetch no index:", with: request)
运行 SQLDebug = 4,然后您可以在日志中看到一些正在发生的事情。首先,创建数据库并添加 Region table,然后添加 RTree 索引。每当修改Region table时,创建触发器将相关数据添加到索引中:
CoreData: sql: CREATE TABLE ZREGION ( Z_PK INTEGER PRIMARY KEY, Z_ENT INTEGER, Z_OPT INTEGER, ZLATITUDE FLOAT, ZLONGITUDE FLOAT, ZNAME VARCHAR )
CoreData: sql: CREATE VIRTUAL TABLE IF NOT EXISTS Z_Region_bylocation USING RTREE (Z_PK INTEGER PRIMARY KEY, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX)
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_INSERT AFTER INSERT ON ZREGION FOR EACH ROW BEGIN INSERT OR REPLACE INTO Z_Region_bylocation (Z_PK, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX) VALUES (NEW.Z_PK, NEW.ZLATITUDE, NEW.ZLATITUDE, NEW.ZLONGITUDE, NEW.ZLONGITUDE) ; END
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_UPDATE AFTER UPDATE ON ZREGION FOR EACH ROW BEGIN DELETE FROM Z_Region_bylocation WHERE Z_PK = NEW.Z_PK ; INSERT INTO Z_Region_bylocation (Z_PK, ZLATITUDE_MIN, ZLATITUDE_MAX, ZLONGITUDE_MIN, ZLONGITUDE_MAX) VALUES (NEW.Z_PK, NEW.ZLATITUDE, NEW.ZLATITUDE, NEW.ZLONGITUDE, NEW.ZLONGITUDE) ; END
CoreData: sql: CREATE TRIGGER IF NOT EXISTS Z_Region_bylocation_DELETE AFTER DELETE ON ZREGION FOR EACH ROW BEGIN DELETE FROM Z_Region_bylocation WHERE Z_PK = OLD.Z_PK ; END
然后在提取时,您可以看到发送到 SQLite 的两个不同查询:
加上索引:
CoreData: sql: SELECT 0, t0.Z_PK, t0.Z_OPT, t0.ZLATITUDE, t0.ZLONGITUDE, t0.ZNAME FROM ZREGION t0 WHERE ( t0.Z_PK IN (SELECT n1_t0.Z_PK FROM Z_Region_bylocation n1_t0 WHERE (? <= n1_t0.ZLATITUDE_MIN AND n1_t0.ZLATITUDE_MAX <= ?)) AND t0.Z_PK IN (SELECT n1_t0.Z_PK FROM Z_Region_bylocation n1_t0 WHERE (? <= n1_t0.ZLONGITUDE_MIN AND n1_t0.ZLONGITUDE_MAX <= ?)))
并且日志甚至包括 SQLite 使用的查询计划:
2 0 0 SEARCH TABLE ZREGION AS t0 USING INTEGER PRIMARY KEY (rowid=?)
6 0 0 LIST SUBQUERY 1
8 6 0 SCAN TABLE Z_Region_bylocation AS n1_t0 VIRTUAL TABLE INDEX 2:D0B1
26 0 0 LIST SUBQUERY 2
28 26 0 SCAN TABLE Z_Region_bylocation AS n1_t0 VIRTUAL TABLE INDEX 2:D2B3
没有索引:
CoreData: sql: SELECT 0, t0.Z_PK, t0.Z_OPT, t0.ZLATITUDE, t0.ZLONGITUDE, t0.ZNAME FROM ZREGION t0 WHERE (( t0.ZLATITUDE BETWEEN ? AND ?) AND ( t0.ZLONGITUDE BETWEEN ? AND ?))
2 0 0 SCAN TABLE ZREGION AS t0
从这里可以看出,使用索引涉及一些相当繁琐的子查询。我发现的结果是:对于小数据集,索引实际上会减慢速度;结果集很大时也是如此。但如果数据集很大而结果集很小,使用索引就有优势了。是否值得使用,就留给您自己去试验和判断。我不太明白的一点是:使用索引需要两个单独的子查询,一个用于经度,一个用于纬度。在我看来(尽管也许我遗漏了什么),这破坏了 R 树的全部意义,即它的多维性。
我稍微修改了 OP 中的数据库,以测试 indexed:by: 语句并进行一些时间测量:
数据库:
索引:
用例:
统计访问某个地区的人数。
对于区域 R42,结果应为 2(第 1 人和第 3 人):
代码:
/// Launch hook used as a timing harness for plain versus `indexed:by:` Core Data predicates.
///
/// When `createSomeData` is switched on, it seeds a grid of `Region` objects (each with a
/// `Bounds`) and 100000 `Person` objects with 10 random `Bounds` locations each. Then, for a
/// fixed list of region names, it fetches the distinct `person.name` values of all `Bounds`
/// overlapping that region, first with a plain comparison predicate and then with the
/// `indexed:by:` predicate, printing the elapsed whole seconds in front of every log line.
///
/// - Returns: Always `true`.
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
    let mainContext = persistentContainer.viewContext
    mainContext.mergePolicy = NSMergeByPropertyObjectTrumpMergePolicy
    mainContext.undoManager = nil
    mainContext.shouldDeleteInaccessibleFaults = true
    mainContext.automaticallyMergesChangesFromParent = true

    // Grid geometry shared by the seeding code and the indexed predicate.
    let longstep = 2
    let latstep = 2
    let minlong = 0
    let maxlong = 20
    let minlat = 20
    let maxlat = 55

    // Flip to true for one run to populate the store.
    let createSomeData: Bool = false
    if createSomeData {
        // create some regions
        var regionNr: Int = 0
        for long in stride(from: minlong, to: maxlong, by: longstep) {
            for lat in stride(from: minlat, to: maxlat, by: latstep) {
                regionNr += 1
                let region = Region(context: mainContext)
                let bounds = Bounds(context: mainContext)
                bounds.minlongitude = Float(long)
                // Clamp the last cell of each row/column to the overall maximum.
                bounds.maxlongitude = Float(min(long + longstep,maxlong))
                bounds.minlatitude = Float(lat)
                bounds.maxlatitude = Float(min(lat + latstep,maxlat))
                region.bounds = bounds
                region.name = "Region \(regionNr)"
                print("region.name = \(String(describing: region.name))")
                if regionNr % 1000 == 0 {
                    saveContext()
                }
            }
        }
        saveContext()

        // create persons and visited locations
        var k = 0
        let n = 100000
        // n + 1 shuffled labels, so the 1-based loop index can subscript the array directly.
        let personNr = stride(from: 1, through: n+1, by: 1).map(String.init).shuffled()
        for i in 1...n {
            let person = Person(context: mainContext)
            person.name = "Person \(personNr[i])"
            // create locations
            let m = 10
            for _ in 1...m {
                k += 1
                let location = Bounds(context: mainContext)
                location.minlatitude = Float.random(in: Float(minlat + 3 * latstep) ... Float(maxlat)) - Float.random(in: 0 ... Float(3 * latstep))
                location.minlongitude = Float.random(in: Float(minlong + 3 * longstep) ... Float(maxlong)) - Float.random(in: 0 ... Float(3 * longstep))
                location.maxlatitude = min(location.minlatitude + Float.random(in: 0 ... Float(3 * latstep)),Float(maxlat))
                location.maxlongitude = min(location.minlongitude + Float.random(in: 0 ... Float(3 * longstep)),Float(maxlong))
                person.addToLocations(location)
                if k % 1000 == 0 {
                    saveContext()
                }
            }
        }
        saveContext()
    }

    let start = Date()
    // Whole seconds elapsed since `start`; used as the leading number of every log line.
    func elapsedSeconds() -> Int {
        return Calendar.current.dateComponents([Calendar.Component.second], from: start, to: Date()).second ?? 0
    }

    for regionName in ["Region 1","Region 13","Region 43","Region 101","Region 113","Region 145"] {
        print("\(elapsedSeconds()) Region: \(regionName)")
        let requestOnRegion: NSFetchRequest<Region> = Region.fetchRequest()
        requestOnRegion.predicate = NSPredicate(format: "(name = %@)",regionName)
        do {
            // An empty result (e.g. store not seeded yet) is reported instead of trapping on index 0.
            guard let regionRes = try mainContext.fetch(requestOnRegion).first else {
                print("Error: no region named \(regionName)")
                continue
            }
            guard let regionBounds = regionRes.bounds else {
                print("Error: \(regionName) has no bounds")
                continue
            }
            print("\(elapsedSeconds()) Region: L1 = (\(regionBounds.minlongitude),\(regionBounds.minlatitude)) R1 = (\(regionBounds.maxlongitude),\(regionBounds.maxlatitude))")

            // Plain overlap test on the four box edges.
            let someBounds1 = NSPredicate(format: "(minlongitude <= %lf && maxlongitude >= %lf && minlatitude <= %lf && maxlatitude >= %lf)",
                                          regionBounds.maxlongitude,
                                          regionBounds.minlongitude,
                                          regionBounds.maxlatitude,
                                          regionBounds.minlatitude)
            // Same overlap test expressed as closed ranges through the 'BoundsIndex' fetch index;
            // the open side of each comparison is closed with the overall grid limits.
            let someBounds2 = NSPredicate(format: "(indexed:by:(minlongitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(maxlongitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(minlatitude, 'BoundsIndex') between { %lf, %lf } && " +
                                          "indexed:by:(maxlatitude, 'BoundsIndex') between { %lf, %lf} )",
                                          Float(minlong),
                                          regionBounds.maxlongitude,
                                          regionBounds.minlongitude,
                                          Float(maxlong),
                                          Float(minlat),
                                          regionBounds.maxlatitude,
                                          regionBounds.minlatitude,
                                          Float(maxlat))

            // Dictionary fetch of the distinct person names attached to the matching bounds.
            let requestOnBounds: NSFetchRequest<NSDictionary> = NSFetchRequest<NSDictionary>(entityName:"Bounds")
            requestOnBounds.resultType = NSFetchRequestResultType.dictionaryResultType
            requestOnBounds.propertiesToFetch = ["person.name"]
            requestOnBounds.returnsDistinctResults = true

            requestOnBounds.predicate = someBounds1
            print("\n")
            print("\(elapsedSeconds()) Start - Fetch (no index):")
            var boundsRes = try mainContext.fetch(requestOnBounds)
            var uniquePersons : [String] = boundsRes.compactMap { $0.value(forKey: "person.name") as? String }
            print("\(elapsedSeconds()) Number of Persons in this Region: \(uniquePersons.count)")
            print("\n")

            requestOnBounds.predicate = someBounds2
            print("\(elapsedSeconds()) Start - Fetch (with index):")
            boundsRes = try mainContext.fetch(requestOnBounds)
            uniquePersons = boundsRes.compactMap { $0.value(forKey: "person.name") as? String }
            print("\(elapsedSeconds()) Number of Persons in this Region: \(uniquePersons.count)")
            print("\n")
        } catch {
            print("Error: \(error)")
        }
    }

    // Log where each persistent store lives.
    for store in (persistentContainer.persistentStoreCoordinator.persistentStores) {
        os_log("Store URL: %@", log: Debug.coredata_log, type: .info, store.url?.absoluteString ?? "No Store")
    }
    return true
}
输出:
前导数字是以秒为单位的时间。
0 Region: Region 1
0 Region: L1 = (0.0,20.0) R1 = (2.0,22.0)
0 Start - Fetch (no index):
2 Number of Persons in this Region: 267
2 Start - Fetch (with index):
10 Number of Persons in this Region: 267
10 Region: Region 13
10 Region: L1 = (0.0,44.0) R1 = (2.0,46.0)
10 Start - Fetch (no index):
11 Number of Persons in this Region: 4049
11 Start - Fetch (with index):
13 Number of Persons in this Region: 4049
13 Region: Region 43
13 Region: L1 = (4.0,32.0) R1 = (6.0,34.0)
13 Start - Fetch (no index):
14 Number of Persons in this Region: 28798
14 Start - Fetch (with index):
17 Number of Persons in this Region: 28798
17 Region: Region 101
17 Region: L1 = (10.0,40.0) R1 = (12.0,42.0)
17 Start - Fetch (no index):
18 Number of Persons in this Region: 46753
18 Start - Fetch (with index):
22 Number of Persons in this Region: 46753
22 Region: Region 113
22 Region: L1 = (12.0,28.0) R1 = (14.0,30.0)
22 Start - Fetch (no index):
22 Number of Persons in this Region: 45312
22 Start - Fetch (with index):
28 Number of Persons in this Region: 45312
28 Region: Region 145
28 Region: L1 = (16.0,20.0) R1 = (18.0,22.0)
28 Start - Fetch (no index):
28 Number of Persons in this Region: 3023
28 Start - Fetch (with index):
34 Number of Persons in this Region: 3023
结果:
indexed:by:
导致 Core Data 使用 R*Tree 索引。
- 使用 R*Tree 确实对查询执行时间不利。
未决问题:
什么类型的查询和 Core Data 模型利用了 R*Tree 索引?