Realm 返回排序数据的速度有多快?

How quickly can Realm return sorted data?

Realm 允许您按排序顺序接收查询结果。

// Open a Realm; `try!` crashes the program if opening fails.
let realm = try! Realm()
// All stored Dog objects, as a Realm Results collection.
let dogs = realm.objects(Dog.self)
// The same collection ordered by the "name" key path, descending.
let dogsSorted = dogs.sorted(byKeyPath: "name", ascending: false)

我运行了下面这个测试,看看 Realm 返回排序数据有多快

import Foundation
import RealmSwift

/// Realm model used by the sort benchmark: one integer property.
class TestModel: Object {
    /// Value the benchmark sorts on; persisted with `indexed: true`, defaults to 0.
    @Persisted(indexed: true) var value: Int = 0
}

/// Benchmark harness that measures how long Realm takes to hand back the first
/// element of a `TestModel` collection sorted by `value`.
class RealmSortTest {
    /// Number of `TestModel` objects that `writeData()` stores.
    let documentCount = 1000000
    /// Most recently fetched object with the smallest `value`.
    var smallestValue: TestModel = TestModel()

    /// Replaces everything in the Realm with `documentCount` objects whose
    /// `value` is a random integer in `0 ... Int.max`.
    func writeData() {
        // `try!` is kept so the method stays non-throwing; a failure to open
        // or write the Realm aborts the benchmark.
        let realm = try! Realm()
        // Half-open range so that exactly `documentCount` objects are created.
        let documents: [TestModel] = (0..<documentCount).map { _ in
            let newDoc = TestModel()
            newDoc.value = Int.random(in: 0 ... Int.max)
            return newDoc
        }
        try! realm.write {
            realm.deleteAll()
            realm.add(documents)
        }
    }

    /// Times the retrieval of the first element of the sorted results, stores
    /// it in `smallestValue`, and prints the elapsed time in seconds.
    /// Prints a notice and returns if the Realm holds no `TestModel` objects.
    func readData() {
        let realm = try! Realm()
        let sortedResults = realm.objects(TestModel.self).sorted(byKeyPath: "value")

        // Only the element access is timed, not the construction of the query.
        let start = Date()
        let firstObject = sortedResults.first
        let delta = Date().timeIntervalSince(start)

        // `first` returns nil on an empty collection instead of raising an
        // out-of-bounds exception the way `sortedResults[0]` would.
        guard let smallest = firstObject else {
            print("No TestModel objects stored; call writeData() first")
            return
        }
        smallestValue = smallest
        print("Time Taken: \(delta)")
    }

    /// Inserts an object smaller than the current minimum, then times how long
    /// the existing sorted results take to return the new first element.
    /// Prints a notice and returns if the Realm holds no `TestModel` objects.
    func updateSmallestValue() {
        let realm = try! Realm()
        let sortedResults = realm.objects(TestModel.self).sorted(byKeyPath: "value")

        guard let currentSmallest = sortedResults.first else {
            print("No TestModel objects stored; call writeData() first")
            return
        }
        smallestValue = currentSmallest

        print("Originally loaded smallest value: \(smallestValue.value)")

        let newSmallestValue = TestModel()
        newSmallestValue.value = smallestValue.value - 1
        try! realm.write {
            realm.add(newSmallestValue)
        }

        // `smallestValue` still refers to the object fetched before the write,
        // so this prints the value of that same object again.
        print("Originally loaded smallest value after write: \(smallestValue.value)")

        let readStart = Date()
        let reloaded = sortedResults.first
        let readDelta = Date().timeIntervalSince(readStart)
        if let reloaded = reloaded {
            smallestValue = reloaded
        }
        print("Reloaded smallest value \(smallestValue.value)")
        print("Time Taken to reload the smallest value: \(readDelta)")
    }
}

使用 documentCount = 100000,readData() 输出:

Time taken to load smallest value: 0.48901796340942383

和 updateSmallestValue() 输出:

Originally loaded smallest value: 2075613243102
Originally loaded smallest value after write: 2075613243102
Reloaded smallest value 2075613243101
Time taken to reload the smallest value: 0.4624580144882202

使用 documentCount = 1000000,readData() 输出:

Time taken to load smallest value: 4.807577967643738

和 updateSmallestValue() 输出:

Originally loaded smallest value: 4004790407680
Originally loaded smallest value after write: 4004790407680
Reloaded smallest value 4004790407679
Time taken to reload the smallest value: 5.2308430671691895

从排序结果集中检索第一个文档所花费的时间与存储在 Realm 中的文档数量成比例,而不是与正在检索的文档数量成比例。这向我表明,Realm 是在查询时而不是在写入文档时对所有文档进行排序。有没有一种方法可以为数据编制索引,以便快速检索少量已排序的文档?

编辑:

根据评论中的讨论,我更新了代码以仅加载已排序集合中的最小值。

编辑 2

我按照评论中的建议更新了代码,改为观察 (observe) 查询结果。

import Foundation
import RealmSwift

/// Realm model for the observer-based benchmark: one integer property.
class TestModel: Object {
    /// Value the benchmark sorts on; persisted with `indexed: true`, defaults to 0.
    @Persisted(indexed: true) var value: Int = 0
}

/// Benchmark harness that observes a sorted `Results<TestModel>` and reports
/// how long it takes until the notification block runs and the first elements
/// have been printed.
class RealmSortTest {
    /// Number of `TestModel` objects that `writeData()` stores.
    let documentCount = 1000000
    /// Not used by the methods of this class.
    var smallestValue: TestModel = TestModel()
    /// Sorted results being observed; replaced by `observeData()`.
    var storedResults: Results<TestModel> = (try! Realm()).objects(TestModel.self).sorted(byKeyPath: "value")
    /// Token returned by `observe`, stored on the instance by `observeData()`.
    var resultsToken: NotificationToken? = nil

    /// Replaces everything in the Realm with `documentCount` objects whose
    /// `value` is a random integer in `0 ... Int.max`.
    func writeData() {
        let realm = try! Realm()
        // Half-open range so that exactly `documentCount` objects are created.
        let documents: [TestModel] = (0..<documentCount).map { _ in
            let newDoc = TestModel()
            newDoc.value = Int.random(in: 0 ... Int.max)
            return newDoc
        }
        try! realm.write {
            realm.deleteAll()
            realm.add(documents)
        }
    }

    /// Starts observing the sorted results. Each time the notification block
    /// runs it prints the time elapsed since this call, the values of up to
    /// the first ten elements, and the elapsed time again.
    func observeData() {
        let realm = try! Realm()
        print("Loading Data")
        let startTime = Date()
        storedResults = realm.objects(TestModel.self).sorted(byKeyPath: "value")
        // Capture `self` weakly: `self` owns the token and the token owns this
        // closure, so a strong capture would form a retain cycle.
        resultsToken = storedResults.observe { [weak self] _ in
            guard let self = self else { return }
            let observationTime = Date().timeIntervalSince(startTime)
            print("Time to first observation: \(observationTime)")
            // `prefix(10)` yields at most ten elements and does not crash when
            // fewer than ten objects are stored.
            let elementsArray = Array(self.storedResults.prefix(10))
            elementsArray.forEach { print($0.value) }
            let moreElapsed = Date().timeIntervalSince(startTime)
            print("Time to printed elements: \(moreElapsed)")
        }
    }
}

我得到了以下输出

Loading Data
Time to first observation: 5.252112984657288
3792614823099
56006949537408
Time to printed elements: 5.253015995025635

用观察者读取数据并没有减少读取数据所花费的时间。

dogs 和 dogsSorted 都是 Realm Results 集合对象,本质上包含指向基础数据的指针,而不是数据本身。

定义排序顺序不会加载所有对象,它们会保持惰性 - 只在需要时加载,这是 Realm 的巨大好处之一;可以使用巨大的数据集而不用担心内存过载。

这也是 Realm Results 对象始终反映底层数据当前状态的原因之一;该数据可以更改很多次,而您在应用的结果变量(以及一般的 Realm 集合)中看到的内容将始终是更新后的数据。

附带说明一下,目前将 Swift 高阶函数用于 Realm 集合对象会导致数据被加载到内存中 - 所以不要那样做。使用 Realm 自带的函数进行排序、过滤等,一切都会保持惰性和内存友好。

索引是一种权衡;一方面,它可以提高某些查询的性能,例如等值查询 ("name == 'Spot'"),但另一方面,它会降低写入性能。此外,添加索引会占用更多空间。

一般来说,索引最适合特定的用例;例如在性能至关重要的情况下实现某种输入时自动补全 (type-ahead)。我们有几个具有非常大数据集 (Gb) 的应用程序,没有任何内容被索引,因为索引带来的性能优势被较慢的写入所抵消。我建议在没有索引的情况下开始。

编辑:

将根据其他讨论更新答案。

首先,将数据从一个对象复制到另一个对象并不是数据库加载性能的衡量标准。这里真正的目标是用户体验和/或能够访问该数据的速度 - 即从用户期望看到数据到数据显示出来所需的时间。因此,让我们提供一些代码来演示一般性能:

我们将首先从与 OP 使用的模型类似的模型开始

/// Realm model for the load-time demo: one integer property plus a
/// convenience initializer that sets it.
class TestModel: Object {
    /// Value the demo sorts on; persisted with `indexed: true`, defaults to 0.
    @Persisted(indexed: true) var value: Int = 0

    /// Creates a model whose `value` is the given integer.
    /// - Parameter index: The integer to store in `value`.
    convenience init(withIndex index: Int) {
        self.init()
        value = index
    }
}

然后定义几个变量来保存来自磁盘的结果和一个通知标记,它允许我们知道该数据何时可以显示给用户。最后是一个 var 来保存加载开始的时间

// Sorted results read from Realm; assigned in loadBigData() before it is used.
var modelResults: Results<TestModel>!
// Token returned by observe(); stored here by loadBigData().
var modelsToken: NotificationToken?
// Time at which the load began; reset at the start of loadBigData().
var startTime = Date()

这是写入大量数据的函数。objectCount 变量在第一次运行时为 10,000 个对象,在第二次运行时改为 1,000,000 个对象。请注意,这是不好的编码方式,因为我在内存中创建了一百万个对象,所以不要这样做;仅供演示。

/// Demo helper: builds `objectCount` `TestModel` objects with random values in
/// memory, adds them to the Realm in a single write transaction, and prints
/// how many were written.
func writeLotsOfData() {
    let objectCount = 1000000
    let realm = try! Realm()
    autoreleasepool {
        // Every object is created up front and held in one array until the write.
        let models = (0..<objectCount).map { _ in
            TestModel(withIndex: Int.random(in: 0 ... Int.max))
        }

        try! realm.write {
            realm.add(models)
        }

        print("data written: \(models.count) objects")
    }
}

最后是从 Realm 加载这些对象并在数据可以显示给用户时输出信息的函数。请注意,它们按照原始问题的要求排序 - 事实上,随着数据的添加和更改,它们将保持排序!很酷的东西。

/// Loads every `TestModel` sorted by `value`, starts observing the results,
/// and prints the object count and the time elapsed since the load began each
/// time the notification block runs.
func loadBigData() {
    let realm = try! Realm()
    print("Loading Data")

    self.startTime = Date()
    // Keep a non-optional local so `observe` is called directly rather than
    // through optional chaining on the implicitly unwrapped property.
    let sortedModels = realm.objects(TestModel.self).sorted(byKeyPath: "value")
    self.modelResults = sortedModels
    // Capture `self` weakly: `self` owns the token and the token owns this
    // closure, so a strong capture would form a retain cycle.
    self.modelsToken = sortedModels.observe { [weak self] _ in
        guard let self = self else { return }
        let elapsed = Date().timeIntervalSince(self.startTime)
        print("Load completed of \(self.modelResults.count) objects -  elapsed time of \(elapsed)")
    }
}

和结果。两次运行,一次有 10,000 个对象,一次有 1,000,000 个对象

data written: 10000 objects
Loading Data
Load completed of 10000 objects -  elapsed time of 0.0059670209884643555

data written: 1000000 objects
Loading Data
Load completed of 1000000 objects -  elapsed time of 0.6800119876861572

需要注意三点

  1. A Realm Notification object fires an event when the data has completed loading, and also when there are additional changes. We are leveraging that to notify the app when the data has completed loading and is available to be used - shown to the user for example.

  2. We are lazily loading all of the objects! At no point are we going to run into a memory overloading issue. Once the objects have loaded into the results, they are then freely available to be shown to the user or processed in whatever way is needed. Super important to work with Realm objects in a Realm way when working with large datasets. Generally speaking, if it's 10 objects, well, no problem tossing them into an array, but when there are 1 Million objects - let Realm do its lazy job.

  3. The app is protected using the above code and techniques. There could be 10 objects or 1,000,000 objects and the memory impact is minimal.

编辑 2

(有关此编辑的更多信息,请参阅对 OP 问题的评论)

根据 OP 的要求,他们希望看到带有打印值和时间的相同练习。这是更新后的代码

// Observe the sorted results. Each time the block runs, print the object count,
// the values of up to the first ten objects, and the elapsed time since startTime.
// `self` is captured weakly because `self` owns the token that owns this closure.
self.modelsToken = self.modelResults?.observe { [weak self] _ in
    guard let self = self else { return }
    let elapsed = Date().timeIntervalSince(self.startTime)
    print("Load completed of \(self.modelResults.count) objects -  elapsed time of \(elapsed)")
    print("print first 10 object values")
    // `prefix(10)` yields at most ten elements and does not crash when fewer
    // than ten objects are stored.
    let elementsArray = Array(self.modelResults.prefix(10)) // print this if you want to see the elements
    elementsArray.forEach { print($0.value) }
    let moreElapsed = Date().timeIntervalSince(self.startTime)
    print("Printing of 10 elements completed: \(moreElapsed)")
}

然后输出

Loading Data
Load completed of 1000000 objects -  elapsed time of 0.6730009317398071
print first 10 object values
12264243738520
17242140785413
29611477414437
31558144830373
32913160803785
45399774467128
61700529799916
63929929449365
73833938586206
81739195218861
Printing of 10 elements completed: 0.6745189428329468

此时看来,Realm 在数据被访问时而不是在写入时对数据进行排序,并且没有办法让 Realm 在写入时对数据进行排序。这意味着访问排序的数据与数据库中文档的数量成比例,而不是被访问的文档数量。

访问数据的实际时间因用例和平台而异。