Swift 中数组值的函数聚类

Functional clustering of array values in Swift

给定一个数组,例如:

[0, 0.5, 0.51, 1.0, 1.5, 1.99, 2.0, 2.1, 2.5, 3.0] 

我想根据值的顺序差异(例如,其中 abs(x-y) < nn = 0.2)将值聚类到子数组中,例如:

[[0], [0.5, 0.51], [1.0], [1.5], [1.99, 2.0, 2.1], [2.5], [3.0]]. 

我想以声明的方式进行——只是为了更好地了解更复杂的序列操作在函数式上下文中的工作方式(似乎大多数 "functional Swift" demos/tutorials 都非常基础).

提前致谢。


更新:

这是一个有点接近的单行:

let times = [0, 0.5, 0.99, 1, 1.01, 1.5, 2, 2.5, 2.51, 3, 3.49, 3.5]

let result = times.map { t1 in
    return times.filter { fabs([=13=] - t1) < 0.2 }
}

// [[0.0], [0.5], [0.99, 1.0, 1.01], [0.99, 1.0, 1.01], [0.99, 1.0, 1.01], [1.5], [2.0], [2.5, 2.51], [2.5, 2.51], [3.0], [3.49, 3.5], [3.49, 3.5]]

只需要去掉重复项。

我不知道有任何本机 Swift 方法可以满足您的要求。不过,您可以通过一个简单的扩展来完成此操作:

extension Array {
    func split(condition : (Element, Element) -> Bool) -> [[Element]] {
        var returnArray = [[Element]]()

        var currentSubArray = [Element]()

        for (index, element) in self.enumerate() {
            currentSubArray.append(element)

            if index == self.count - 1 || condition(element, self[index+1]) {
                returnArray.append(currentSubArray)
                currentSubArray = []
            }
        }

        return returnArray
    }
}

用法示例:

let source = [0, 0.5, 0.51, 1.0, 1.5, 1.99, 2.0, 2.1, 2.5, 3.0]
let n = 0.2
let target = source.split { abs([=11=] - ) > n }

输出:

[[0.0], [0.5, 0.51], [1.0], [1.5], [1.99, 2.0, 2.1], [2.5], [3.0]]

这是通过 reduce 实现的:

extension Array {
    func split(condition : (Element, Element) -> Bool) -> [[Element]] {
        return self.reduce([], combine:
            { (list : [[Element]], value : Element) in
                if list.isEmpty {
                    return [[value]]
                }
                else if !condition(list.last!.last!, value) {
                    return list[0..<list.count - 1] + [list.last!+[value]]
                }
                else {
                    return list + [[value]]
                }
            }
        )
    }
}

let source = [0, 0.5, 0.51, 1.0, 1.5, 1.99, 2.0, 2.1, 2.5, 3.0]
let n = 0.2
let target = source.split { abs([=10=] - ) > n }

输出:

[[0], [0.5, 0.51], [1], [1.5], [1.99, 2, 2.1], [2.5], [3]]

更新

如果您不介意改变 reduce 中的数组,您会得到一个更短且可能更有效的解决方案:

extension Array {
    func split(condition : (Element, Element) -> Bool) -> [[Element]] {
        return self.reduce([], combine:
            { ( var list : [[Element]], value : Element) in
                if list.isEmpty || condition(list.last!.last!, value) {
                    list += [[value]]
                }
                else {
                    list[list.count - 1] += [value]
                }
                return list
            }
        )
    }
}

我绝对会像 Aaron Brager 那样做。在我看来,这是最好的 Swift 方法。 Swift 实际上在函数式编程中并不是那么好用。但是为了探索你如何可能,这里是我可能攻击它的一种方式。

我完全预计这方面的表现会很糟糕。可能是 O(n^2)。递归构建一个数组,就像我在 groupWhile 中所做的那样,强制它在每一步都复制整个数组。

// Really Swift? We have to say that a subsequence has the same elements as its sequence?
extension CollectionType where SubSequence.Generator.Element == Generator.Element {

    // Return the prefix for which pred is true, and the rest of the elements.
    func span(pred: Generator.Element -> Bool) -> ([Generator.Element], [Generator.Element]) {
        let split = indexOf{ !pred([=10=]) } ?? endIndex
        return (Array(self[startIndex..<split]), Array(self[split..<endIndex]))
    }

    // Start a new group each time pred is false.
    func groupWhile(pred: Generator.Element -> Bool) -> [[Generator.Element]] {
        guard self.count > 0 else { return [] }
        let (this, that) = span(pred)
        let next = that.first.map{[[=10=]]} ?? []
        let rest = Array(that.dropFirst())
        return [this + next] + rest.groupWhile(pred)
    }
}

extension CollectionType where
    Generator.Element : FloatingPointType,
    Generator.Element : AbsoluteValuable,
    SubSequence.Generator.Element == Generator.Element {

    //
    // Here's the meat of it
    //
    func groupBySeqDistanceGreaterThan(n: Generator.Element) -> [[Generator.Element]] {

        // I don't love this, but it is a simple way to deal with the last element.
        let xs = self + [Generator.Element.infinity]

        // Zip us with our own tail. This creates a bunch of pairs we can evaluate.
        let pairs = Array(zip(xs, xs.dropFirst()))

        // Insert breaks where the difference is too high in the pair
        let groups = pairs.groupWhile { abs([=10=].1 - [=10=].0) < n }

        // Collapse the pairs down to values
        return groups.map { [=10=].map { [=10=].0 } }
    }
}

具有累积参数的简单折叠有效。顺便说一句,不确定这是否正是您想要的,因为我不明白数组中的元素是否需要后续。在描述中你是这么说的,但是你的 'sample answer' 没有考虑它们是否是后续的。你应该改进问题描述。

let a : [Double] = [0, 0.5, 0.51, 1.0, 1.5, 1.99, 2.0, 2.1, 2.5, 3.0];
let diff : Double = 0.2;
let eps = 0.0000001

let b = a.sort().reduce(([],[])) { (ps : ([Double],[[Double]]), c : Double) -> ([Double],[[Double]]) in
  if ps.0.count == 0 || abs(ps.0.first! - c) - diff <= eps { return (ps.0 + [c], ps.1) } else { return ([c], ps.1 + [ps.0]) }
}
let result = b.1 + [b.0];
print(result)

Returns

[[0.0], [0.5, 0.51], [1.0], [1.5], [1.99, 2.0, 2.1], [2.5], [3.0]]

Swift 在函数式编程方面还不错。

这是一种避免 if/else 语句并保持分组条件隔离的方法: (比接受的答案恕我直言更清晰和直接)

let values:[Double] = [0, 0.5, 0.51, 1.0, 1.5, 1.99, 2.0, 2.1, 2.5, 3.0]

let inGroup     = { (x:Double,y:Double) return abs(x-y) < 0.2 }

let intervals   = zip(values,values.enumerated().dropFirst())            
let starts      = intervals.filter{ !inGroup([=10=],.1) }.map{[=10=].1.0}
let ranges      = zip([0]+starts, starts+[values.count])
let result      = ranges.map{values[[=10=]..<]}

//  result :  [ [0.0], [0.5, 0.51], [1.0], [1.5], [1.99, 2.0, 2.1], [2.5], [3.0]  ]             

//  how it works ...
//
//  intervals:   Contains pairs of consecutive values along with the index of second one
//               [value[i],(index,value[i+1])] 
//
//  starts:      Contains the index of second values that don't meet the grouping condition 
//               (i.e. start of next group) 
//               [index] 
//        
//  ranges:      Contains begining and end indexes for group ranges formed using start..<end
//               [(Int,Int)]
//
//  result:      Groups of consecutive values meeting the inGroup condition
//