F# - 包含数字的自然排序字符串

F# - Natural Sort Of Strings Containing Numbers

F# 中是否有与《Sorting for Humans: Natural Sort Order》一文等效的实现?例如,重现以下示例:

Actual (List.sort)  : let strngLst = ["1-5"; "10-15"; "15-20"; "5-10"]
Expected            : let strngLst = ["1-5"; "5-10"; "10-15"; "15-20"]

请指教?

基于那篇文章中的三行 Python 代码,我会这样做:

open System
open System.Text.RegularExpressions

/// Sorts a list of strings in natural ("human") order, so that embedded
/// digit runs are ordered by numeric value: "5-10" comes before "10-15".
///
/// Each string is split into alternating text and digit segments; the
/// sequence of segments is used as the sort key.
let sortNicely l =
    // Built once and reused for every key instead of once per element.
    let digitRuns = Regex("([0-9]+)")
    // Digit runs become Choice1Of2 ints and everything else Choice2Of2 text.
    // Any Choice1Of2 value compares as less than any Choice2Of2 value, so a
    // number sorts before text at the same position.
    // Digit runs that Int32.TryParse rejects (e.g. too large for Int32) fall
    // through to the text case.
    let convert (text: string) =
        match Int32.TryParse(text) with
        | true, i -> Choice1Of2 i
        | false, _ -> Choice2Of2 text
    // The key must be a list, not an array: F# structural comparison orders
    // arrays by length first, which would sort strings by their number of
    // segments before looking at their content. Lists compare element by
    // element from the front, which is the ordering wanted here.
    let alphanumKey (key: string) =
        digitRuns.Split(key) |> Array.map convert |> List.ofArray
    List.sortBy alphanumKey l

主要区别在于 Choice 类型的使用。Python 版本巧妙地在 convert 中利用了动态类型:Python 总是认为 int 小于字符串,因此 convert 既可以返回 int 也可以返回字符串,而 sort 仍会按我们期望的方式排序。但在 F# 中,我们需要更加明确。我使用了可区分联合(discriminated union),因为它同样能满足我们的需求:第一种情况 (Choice1Of2) 的值总是小于第二种情况 (Choice2Of2) 的值。

根据@matekus 的评论,最正确的解决方案可能是将 AlphaNum 排序移植到 F#,因此:

/// Shorthand for the length (in characters) of a string.
let len (s: string) = String.length s

/// True when the character at index `i` of `s` is an ASCII digit ('0'..'9').
/// No bounds check is done here; callers are expected to pass a valid index.
let isnum (s: string) i =
    match s.[i] with
    | ch when '0' <= ch && ch <= '9' -> true
    | _ -> false

/// True when `f` and `t` are both below the length of `s` and the characters
/// at those two positions are of the same kind (both ASCII digits, or both
/// non-digits), i.e. they can belong to the same chunk.
let chunk s f t =
    let n = len s
    if f >= n || t >= n then false
    else isnum s f = isnum s t

/// Returns the index just past the chunk that starts at `f`: the first
/// position `e >= f` for which `chunk s f e` no longer holds. When `f` is
/// already at or past the end of `s`, `f` itself is returned.
let chunkto s f =
    let mutable e = f
    while chunk s f e do
        e <- e + 1
    e

/// Parses `str` as a 32-bit integer, returning 0 when Int32.TryParse
/// rejects the input.
let int_of_string (str: string) =
    match System.Int32.TryParse(str) with
    | true, value -> value
    | false, _ -> 0

/// Compares two strings in natural (alphanumeric) order.
///
/// Both strings are walked chunk by chunk, where a chunk is a maximal run of
/// ASCII digits or of non-digits. Two digit chunks are compared by numeric
/// value; any other pair of chunks is compared as strings. The first
/// non-equal chunk decides the result.
///
/// Returns a negative number when `a` sorts before `b`, zero when they are
/// equivalent, and a positive number when `a` sorts after `b`.
let alphanumcmp (a: string) (b: string) =
    // Compares two digit runs by numeric value without parsing them, so runs
    // of any length are ordered correctly (parsing into Int32 would fail on
    // long runs). After dropping leading zeros, the longer run is the larger
    // number; equal-length runs order the same as their text.
    let cmpDigits (x: string) (y: string) =
        let xt, yt = x.TrimStart('0'), y.TrimStart('0')
        match compare xt.Length yt.Length with
        | 0 -> compare xt yt
        | c -> c
    let al, bl = len a, len b
    let rec chunkcmp ai bi =
        match ai >= al, bi >= bl with
        // Both consumed with every chunk equal (e.g. "a1" vs "a01"):
        // the shorter string goes first as a tie-break.
        | true, true -> compare al bl
        // Only one string is consumed: it is a prefix of the other (up to
        // leading zeros) and therefore sorts first, whatever the total lengths.
        | true, false -> -1
        | false, true -> 1
        | false, false ->
            let ae, be = chunkto a ai, chunkto b bi
            let sa, sb = a.Substring(ai, ae - ai), b.Substring(bi, be - bi)
            let cmp =
                if isnum a ai && isnum b bi then cmpDigits sa sb
                else compare sa sb
            if cmp = 0 then chunkcmp ae be else cmp
    chunkcmp 0 0


/// Non-generic comparer that orders objects by the natural (alphanumeric)
/// order of their `ToString()` text, for use with APIs that accept a
/// `System.Collections.IComparer`.
type AlphanumComparer() =
    interface System.Collections.IComparer with 
        /// Compares `x` and `y` by their string representations using
        /// `alphanumcmp`. A null operand sorts before any non-null one and
        /// two nulls are equal, instead of failing on `ToString()`.
        member this.Compare(x, y) =
            match x, y with
            | null, null -> 0
            | null, _ -> -1
            | _, null -> 1
            | _ -> alphanumcmp (x.ToString()) (y.ToString())

测试:

// Sample data for the natural-sort demonstration, listed in plain
// (non-natural) order.
let names =
    [ "1000X Radonius Maximus"
      "10X Radonius"
      "200X Radonius"
      "20X Radonius"
      "20X Radonius Prime"
      "30X Radonius"
      "40X Radonius"
      "Allegia 50 Clasteron"
      "Allegia 500 Clasteron"
      "Allegia 51 Clasteron"
      "Allegia 51B Clasteron"
      "Allegia 52 Clasteron"
      "Allegia 60 Clasteron"
      "Alpha 100"
      "Alpha 2"
      "Alpha 200"
      "Alpha 2A"
      "Alpha 2A-8000"
      "Alpha 2A-900"
      "Callisto Morphamax"
      "Callisto Morphamax 500"
      "Callisto Morphamax 5000"
      "Callisto Morphamax 600"
      "Callisto Morphamax 700"
      "Callisto Morphamax 7000"
      "Callisto Morphamax 7000 SE"
      "Callisto Morphamax 7000 SE2"
      "QRS-60 Intrinsia Machine"
      "QRS-60F Intrinsia Machine"
      "QRS-62 Intrinsia Machine"
      "QRS-62F Intrinsia Machine"
      "Xiph Xlater 10000"
      "Xiph Xlater 2000"
      "Xiph Xlater 300"
      "Xiph Xlater 40"
      "Xiph Xlater 5"
      "Xiph Xlater 50"
      "Xiph Xlater 500"
      "Xiph Xlater 5000"
      "Xiph Xlater 58" ];;

// Sort the sample names with the natural-order comparison and print the
// resulting list using the %A structural formatter.
printf "%A" (List.sortWith alphanumcmp names)

结果:

 ["10X Radonius"; "20X Radonius"; "20X Radonius Prime"; "30X Radonius";
 "40X Radonius"; "200X Radonius"; "1000X Radonius Maximus";
 "Allegia 50 Clasteron"; "Allegia 51 Clasteron"; "Allegia 51B Clasteron";
 "Allegia 52 Clasteron"; "Allegia 60 Clasteron"; "Allegia 500 Clasteron";
 "Alpha 2"; "Alpha 2A"; "Alpha 2A-900"; "Alpha 2A-8000"; "Alpha 100";
 "Alpha 200"; "Callisto Morphamax"; "Callisto Morphamax 500";
 "Callisto Morphamax 600"; "Callisto Morphamax 700"; "Callisto Morphamax 5000";
 "Callisto Morphamax 7000"; "Callisto Morphamax 7000 SE";
 "Callisto Morphamax 7000 SE2"; "QRS-60 Intrinsia Machine";
 "QRS-60F Intrinsia Machine"; "QRS-62 Intrinsia Machine";
 "QRS-62F Intrinsia Machine"; "Xiph Xlater 5"; "Xiph Xlater 40";
 "Xiph Xlater 50"; "Xiph Xlater 58"; "Xiph Xlater 300"; "Xiph Xlater 500";
 "Xiph Xlater 2000"; "Xiph Xlater 5000"; "Xiph Xlater 10000"]val it : unit = ()