F# - 包含数字的自然排序字符串
F# - Natural Sort Of Strings Containing Numbers
是否有与 Sorting for Humans: Natural Sort Order 等效的 F#?例如,重现以下示例:
Actual (List.sort) : let strngLst = ["1-5"; "10-15"; "15-20"; "5-10"]
Expected : let strngLst = ["1-5"; "5-10"; "10-15"; "15-20"]
请指教?
基于那篇文章中的 Python 3-liner,我会这样做:
open System
open System.Text.RegularExpressions
/// Sorts a list of strings in "natural" order, so that embedded runs of
/// digits are ordered by numeric value ("5-10" before "10-15").
///
/// Each string is split into alternating text / digit chunks. Digit chunks
/// that fit in an Int32 become `Choice1Of2 int`; every other chunk (including
/// digit runs too large for Int32) becomes `Choice2Of2 string`. Any
/// `Choice1Of2` value compares less than any `Choice2Of2` value.
let sortNicely l =
    // Build the regex once per call instead of once per key.
    let digitRuns = Regex("([0-9]+)")
    let convert (text: string) =
        match Int32.TryParse(text) with
        | true, i -> Choice1Of2 i
        | false, _ -> Choice2Of2 text
    let alphanumKey (key: string) =
        // The key must be a list, not an array: F# structural comparison
        // orders arrays by length first, which would put every string with
        // fewer chunks ahead of longer ones regardless of content. Lists
        // compare element by element (lexicographically).
        digitRuns.Split(key) |> Array.map convert |> List.ofArray
    List.sortBy alphanumKey l
主要区别在于 Choice 类型的使用。Python 版本巧妙地在 convert 中利用了动态类型:Python 总是认为 int 小于字符串,因此 convert 可以返回 int 或字符串,而 sort 会按我们期望的方式排序。但在 F# 中,我们需要更加明确。我使用了可区分联合(discriminated union),因为它同样能满足我们的需求:第一种情况(Choice1Of2)的值总是小于第二种情况(Choice2Of2)的值。
根据@matekus 的评论,最正确的解决方案可能是将 AlphaNum 排序移植到 F#,因此:
/// Length of a string.
let len = String.length

/// True when the character of `s` at index `i` is an ASCII digit '0'..'9'.
let isnum (s: string) i =
    let c = s.[i]
    c >= '0' && c <= '9'

/// True when `f` and `t` are both below the length of `s` and the characters
/// at those indices are of the same kind (both digits or both non-digits).
let chunk s f t = f < len s && t < len s && isnum s f = isnum s t

/// Index one past the end of the same-kind run of characters that starts at
/// `f`. Returns `f` itself when `f` is at or beyond the end of `s`.
let chunkto s f =
    let rec scan e = if chunk s f e then scan (e + 1) else e
    scan f

/// Parses `str` as a 32-bit integer, returning 0 when parsing fails.
/// No longer used by `alphanumcmp`; retained so existing callers keep working.
let int_of_string (str: string) =
    match System.Int32.TryParse(str) with
    | true, v -> v
    | false, _ -> 0

/// Natural-order comparison of two strings, usable with `List.sortWith`.
///
/// Both strings are walked chunk by chunk, where a chunk is a maximal run of
/// digits or of non-digits. Two digit chunks are compared by numeric value;
/// any other pair of chunks is compared as strings. A string that runs out of
/// chunks first sorts before the longer one. Strings whose chunks are all
/// equal (e.g. "a1" and "a01") are ordered by total length, then by ordinal
/// string comparison, so 0 is returned only for identical strings.
///
/// Returns a negative number, zero, or a positive number.
let alphanumcmp (a: string) (b: string) =
    let al, bl = len a, len b
    // Compare two all-digit chunks by value without parsing them, so digit
    // runs of any length work: after dropping leading zeros, the longer run
    // is the larger number, and equal-length runs compare digit by digit.
    let numcmp (x: string) (y: string) =
        let x, y = x.TrimStart('0'), y.TrimStart('0')
        if x.Length <> y.Length then compare x.Length y.Length else compare x y
    let rec chunkcmp ai bi =
        match ai >= al, bi >= bl with
        | true, true ->
            // All chunks matched: deterministic tie-break.
            if al <> bl then compare al bl else compare a b
        | true, false -> -1 // `a` is exhausted while `b` has chunks left
        | false, true -> 1 // `b` is exhausted while `a` has chunks left
        | false, false ->
            let ae, be = chunkto a ai, chunkto b bi
            let sa, sb = a.Substring(ai, ae - ai), b.Substring(bi, be - bi)
            let cmp =
                if isnum a ai && isnum b bi then numcmp sa sb else compare sa sb
            if cmp = 0 then chunkcmp ae be else cmp
    chunkcmp 0 0
/// Non-generic `IComparer` adapter around `alphanumcmp`, for APIs that take a
/// `System.Collections.IComparer`. Operands are compared through their
/// `ToString()` representation.
type AlphanumComparer() =
    interface System.Collections.IComparer with
        /// Compares `x` and `y` in natural order. A null operand sorts before
        /// any non-null operand, and two nulls are equal, instead of failing
        /// on the `ToString()` call.
        member this.Compare(x, y) =
            match isNull x, isNull y with
            | true, true -> 0
            | true, false -> -1
            | false, true -> 1
            | false, false -> alphanumcmp (x.ToString()) (y.ToString())
测试:
// Sample data for the natural-sort demonstration.
let names =
    [ "1000X Radonius Maximus"
      "10X Radonius"
      "200X Radonius"
      "20X Radonius"
      "20X Radonius Prime"
      "30X Radonius"
      "40X Radonius"
      "Allegia 50 Clasteron"
      "Allegia 500 Clasteron"
      "Allegia 51 Clasteron"
      "Allegia 51B Clasteron"
      "Allegia 52 Clasteron"
      "Allegia 60 Clasteron"
      "Alpha 100"
      "Alpha 2"
      "Alpha 200"
      "Alpha 2A"
      "Alpha 2A-8000"
      "Alpha 2A-900"
      "Callisto Morphamax"
      "Callisto Morphamax 500"
      "Callisto Morphamax 5000"
      "Callisto Morphamax 600"
      "Callisto Morphamax 700"
      "Callisto Morphamax 7000"
      "Callisto Morphamax 7000 SE"
      "Callisto Morphamax 7000 SE2"
      "QRS-60 Intrinsia Machine"
      "QRS-60F Intrinsia Machine"
      "QRS-62 Intrinsia Machine"
      "QRS-62F Intrinsia Machine"
      "Xiph Xlater 10000"
      "Xiph Xlater 2000"
      "Xiph Xlater 300"
      "Xiph Xlater 40"
      "Xiph Xlater 5"
      "Xiph Xlater 50"
      "Xiph Xlater 500"
      "Xiph Xlater 5000"
      "Xiph Xlater 58" ];;
// Sort with the natural-order comparer and print the resulting list.
printf "%A" (List.sortWith alphanumcmp names)
结果:
["10X Radonius"; "20X Radonius"; "20X Radonius Prime"; "30X Radonius";
"40X Radonius"; "200X Radonius"; "1000X Radonius Maximus";
"Allegia 50 Clasteron"; "Allegia 51 Clasteron"; "Allegia 51B Clasteron";
"Allegia 52 Clasteron"; "Allegia 60 Clasteron"; "Allegia 500 Clasteron";
"Alpha 2"; "Alpha 2A"; "Alpha 2A-900"; "Alpha 2A-8000"; "Alpha 100";
"Alpha 200"; "Callisto Morphamax"; "Callisto Morphamax 500";
"Callisto Morphamax 600"; "Callisto Morphamax 700"; "Callisto Morphamax 5000";
"Callisto Morphamax 7000"; "Callisto Morphamax 7000 SE";
"Callisto Morphamax 7000 SE2"; "QRS-60 Intrinsia Machine";
"QRS-60F Intrinsia Machine"; "QRS-62 Intrinsia Machine";
"QRS-62F Intrinsia Machine"; "Xiph Xlater 5"; "Xiph Xlater 40";
"Xiph Xlater 50"; "Xiph Xlater 58"; "Xiph Xlater 300"; "Xiph Xlater 500";
"Xiph Xlater 2000"; "Xiph Xlater 5000"; "Xiph Xlater 10000"]val it : unit = ()
是否有与 Sorting for Humans: Natural Sort Order 等效的 F#?例如,重现以下示例:
Actual (List.sort) : let strngLst = ["1-5"; "10-15"; "15-20"; "5-10"]
Expected : let strngLst = ["1-5"; "5-10"; "10-15"; "15-20"]
请指教?
基于那篇文章中的 Python 3-liner,我会这样做:
open System
open System.Text.RegularExpressions
/// Sorts a list of strings in "natural" order, so that embedded runs of
/// digits are ordered by numeric value ("5-10" before "10-15").
///
/// Each string is split into alternating text / digit chunks. Digit chunks
/// that fit in an Int32 become `Choice1Of2 int`; every other chunk (including
/// digit runs too large for Int32) becomes `Choice2Of2 string`. Any
/// `Choice1Of2` value compares less than any `Choice2Of2` value.
let sortNicely l =
    // Build the regex once per call instead of once per key.
    let digitRuns = Regex("([0-9]+)")
    let convert (text: string) =
        match Int32.TryParse(text) with
        | true, i -> Choice1Of2 i
        | false, _ -> Choice2Of2 text
    let alphanumKey (key: string) =
        // The key must be a list, not an array: F# structural comparison
        // orders arrays by length first, which would put every string with
        // fewer chunks ahead of longer ones regardless of content. Lists
        // compare element by element (lexicographically).
        digitRuns.Split(key) |> Array.map convert |> List.ofArray
    List.sortBy alphanumKey l
主要区别在于 Choice 类型的使用。Python 版本巧妙地在 convert 中利用了动态类型:Python 总是认为 int 小于字符串,因此 convert 可以返回 int 或字符串,而 sort 会按我们期望的方式排序。但在 F# 中,我们需要更加明确。我使用了可区分联合(discriminated union),因为它同样能满足我们的需求:第一种情况(Choice1Of2)的值总是小于第二种情况(Choice2Of2)的值。
根据@matekus 的评论,最正确的解决方案可能是将 AlphaNum 排序移植到 F#,因此:
/// Length of a string.
let len = String.length

/// True when the character of `s` at index `i` is an ASCII digit '0'..'9'.
let isnum (s: string) i =
    let c = s.[i]
    c >= '0' && c <= '9'

/// True when `f` and `t` are both below the length of `s` and the characters
/// at those indices are of the same kind (both digits or both non-digits).
let chunk s f t = f < len s && t < len s && isnum s f = isnum s t

/// Index one past the end of the same-kind run of characters that starts at
/// `f`. Returns `f` itself when `f` is at or beyond the end of `s`.
let chunkto s f =
    let rec scan e = if chunk s f e then scan (e + 1) else e
    scan f

/// Parses `str` as a 32-bit integer, returning 0 when parsing fails.
/// No longer used by `alphanumcmp`; retained so existing callers keep working.
let int_of_string (str: string) =
    match System.Int32.TryParse(str) with
    | true, v -> v
    | false, _ -> 0

/// Natural-order comparison of two strings, usable with `List.sortWith`.
///
/// Both strings are walked chunk by chunk, where a chunk is a maximal run of
/// digits or of non-digits. Two digit chunks are compared by numeric value;
/// any other pair of chunks is compared as strings. A string that runs out of
/// chunks first sorts before the longer one. Strings whose chunks are all
/// equal (e.g. "a1" and "a01") are ordered by total length, then by ordinal
/// string comparison, so 0 is returned only for identical strings.
///
/// Returns a negative number, zero, or a positive number.
let alphanumcmp (a: string) (b: string) =
    let al, bl = len a, len b
    // Compare two all-digit chunks by value without parsing them, so digit
    // runs of any length work: after dropping leading zeros, the longer run
    // is the larger number, and equal-length runs compare digit by digit.
    let numcmp (x: string) (y: string) =
        let x, y = x.TrimStart('0'), y.TrimStart('0')
        if x.Length <> y.Length then compare x.Length y.Length else compare x y
    let rec chunkcmp ai bi =
        match ai >= al, bi >= bl with
        | true, true ->
            // All chunks matched: deterministic tie-break.
            if al <> bl then compare al bl else compare a b
        | true, false -> -1 // `a` is exhausted while `b` has chunks left
        | false, true -> 1 // `b` is exhausted while `a` has chunks left
        | false, false ->
            let ae, be = chunkto a ai, chunkto b bi
            let sa, sb = a.Substring(ai, ae - ai), b.Substring(bi, be - bi)
            let cmp =
                if isnum a ai && isnum b bi then numcmp sa sb else compare sa sb
            if cmp = 0 then chunkcmp ae be else cmp
    chunkcmp 0 0
/// Non-generic `IComparer` adapter around `alphanumcmp`, for APIs that take a
/// `System.Collections.IComparer`. Operands are compared through their
/// `ToString()` representation.
type AlphanumComparer() =
    interface System.Collections.IComparer with
        /// Compares `x` and `y` in natural order. A null operand sorts before
        /// any non-null operand, and two nulls are equal, instead of failing
        /// on the `ToString()` call.
        member this.Compare(x, y) =
            match isNull x, isNull y with
            | true, true -> 0
            | true, false -> -1
            | false, true -> 1
            | false, false -> alphanumcmp (x.ToString()) (y.ToString())
测试:
// Sample data for the natural-sort demonstration.
let names =
    [ "1000X Radonius Maximus"
      "10X Radonius"
      "200X Radonius"
      "20X Radonius"
      "20X Radonius Prime"
      "30X Radonius"
      "40X Radonius"
      "Allegia 50 Clasteron"
      "Allegia 500 Clasteron"
      "Allegia 51 Clasteron"
      "Allegia 51B Clasteron"
      "Allegia 52 Clasteron"
      "Allegia 60 Clasteron"
      "Alpha 100"
      "Alpha 2"
      "Alpha 200"
      "Alpha 2A"
      "Alpha 2A-8000"
      "Alpha 2A-900"
      "Callisto Morphamax"
      "Callisto Morphamax 500"
      "Callisto Morphamax 5000"
      "Callisto Morphamax 600"
      "Callisto Morphamax 700"
      "Callisto Morphamax 7000"
      "Callisto Morphamax 7000 SE"
      "Callisto Morphamax 7000 SE2"
      "QRS-60 Intrinsia Machine"
      "QRS-60F Intrinsia Machine"
      "QRS-62 Intrinsia Machine"
      "QRS-62F Intrinsia Machine"
      "Xiph Xlater 10000"
      "Xiph Xlater 2000"
      "Xiph Xlater 300"
      "Xiph Xlater 40"
      "Xiph Xlater 5"
      "Xiph Xlater 50"
      "Xiph Xlater 500"
      "Xiph Xlater 5000"
      "Xiph Xlater 58" ];;
// Sort with the natural-order comparer and print the resulting list.
printf "%A" (List.sortWith alphanumcmp names)
结果:
["10X Radonius"; "20X Radonius"; "20X Radonius Prime"; "30X Radonius";
"40X Radonius"; "200X Radonius"; "1000X Radonius Maximus";
"Allegia 50 Clasteron"; "Allegia 51 Clasteron"; "Allegia 51B Clasteron";
"Allegia 52 Clasteron"; "Allegia 60 Clasteron"; "Allegia 500 Clasteron";
"Alpha 2"; "Alpha 2A"; "Alpha 2A-900"; "Alpha 2A-8000"; "Alpha 100";
"Alpha 200"; "Callisto Morphamax"; "Callisto Morphamax 500";
"Callisto Morphamax 600"; "Callisto Morphamax 700"; "Callisto Morphamax 5000";
"Callisto Morphamax 7000"; "Callisto Morphamax 7000 SE";
"Callisto Morphamax 7000 SE2"; "QRS-60 Intrinsia Machine";
"QRS-60F Intrinsia Machine"; "QRS-62 Intrinsia Machine";
"QRS-62F Intrinsia Machine"; "Xiph Xlater 5"; "Xiph Xlater 40";
"Xiph Xlater 50"; "Xiph Xlater 58"; "Xiph Xlater 300"; "Xiph Xlater 500";
"Xiph Xlater 2000"; "Xiph Xlater 5000"; "Xiph Xlater 10000"]val it : unit = ()