过滤包含字符串模式的列表
Filtering lists containing string pattern
我需要过滤列表中包含字符串 Status=ACTIVE 的元素并将其保存为新列表。
稍后我还需要使用 Status=STOPPED 和 System=Windows 过滤相同的列表。
列表:
Programs=
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
想要的结果:
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
和:
["Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED"].
我找到了使用 re:run/2 和 lists:filter/2 过滤它的可能解决方案。有没有更简单快捷的方法?
%% Predicate: true when the regular expression "Status=ACTIVE" matches
%% anywhere in the given entry.
Filter = fun(Entry) ->
    case re:run(Entry, "Status=ACTIVE") of
        nomatch -> false;
        _Match -> true
    end
end.
%% Keep only the entries of Programs accepted by the predicate.
Result = lists:filter(Filter, Programs).
Br,
达米安
第一个可以通过以下方式实现:
%% Split every entry on "," and keep it only when one of the resulting
%% fields is exactly "Status=ACTIVE".
lists:filter(
    fun(Entry) ->
        Fields = string:tokens(Entry, ","),
        lists:member("Status=ACTIVE", Fields)
    end,
    Programs).
而第二个比较复杂,因为它需要验证两个条件:
%% Split every entry on "," once and keep it only when both required
%% fields are present. lists:all/2 stops at the first missing field.
lists:filter(
    fun(Entry) ->
        Fields = string:tokens(Entry, ","),
        lists:all(
            fun(Wanted) -> lists:member(Wanted, Fields) end,
            ["Status=STOPPED", "System=Windows"])
    end,
    Programs).
我不确定它是否更快,但至少可读性更高。
我会强烈考虑将此类列表解析为记录列表以形式化数据结构。用记录过滤变得很优雅:
%% Record holding one program entry, with one field per key that appears in
%% the input strings (Process, System, PID, Program, Status).
-record(program,{process,
system,
pid,
program,
status}).
% Parse list of strings to list of programs
%% NOTE(review): parseProgram/1 is not defined in this snippet; it has to be
%% supplied separately. The filter below presumes it stores status and system
%% as strings - confirm against the actual parser.
ParsedPrograms=parseProgram(Programs),
%% The generator pattern keeps only records whose status is "STOPPED" and
%% whose system is "Windows"; elements that do not match are skipped.
[Valid || Valid = #program{status="STOPPED", system="Windows"} <- ParsedPrograms].
您还可以使用 string:str/2 或 string:find/2(推荐后者,因为 str 已过时)。
%% Variant 1: string:str/2 (obsolete) returns the 1-based position of the
%% substring, or 0 when it does not occur.
%% `andalso' is required here: `and' binds tighter than `>', so
%% `A > 0 and B > 0' is not parsed as two comparisons.
ListA = [X || X <- Programs, string:str(X, "ACTIVE") > 0],
ListB = [X || X <- Programs, string:str(X, "STOPPED") > 0 andalso string:str(X, "Windows") > 0].
%% Variant 2 (alternative to variant 1): string:find/2 returns the rest of
%% the string from the first occurrence, or the atom `nomatch' (not
%% `undefined') when the pattern is absent.
ListA = [X || X <- Programs, string:find(X, "ACTIVE") =/= nomatch],
ListB = [X || X <- Programs, string:find(X, "STOPPED") =/= nomatch andalso string:find(X, "Windows") =/= nomatch].
虽然 choroba 的回答是完全正确的,但我想补充一些关于 Erlang 最佳实践的内容。一个非常好的做法是尽快将来自 Erlang 世界之外的数据转换为正确的 Erlang 结构。当一个项目变得越来越复杂,需要适当的操作、调试、故障排除等时,这种做法会在项目的长期发展和维护中得到回报。所以你通常会尽可能多地将文本数据解析为记录、原子、整数等。这样做有两个好处。首先,您可以尽早验证传入的数据,在边界上防止错误在系统内部传播,之后就可以放心使用快速的处理方法。其次,您可以编写许多辅助函数,使进一步的开发更加容易。例如:
%% @doc Parses comma-separated "Key=Value" program descriptions such as
%% "Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE" into
%% `#program{}' records and offers predicates for filtering them.
-module(programs).

%% One parsed program description. A field keeps the record default
%% (`undefined') when its key does not occur in the input string.
-record(program, {
    process,
    system,
    pid,
    program,
    status
}).

%% API
-export([parse_programs/1, active/1, stopped/1, linux/1, windows/1]).

%% @doc Parses every description in the list, preserving order.
%% Raises `error:badarg' for an unknown key, an unknown system or status
%% value, or a non-integer Process/PID value.
-spec parse_programs([string()]) -> [#program{}].
parse_programs(L) ->
    [parse_program(X) || X <- L].

%% @doc True when the program's status is `active'.
-spec active(#program{}) -> boolean().
active(P) -> P#program.status =:= active.

%% @doc True when the program's status is `stopped'.
-spec stopped(#program{}) -> boolean().
stopped(P) -> P#program.status =:= stopped.

%% @doc True when the program's system is 'Linux'.
-spec linux(#program{}) -> boolean().
linux(P) -> P#program.system =:= 'Linux'.

%% @doc True when the program's system is 'Windows'.
-spec windows(#program{}) -> boolean().
windows(P) -> P#program.system =:= 'Windows'.

%% Internal functions

%% Splits one description on "," and folds the fields into a fresh record.
-spec parse_program(string()) -> #program{}.
parse_program(Str) ->
    %% string:lexemes/2 is the documented replacement for the obsolete
    %% string:tokens/2; empty fields between separators are dropped.
    parse_program(string:lexemes(Str, ","), #program{}).

%% Consumes the fields one by one, dispatching on the "Key=" prefix.
%% If a key occurs more than once, the last occurrence wins.
-spec parse_program([string()], #program{}) -> #program{}.
parse_program([], P) ->
    P;
parse_program(["Process=" ++ Str | T], P) ->
    parse_program(T, P#program{process = list_to_integer(Str)});
parse_program(["System=" ++ Str | T], P) ->
    parse_program(T, P#program{system = parse_system(Str)});
parse_program(["PID=" ++ Str | T], P) ->
    parse_program(T, P#program{pid = list_to_integer(Str)});
parse_program(["Program=" ++ Str | T], P) ->
    parse_program(T, P#program{program = Str});
parse_program(["Status=" ++ Str | T], P) ->
    parse_program(T, P#program{status = parse_status(Str)});
parse_program([_ | _] = Fields, P) ->
    %% Unknown key. The second argument of error/2 must be the argument list
    %% of the current function, so that the stack trace reports
    %% parse_program/2; the offending field is the head of Fields.
    error(badarg, [Fields, P]).

%% Maps the system name onto a fixed set of atoms; no atoms are created
%% from arbitrary input.
-spec parse_system(string()) -> 'Linux' | 'Windows'.
parse_system("Linux") -> 'Linux';
parse_system("Windows") -> 'Windows';
parse_system(Str) -> error(badarg, [Str]).

%% Maps the status name onto a fixed set of atoms.
-spec parse_status(string()) -> active | stopped.
parse_status("ACTIVE") -> active;
parse_status("STOPPED") -> stopped;
parse_status(Str) -> error(badarg, [Str]).
然后进一步的任务变得容易
1> c(programs).
{ok,programs}
2> rr("programs.erl").
[program]
3> Programs=
3> ["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"]
4> Ps = programs:parse_programs(Programs).
[#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Windows',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active}]
5> lists:filter(fun programs:active/1, Ps).
[#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active}]
6> lists:filter(fun(P) -> programs:stopped(P) andalso programs:windows(P) end, Ps).
[#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = stopped}]
作为副作用,您的程序将消耗更少的内存,因为数字和原子消耗的内存都比字符串少得多。任何进一步的处理都会快得多,因为所有原子比较就像数字比较一样,更少的内存意味着 CPU 缓存中有更多数据,而 CPU 缓存命中比主内存访问快两个数量级。
在这种情况下
7> erts_debug:size(Programs).
1062
8> erts_debug:size(Ps).
153
这意味着您可以在 CPU 缓存中存储近七倍的数据。当你在进程之间或 Erlang 分布式节点之间发送消息时,速度也会快近七倍……(如果你用 list_to_binary/1 处理程序名称,差距甚至可达十倍。)
parse_program(["Program=" ++ Str | T], P) ->
parse_program(T, P#program{program = list_to_binary(Str)});
然后
9> c(programs).
{ok,programs}
10> f(Ps).
ok
11> Ps = programs:parse_programs(Programs).
[{program,1,'Linux',240,<<"DRMX">>,active},
{program,1,'Linux',240,<<"DRMX">>,stopped},
{program,1,'Windows',240,<<"DRMX">>,active},
{program,1,'Linux',242,<<"DRMX">>,active},
{program,1,'Windows',242,<<"DRMX">>,stopped},
{program,1,'Windows',242,<<"DRMX">>,active},
{program,1,'Linux',246,<<"DRMX">>,active},
{program,1,'Linux',246,<<"DRMX">>,stopped},
{program,1,'Linux',246,<<"DRMX">>,active}]
12> erts_debug:size(Ps).
108
所以我们从 8.3KiB 降到了 864B。
我需要过滤列表中包含字符串 Status=ACTIVE 的元素并将其保存为新列表。
稍后我还需要使用 Status=STOPPED 和 System=Windows 过滤相同的列表。
列表:
Programs=
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
想要的结果:
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
和:
["Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED"].
我找到了使用 re:run/2 和 lists:filter/2 过滤它的可能解决方案。有没有更简单快捷的方法?
%% Predicate: true when the regular expression "Status=ACTIVE" matches
%% anywhere in the given entry.
Filter = fun(Entry) ->
    case re:run(Entry, "Status=ACTIVE") of
        nomatch -> false;
        _Match -> true
    end
end.
%% Keep only the entries of Programs accepted by the predicate.
Result = lists:filter(Filter, Programs).
Br,
达米安
第一个可以通过以下方式实现:
lists:filter(
fun (A) ->
lists:member("Status=ACTIVE", string:tokens(A,","))
end, Programs).
而第二个比较复杂,因为它需要验证两个条件:
%% Split every entry on "," once and keep it only when both required
%% fields are present. lists:all/2 stops at the first missing field.
lists:filter(
    fun(Entry) ->
        Fields = string:tokens(Entry, ","),
        lists:all(
            fun(Wanted) -> lists:member(Wanted, Fields) end,
            ["Status=STOPPED", "System=Windows"])
    end,
    Programs).
我不确定它是否更快,但至少可读性更高。
我会强烈考虑将此类列表解析为记录列表以形式化数据结构。用记录过滤变得很优雅:
%% Record holding one program entry, with one field per key that appears in
%% the input strings (Process, System, PID, Program, Status).
-record(program,{process,
system,
pid,
program,
status}).
% Parse list of strings to list of programs
%% NOTE(review): parseProgram/1 is not defined in this snippet; it has to be
%% supplied separately. The filter below presumes it stores status and system
%% as strings - confirm against the actual parser.
ParsedPrograms=parseProgram(Programs),
%% The generator pattern keeps only records whose status is "STOPPED" and
%% whose system is "Windows"; elements that do not match are skipped.
[Valid || Valid = #program{status="STOPPED", system="Windows"} <- ParsedPrograms].
您还可以使用 string:str/2 或 string:find/2(推荐后者,因为 str 已过时)。
%% Variant 1: string:str/2 (obsolete) returns the 1-based position of the
%% substring, or 0 when it does not occur.
%% `andalso' is required here: `and' binds tighter than `>', so
%% `A > 0 and B > 0' is not parsed as two comparisons.
ListA = [X || X <- Programs, string:str(X, "ACTIVE") > 0],
ListB = [X || X <- Programs, string:str(X, "STOPPED") > 0 andalso string:str(X, "Windows") > 0].
%% Variant 2 (alternative to variant 1): string:find/2 returns the rest of
%% the string from the first occurrence, or the atom `nomatch' (not
%% `undefined') when the pattern is absent.
ListA = [X || X <- Programs, string:find(X, "ACTIVE") =/= nomatch],
ListB = [X || X <- Programs, string:find(X, "STOPPED") =/= nomatch andalso string:find(X, "Windows") =/= nomatch].
虽然 choroba 的回答是完全正确的,但我想补充一些关于 Erlang 最佳实践的内容,例如:
%% @doc Parses comma-separated "Key=Value" program descriptions such as
%% "Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE" into
%% `#program{}' records and offers predicates for filtering them.
-module(programs).

%% One parsed program description. A field keeps the record default
%% (`undefined') when its key does not occur in the input string.
-record(program, {
    process,
    system,
    pid,
    program,
    status
}).

%% API
-export([parse_programs/1, active/1, stopped/1, linux/1, windows/1]).

%% @doc Parses every description in the list, preserving order.
%% Raises `error:badarg' for an unknown key, an unknown system or status
%% value, or a non-integer Process/PID value.
-spec parse_programs([string()]) -> [#program{}].
parse_programs(L) ->
    [parse_program(X) || X <- L].

%% @doc True when the program's status is `active'.
-spec active(#program{}) -> boolean().
active(P) -> P#program.status =:= active.

%% @doc True when the program's status is `stopped'.
-spec stopped(#program{}) -> boolean().
stopped(P) -> P#program.status =:= stopped.

%% @doc True when the program's system is 'Linux'.
-spec linux(#program{}) -> boolean().
linux(P) -> P#program.system =:= 'Linux'.

%% @doc True when the program's system is 'Windows'.
-spec windows(#program{}) -> boolean().
windows(P) -> P#program.system =:= 'Windows'.

%% Internal functions

%% Splits one description on "," and folds the fields into a fresh record.
-spec parse_program(string()) -> #program{}.
parse_program(Str) ->
    %% string:lexemes/2 is the documented replacement for the obsolete
    %% string:tokens/2; empty fields between separators are dropped.
    parse_program(string:lexemes(Str, ","), #program{}).

%% Consumes the fields one by one, dispatching on the "Key=" prefix.
%% If a key occurs more than once, the last occurrence wins.
-spec parse_program([string()], #program{}) -> #program{}.
parse_program([], P) ->
    P;
parse_program(["Process=" ++ Str | T], P) ->
    parse_program(T, P#program{process = list_to_integer(Str)});
parse_program(["System=" ++ Str | T], P) ->
    parse_program(T, P#program{system = parse_system(Str)});
parse_program(["PID=" ++ Str | T], P) ->
    parse_program(T, P#program{pid = list_to_integer(Str)});
parse_program(["Program=" ++ Str | T], P) ->
    parse_program(T, P#program{program = Str});
parse_program(["Status=" ++ Str | T], P) ->
    parse_program(T, P#program{status = parse_status(Str)});
parse_program([_ | _] = Fields, P) ->
    %% Unknown key. The second argument of error/2 must be the argument list
    %% of the current function, so that the stack trace reports
    %% parse_program/2; the offending field is the head of Fields.
    error(badarg, [Fields, P]).

%% Maps the system name onto a fixed set of atoms; no atoms are created
%% from arbitrary input.
-spec parse_system(string()) -> 'Linux' | 'Windows'.
parse_system("Linux") -> 'Linux';
parse_system("Windows") -> 'Windows';
parse_system(Str) -> error(badarg, [Str]).

%% Maps the status name onto a fixed set of atoms.
-spec parse_status(string()) -> active | stopped.
parse_status("ACTIVE") -> active;
parse_status("STOPPED") -> stopped;
parse_status(Str) -> error(badarg, [Str]).
然后进一步的任务变得容易
1> c(programs).
{ok,programs}
2> rr("programs.erl").
[program]
3> Programs=
3> ["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
3> "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
"Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
"Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"]
4> Ps = programs:parse_programs(Programs).
[#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Windows',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = stopped},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active}]
5> lists:filter(fun programs:active/1, Ps).
[#program{process = 1,system = 'Linux',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 240,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active},
#program{process = 1,system = 'Linux',pid = 246,
program = "DRMX",status = active}]
6> lists:filter(fun(P) -> programs:stopped(P) andalso programs:windows(P) end, Ps).
[#program{process = 1,system = 'Windows',pid = 242,
program = "DRMX",status = stopped}]
作为副作用,您的程序将消耗更少的内存,因为数字和原子消耗的内存都比字符串少得多。任何进一步的处理都会快得多,因为所有原子比较就像数字比较一样,更少的内存意味着 CPU 缓存中有更多数据,而 CPU 缓存命中比主内存访问快两个数量级。
在这种情况下
7> erts_debug:size(Programs).
1062
8> erts_debug:size(Ps).
153
这意味着您可以在 CPU 缓存中存储近七倍的数据。当你在进程之间或 Erlang 分布式节点之间发送消息时,速度也会快近七倍……(如果你用 list_to_binary/1 处理程序名称,差距甚至可达十倍。)
parse_program(["Program=" ++ Str | T], P) ->
parse_program(T, P#program{program = list_to_binary(Str)});
然后
9> c(programs).
{ok,programs}
10> f(Ps).
ok
11> Ps = programs:parse_programs(Programs).
[{program,1,'Linux',240,<<"DRMX">>,active},
{program,1,'Linux',240,<<"DRMX">>,stopped},
{program,1,'Windows',240,<<"DRMX">>,active},
{program,1,'Linux',242,<<"DRMX">>,active},
{program,1,'Windows',242,<<"DRMX">>,stopped},
{program,1,'Windows',242,<<"DRMX">>,active},
{program,1,'Linux',246,<<"DRMX">>,active},
{program,1,'Linux',246,<<"DRMX">>,stopped},
{program,1,'Linux',246,<<"DRMX">>,active}]
12> erts_debug:size(Ps).
108
所以我们从 8.3KiB 降到了 864B。