TXR lisp:处理TXR收集的数据
TXR lisp: processing TXR collected data
我有以下 lisp 数据,我想为其实现特定的输出。我使用来自我的 TXR 解析器的 @(do (prinl order) (prinl location) ...)
得到了这个输出。
(defvar order '(0 1 2 3 4 5))
(defvar location
'("shape" "shape/rectangle" "shape/square" "shape/rectangle" "shape/rectangle" ""))
(defvar headings
'(("geometer") ("id" "width: cm" "height: cm")
("id" "length: m") ("id" "width: cm" "height: cm")
("angle: °") ("year" "month" "day")))
(defvar values
'(("Alice")
(("1" "13" "15") ("2" "12" "14"))
(("1" "10") ("2" "5") ("3..5" "7")
("6;8" "15;12") ("7" "20") ("9..10" "25;30"))
(("3" "5" "12.2")) ("90") ("2017" "03" "01")))
(defvar type '("meta" "data" "data" "data" "meta" "meta"))
一天结束时,我想要的输出是 CSV 表格
[shape/rectangle]
year,month,day,geometer,angle: °,id,width: cm,height: cm
2017,03,01,90,Alice,1,13,15
2017,03,01,90,Alice,2,12,14
2017,03,01,90,Alice,3,5,12.2
[shape/square]
year,month,day,geometer,id,length: m
2017,03,01,Alice,1,10
2017,03,01,Alice,2,5
2017,03,01,Alice,3,7
2017,03,01,Alice,4,7
2017,03,01,Alice,5,7
2017,03,01,Alice,6,15
2017,03,01,Alice,8,12
2017,03,01,Alice,7,20
2017,03,01,Alice,9,25
2017,03,01,Alice,10,30
我写了一些用于解压缩值的 TXR lisp 代码:
(defun str-range-p (x)
(m^$ #/\d+\.\.\d+/ x))
(defun str-range-expand (x)
[apply range [mapcar int-str (split-str x "..")]])
(defun str-int-list-p (s)
(and (str-list-p s)
(all (str-list-expand s)
(lambda (x)
(or (int-str x)
(str-range-p x))))))
(defun str-list-p (x)
(search-str x ";"))
(defun str-list-expand (x)
(split-str x ";"))
(defun expand (s)
(cond ((str-int-list-p s)
(flatten [mapcar (lambda (x)
(if (str-range-p x)
(str-range-expand x)
(int-str x)))
(str-list-expand s)]))
((str-list-p s) (str-list-expand s))
((str-range-p s) (str-range-expand s))
((int-str s) (int-str s))
(t s)))
并检查位置字符串是否是另一个位置字符串的父级:
(defun level-up (x)
(cond ((equal x "") nil)
((search-str x "/")
(sub-str x 0 (search-str x "/" 0 t)))
(t "")))
(defun parent-location-p (x y)
(or (equal x y)
(equal x "")
(and (not (equal y ""))
(match-str (level-up y) x))))
我主要对您认为哪些 TXR lisp 内置函数可能有助于解决此任务的剩余部分以实现所需输出感兴趣。而且,您将如何以不同的方式处理现有代码以利用现有的 TXR lisp 功能?
此解决方案适用于问题的早期编辑中给出的示例数据。它不会将数据保存在不同的 .csv
文件中,但它的输出会指示数据的去向。
几个objects用来组织逻辑。位置由 locations
结构表示,该结构会自动将路径名称分解为组件以便于分析。标题表示为 heading
objects,它在某种程度上处理了类型符号;目前它仅用于将代表年、日和月的整数重新格式化为带前导零的正确符号。表表示为 table
objects,其中包含各种属性。但是,值只是列表。 table 包含行列表,而行只是值列表。值通常是标量。如果一行中的一个或多个值是值列表,则意味着该行是多行的压缩(作为 ..
和 ;
表示法的结果)。使用直接来自 Rosetta 代码范围扩展的代码扩展范围,适应此处使用的分隔符。
解析器只做了很小的修改。 :counter
不见了,主要的 collect
取而代之的是一个 :vars (tables)
:只出现了一个 table 的列表,这些是 objects 使用 new
宏。此外,还有一个新的 @(rebind values (values))
,因此 meta
table 以相同的表示形式出现:虽然它们只有一行,但我们希望它们的 rows
属性 来保存行列表,就像 data
tables.
@(do
(defstruct (location str) nil
str path
(:method parse (me)
(set me.path (tok-str me.str #/[^\/]+/)))
(:method format (me)
(set me.str `@{me.path "/"}`))
(:method level-up (me)
(new location path (butlast me.path)))
(:method is-prefix-of (me maybe-suffix)
(let ((mm (mismatch me.path maybe-suffix.path)))
(or (not mm) (eql mm (length me.path)))))
(:method print (me stream pretty-p)
(put-string `@{me.path "/"}` stream))
(:method equal (me) me.path)
(:postinit (me)
(if me.str
me.(parse)
me.(format))))
(defstruct (heading str) nil
str name type
(:method parse (me)
(tree-case (split-str me.str #/: */)
((nm ty) (set me.name nm me.type ty))
((nm) (set me.name nm me.type nm))))
(:method format-value (me arg)
(casequal me.type
("year" (fmt "~,04d" arg))
(("month" "day") (fmt "~,02d" arg))
(t (if (stringp arg)
arg
(tostringp arg)))))
(:method print (me stream pretty-p)
(put-string (or me.str
(if (equal me.name me.type)
`@{me.name}`
`@{me.name}: @{me.type}`))
stream))
(:postinit (me)
(when me.str
me.(parse))))
(defun expand-helper (list)
(cond
((null list) nil)
((consp (first list))
(append (range (first (first list))
(second (first list)))
(rangeexpand (rest list))))
(t (cons (first list) (rangeexpand (rest list))))))
(defun rangeexpand (list)
(uniq (expand-helper list)))
(defun make-values (string)
(if [#/\.\.|;/ string]
(let ((syntax (collect-each ((p (split-str string ";")))
(tree-case (split-str p "..")
((from to . junk)
;; if junk isn't nil, error!
(list (num-str from) (num-str to)))
((single . junk)
(num-str single))))))
(rangeexpand syntax))
(or (num-str string) string)))
(defstruct table nil
location headings rows type order
(:static order-cnt 0)
(:method merge (me other)
(new table
location other.location
headings (append me.headings other.headings)
type other.type
rows (append-each ((mr me.rows))
(collect-each ((or other.rows))
(append mr or)))
order other.order))
(:method cat (me other)
(let ((me-copy (copy-struct me)))
(set me-copy.rows (append me.rows other.rows))
me-copy))
(:method expand-rows (me)
(labels ((expand-row (row)
(build
(if [find-if consp row]
(while* [find-if consp row]
(let ((this (mapcar [iffi consp car] row))
(next (mapcar [iffi consp cdr] row)))
(add this)
(set row next)))
(add row)))))
[mappend expand-row me.rows]))
(:postinit (me)
(unless me.order
(set me.order (inc me.order-cnt))))))
@(define os)@/[ ]*/@(end)
@(define location)@\
@ (cases)@\
@/[a-z]+/@(eol)@\
@ (or)@\
@/[a-z]+//@(location)@\
@ (end)@\
@(end)
@(define heading)@/[a-z]+(:[^,]*)?/@(end)
@(define value)@/[^,]+/@(end)
@(define table (location headings values type))
@ (cases)
@ (cases)@\
[[@location]]@(or)[[]]@(bind location "")@\
@ (end)
@ (coll)@(os)@{headings (heading)}@(os)@(end)
@ (coll)@(os)@{values (value)}@(os)@(end)
@ (rebind values (values))
@ (bind type "meta")
@(os)
@ (or)
[@location]
@ (coll)@(os)@{headings (heading)}@(os)@(end)
@ (collect :gap 0)
@ (coll)@(os)@{values (value)}@(os)@(end)
@ (until)
@ (os)
@ (end)
@ (bind type "data")
@ (end)
@(end)
@(collect :vars (tables))
@ (table location headings values type)
@ (bind tables @(new table
location (new (location location))
headings (mapcar (do new (heading @1)) headings)
rows (mapcar (op mapcar make-values) values)
type type))
@(until)
@ (eof)
@(end)
@(do
(let* ((metas (keepqual "meta" tables (usl type)))
(datas (remqual "meta" tables (usl type)))
(sorted-metas [sort (copy metas) > (op length @1.location.path)])
(combined-datas (hash-values (group-reduce (hash :equal-based)
(usl location)
(do if @1 @1.(cat @2) @2)
datas)))
(augmented-datas (collect-each ((d combined-datas))
(each ((m sorted-metas))
(when m.location.(is-prefix-of d.location)
(set d m.(merge d))))
d)))
(each ((a augmented-datas))
(put-line `@{a.location}.csv:`)
(put-line `@{a.headings ","}`)
(each ((r a.(expand-rows)))
(put-line `@{(mapcar (ret @1.(format-value @2))
a.headings r) ","}`))
(put-line))))
使用 group-reduce
表达式处理具有相同位置的串联 table 的要求,该表达式依赖于散列 table 来识别相似的项目并使用 table
结构的 cat
方法。 table 通过生成自身的副本与另一个 table 相连,并用 rows
替换为将其原始 rows
附加到另一个
的副本。
合并元 tables 中的附加属性是通过遍历所有数据 tables 并应用匹配属性来执行的。对于每个数据 table,我们按照路径长度递减的顺序(从最特殊到最少)迭代所有元 table。从每个元 table 的位置是数据 table 位置的前缀,我们使用 table
merge
方法合并属性。 (这在功能上也有效,比如 cat
:它 returns 一个新的合并的 table)。合并意味着我们坚持元 table 中的所有标题,并对行执行 cross-producting 操作:左侧的每个新元行都与 table 的每一行配对向右延伸。
扩展包含多个值的行由 table
expand-rows
完成。这只是简单地复制了每一行,每个列表都由它的第一项(Lisp car
)代替。然后它遍历 cdr
:计算一个新行,其中列表被它们的 cdr
替换。如此重复,直到列表用完。例如 (1 (a b) 3 (x y))
将产生 (1 a 3 x)
,"remainder" 为 (1 (b) 3 (y))
。这个余数产生 (1 b 3 y)
和 (1 nil 3 nil)
的新余数。这不再包含 consp
值(全部为 atom
),因此迭代终止。
我有以下 lisp 数据,我想为其实现特定的输出。我使用来自我的 TXR 解析器的 @(do (prinl order) (prinl location) ...)
得到了这个输出。
(defvar order '(0 1 2 3 4 5))
(defvar location
'("shape" "shape/rectangle" "shape/square" "shape/rectangle" "shape/rectangle" ""))
(defvar headings
'(("geometer") ("id" "width: cm" "height: cm")
("id" "length: m") ("id" "width: cm" "height: cm")
("angle: °") ("year" "month" "day")))
(defvar values
'(("Alice")
(("1" "13" "15") ("2" "12" "14"))
(("1" "10") ("2" "5") ("3..5" "7")
("6;8" "15;12") ("7" "20") ("9..10" "25;30"))
(("3" "5" "12.2")) ("90") ("2017" "03" "01")))
(defvar type '("meta" "data" "data" "data" "meta" "meta"))
一天结束时,我想要的输出是 CSV 表格
[shape/rectangle]
year,month,day,geometer,angle: °,id,width: cm,height: cm
2017,03,01,90,Alice,1,13,15
2017,03,01,90,Alice,2,12,14
2017,03,01,90,Alice,3,5,12.2
[shape/square]
year,month,day,geometer,id,length: m
2017,03,01,Alice,1,10
2017,03,01,Alice,2,5
2017,03,01,Alice,3,7
2017,03,01,Alice,4,7
2017,03,01,Alice,5,7
2017,03,01,Alice,6,15
2017,03,01,Alice,8,12
2017,03,01,Alice,7,20
2017,03,01,Alice,9,25
2017,03,01,Alice,10,30
我写了一些用于解压缩值的 TXR lisp 代码:
(defun str-range-p (x)
(m^$ #/\d+\.\.\d+/ x))
(defun str-range-expand (x)
[apply range [mapcar int-str (split-str x "..")]])
(defun str-int-list-p (s)
(and (str-list-p s)
(all (str-list-expand s)
(lambda (x)
(or (int-str x)
(str-range-p x))))))
(defun str-list-p (x)
(search-str x ";"))
(defun str-list-expand (x)
(split-str x ";"))
(defun expand (s)
(cond ((str-int-list-p s)
(flatten [mapcar (lambda (x)
(if (str-range-p x)
(str-range-expand x)
(int-str x)))
(str-list-expand s)]))
((str-list-p s) (str-list-expand s))
((str-range-p s) (str-range-expand s))
((int-str s) (int-str s))
(t s)))
并检查位置字符串是否是另一个位置字符串的父级:
(defun level-up (x)
(cond ((equal x "") nil)
((search-str x "/")
(sub-str x 0 (search-str x "/" 0 t)))
(t "")))
(defun parent-location-p (x y)
(or (equal x y)
(equal x "")
(and (not (equal y ""))
(match-str (level-up y) x))))
我主要对您认为哪些 TXR lisp 内置函数可能有助于解决此任务的剩余部分以实现所需输出感兴趣。而且,您将如何以不同的方式处理现有代码以利用现有的 TXR lisp 功能?
此解决方案适用于问题的早期编辑中给出的示例数据。它不会将数据保存在不同的 .csv
文件中,但它的输出会指示数据的去向。
几个objects用来组织逻辑。位置由 locations
结构表示,该结构会自动将路径名称分解为组件以便于分析。标题表示为 heading
objects,它在某种程度上处理了类型符号;目前它仅用于将代表年、日和月的整数重新格式化为带前导零的正确符号。表表示为 table
objects,其中包含各种属性。但是,值只是列表。 table 包含行列表,而行只是值列表。值通常是标量。如果一行中的一个或多个值是值列表,则意味着该行是多行的压缩(作为 ..
和 ;
表示法的结果)。使用直接来自 Rosetta 代码范围扩展的代码扩展范围,适应此处使用的分隔符。
解析器只做了很小的修改。 :counter
不见了,主要的 collect
取而代之的是一个 :vars (tables)
:只出现了一个 table 的列表,这些是 objects 使用 new
宏。此外,还有一个新的 @(rebind values (values))
,因此 meta
table 以相同的表示形式出现:虽然它们只有一行,但我们希望它们的 rows
属性 来保存行列表,就像 data
tables.
@(do
(defstruct (location str) nil
str path
(:method parse (me)
(set me.path (tok-str me.str #/[^\/]+/)))
(:method format (me)
(set me.str `@{me.path "/"}`))
(:method level-up (me)
(new location path (butlast me.path)))
(:method is-prefix-of (me maybe-suffix)
(let ((mm (mismatch me.path maybe-suffix.path)))
(or (not mm) (eql mm (length me.path)))))
(:method print (me stream pretty-p)
(put-string `@{me.path "/"}` stream))
(:method equal (me) me.path)
(:postinit (me)
(if me.str
me.(parse)
me.(format))))
(defstruct (heading str) nil
str name type
(:method parse (me)
(tree-case (split-str me.str #/: */)
((nm ty) (set me.name nm me.type ty))
((nm) (set me.name nm me.type nm))))
(:method format-value (me arg)
(casequal me.type
("year" (fmt "~,04d" arg))
(("month" "day") (fmt "~,02d" arg))
(t (if (stringp arg)
arg
(tostringp arg)))))
(:method print (me stream pretty-p)
(put-string (or me.str
(if (equal me.name me.type)
`@{me.name}`
`@{me.name}: @{me.type}`))
stream))
(:postinit (me)
(when me.str
me.(parse))))
(defun expand-helper (list)
(cond
((null list) nil)
((consp (first list))
(append (range (first (first list))
(second (first list)))
(rangeexpand (rest list))))
(t (cons (first list) (rangeexpand (rest list))))))
(defun rangeexpand (list)
(uniq (expand-helper list)))
(defun make-values (string)
(if [#/\.\.|;/ string]
(let ((syntax (collect-each ((p (split-str string ";")))
(tree-case (split-str p "..")
((from to . junk)
;; if junk isn't nil, error!
(list (num-str from) (num-str to)))
((single . junk)
(num-str single))))))
(rangeexpand syntax))
(or (num-str string) string)))
(defstruct table nil
location headings rows type order
(:static order-cnt 0)
(:method merge (me other)
(new table
location other.location
headings (append me.headings other.headings)
type other.type
rows (append-each ((mr me.rows))
(collect-each ((or other.rows))
(append mr or)))
order other.order))
(:method cat (me other)
(let ((me-copy (copy-struct me)))
(set me-copy.rows (append me.rows other.rows))
me-copy))
(:method expand-rows (me)
(labels ((expand-row (row)
(build
(if [find-if consp row]
(while* [find-if consp row]
(let ((this (mapcar [iffi consp car] row))
(next (mapcar [iffi consp cdr] row)))
(add this)
(set row next)))
(add row)))))
[mappend expand-row me.rows]))
(:postinit (me)
(unless me.order
(set me.order (inc me.order-cnt))))))
@(define os)@/[ ]*/@(end)
@(define location)@\
@ (cases)@\
@/[a-z]+/@(eol)@\
@ (or)@\
@/[a-z]+//@(location)@\
@ (end)@\
@(end)
@(define heading)@/[a-z]+(:[^,]*)?/@(end)
@(define value)@/[^,]+/@(end)
@(define table (location headings values type))
@ (cases)
@ (cases)@\
[[@location]]@(or)[[]]@(bind location "")@\
@ (end)
@ (coll)@(os)@{headings (heading)}@(os)@(end)
@ (coll)@(os)@{values (value)}@(os)@(end)
@ (rebind values (values))
@ (bind type "meta")
@(os)
@ (or)
[@location]
@ (coll)@(os)@{headings (heading)}@(os)@(end)
@ (collect :gap 0)
@ (coll)@(os)@{values (value)}@(os)@(end)
@ (until)
@ (os)
@ (end)
@ (bind type "data")
@ (end)
@(end)
@(collect :vars (tables))
@ (table location headings values type)
@ (bind tables @(new table
location (new (location location))
headings (mapcar (do new (heading @1)) headings)
rows (mapcar (op mapcar make-values) values)
type type))
@(until)
@ (eof)
@(end)
@(do
(let* ((metas (keepqual "meta" tables (usl type)))
(datas (remqual "meta" tables (usl type)))
(sorted-metas [sort (copy metas) > (op length @1.location.path)])
(combined-datas (hash-values (group-reduce (hash :equal-based)
(usl location)
(do if @1 @1.(cat @2) @2)
datas)))
(augmented-datas (collect-each ((d combined-datas))
(each ((m sorted-metas))
(when m.location.(is-prefix-of d.location)
(set d m.(merge d))))
d)))
(each ((a augmented-datas))
(put-line `@{a.location}.csv:`)
(put-line `@{a.headings ","}`)
(each ((r a.(expand-rows)))
(put-line `@{(mapcar (ret @1.(format-value @2))
a.headings r) ","}`))
(put-line))))
使用 group-reduce
表达式处理具有相同位置的串联 table 的要求,该表达式依赖于散列 table 来识别相似的项目并使用 table
结构的 cat
方法。 table 通过生成自身的副本与另一个 table 相连,并用 rows
替换为将其原始 rows
附加到另一个
合并元 tables 中的附加属性是通过遍历所有数据 tables 并应用匹配属性来执行的。对于每个数据 table,我们按照路径长度递减的顺序(从最特殊到最少)迭代所有元 table。从每个元 table 的位置是数据 table 位置的前缀,我们使用 table
merge
方法合并属性。 (这在功能上也有效,比如 cat
:它 returns 一个新的合并的 table)。合并意味着我们坚持元 table 中的所有标题,并对行执行 cross-producting 操作:左侧的每个新元行都与 table 的每一行配对向右延伸。
扩展包含多个值的行由 table
expand-rows
完成。这只是简单地复制了每一行,每个列表都由它的第一项(Lisp car
)代替。然后它遍历 cdr
:计算一个新行,其中列表被它们的 cdr
替换。如此重复,直到列表用完。例如 (1 (a b) 3 (x y))
将产生 (1 a 3 x)
,"remainder" 为 (1 (b) 3 (y))
。这个余数产生 (1 b 3 y)
和 (1 nil 3 nil)
的新余数。这不再包含 consp
值(全部为 atom
),因此迭代终止。