如何将 Parslet 与字符串而不是 Parslet 切片一起使用

How do I use Parslet with strings not Parslet Slices

我已经开始使用 Parslet 来解析一些自定义数据。在示例中,生成的解析数据类似于:

{ :custom_string => "data"@6 }

我创建了类似

的转换
rule(:custom_string => simple(:x)) { x.to_s }

但它不匹配,大概是因为我传递的是 "data"@6 而不是 "data" 这不仅仅是一个简单的字符串。转换的所有 examples 都有带字符串的散列,而不是 Parslet::Slices ,这是解析器输出的。也许我错过了一步,但我在文档中看不到任何内容。

编辑:更多示例代码(简化版本但仍应具有解释性)

original_text = 'MSGSTART/DATA1/DATA2/0503/MAR'

require "parslet"
include Parslet

module ParseExample
  class Parser < Parslet::Parser
    rule(:fs)         { str("/") }
    rule(:newline)    { str("\n") | str("\r\n") }

    rule(:msgstart) { str("MSGSTART") }
    rule(:data1) { match("\w").repeat(1).as(:data1) }
    rule(:data2) { match("\w").repeat(1).as(:data2) }
    rule(:serial_number) { match("\w").repeat(1).as(:serial_number) }
    rule(:month) { match("\w").repeat(1).as(:month) }

    rule(:first_line) { msgstart >> fs >> data1 >> fs >> data2 >> fs >> serial_number >> fs >> month >> newline }

    rule(:document) { first_line >> newline.maybe }

    root(:document)
  end
end

module ParseExample
  class Transformer < Parslet::Transform

    rule(:data1 => simple(:x)) { x.to_s }
    rule(:data2 => simple(:x)) { x.to_s }
    rule(:serial_number => simple(:x)) { x.to_s }
    rule(:month => simple(:x)) { x.to_s }
  end
end

# Run by calling...
p = ParseExample::Parser.new
parse_result = p.parse(original_text)

# => {:data1=>"data1"@6, :data2=>"data2"@12, :serial_number=>"0503"@18, :month=>"MAR"@23}

t = ParseExample::Transformer.new
transformed = t.apply(parser_result)

# Actual result => {:data1=>"data1"@6, :data2=>"data2"@12, :serial_number=>"0503"@18, :month=>"MAR"@23}

# Expected result => {:data1=>"data1", :data2=>"data2", :serial_number=>"0503", :month=>"MAR"}

您不能替换单个 key/value 对。您必须立即替换整个哈希。

我也是第一次写变形金刚就被这个坑了。关键是转换规则匹配整个节点并替换它……在它的整体中。一旦一个节点被匹配,它就不会再被访问。

如果您确实使用了一个散列并且只匹配了一个 key/value 对,将其替换为一个值...您只是在同一散列中丢失了所有其他 key/value 对。

不过……有办法!

如果您确实想在匹配整个散列之前预处理散列中的所有节点,则散列的值需要是散列本身。然后你可以匹配那些并将它们转换为字符串。您通常可以通过简单地在解析器中添加另一个 'as' 来做到这一点。

例如:

original_text = 'MSGSTART/DATA1/DATA2/0503/MAR'

require "parslet"
include Parslet

module ParseExample
  class Parser < Parslet::Parser
    rule(:fs)         { str("/") }
    rule(:newline)    { str("\n") | str("\r\n") }

    rule(:msgstart) { str("MSGSTART") }

    rule(:string) {match("\w").repeat(1).as(:string)} # Notice the as!

    rule(:data1) { string.as(:data1) }
    rule(:data2) { string.as(:data2) }
    rule(:serial_number) { string.as(:serial_number) }
    rule(:month) { string.as(:month) }

    rule(:first_line) { 
        msgstart >> fs >> 
        data1 >> fs >> 
        data2 >> fs >> 
        serial_number >> fs >> 
        month >> newline.maybe 
    }

    rule(:document) { first_line >> newline.maybe }

    root(:document)
  end
end

# Run by calling...
p = ParseExample::Parser.new
parser_result = p.parse(original_text)

puts parser_result.inspect
# => {:data1=>{:string=>"DATA1"@9}, 
      :data2=>{:string=>"DATA2"@15}, 
      :serial_number=>{:string=>"0503"@21}, 
      :month=>{:string=>"MAR"@26}}

# See how the values in the hash are now all hashes themselves.

module ParseExample
  class Transformer < Parslet::Transform
    rule(:string => simple(:x)) { x.to_s }
  end
end

# We just need to match the "{:string => x}" hashes now...and replace them with strings

t = ParseExample::Transformer.new
transformed = t.apply(parser_result)

puts transformed.inspect
# => {:data1=>"DATA1", :data2=>"DATA2", :serial_number=>"0503", :month=>"MAR"}

# Tada!!!

如果您想处理整条线,请从中创建一个对象..比如说..

class Entry 
   def initialize(data1:, data2:, serial_number:,month:)
      @data1 = data1
      @data2 = data2
      @serial_number = serial_number
      @month = month
   end
end

module ParseExample
  class Transformer < Parslet::Transform
    rule(:string => simple(:x)) { x.to_s }

    # match the whole hash
    rule(:data1 => simple(:d1),
         :data2 => simple(:d2),
         :serial_number => simple(:s),
         :month => simple(:m)) { 
            Entry.new(data1: d1,data2: d2,serial_number: s,month: m)} 
  end
end

t = ParseExample::Transformer.new
transformed = t.apply(parser_result)

puts transformed.inspect
# => #<Entry:0x007fd5a3d26bf0 @data1="DATA1", @data2="DATA2", @serial_number="0503", @month="MAR">