将 Attoparsec 解析器转换为从另一种字符串类型解析
Convert Attoparsec parser to parse from another string type
是否有一些“简单”的方法(例如,我在 Attoparsec 或其他库中缺少的东西)将定义的从 ByteString 解析的 Attoparsec 解析器转换为从 Text 解析的解析器?
例如我有:
import Data.Attoparsec.ByteString.Char8
myTypeByteStringParser :: Parser MyType
有什么方法可以把它转换成:
import Data.Attoparsec.Text
myTypeTextParser :: Parser MyType
它看起来确实像 contramap
(来自 hoogling 类型签名)但可能无法为解析器定义逆变?
我不确定这在一般情况下是否可行。 Attoparsec
中定义的 Parser
类型看起来不太适合修改输入类型。所以,如果你想将 Text
解析器与 ByteString
解析器结合起来,恐怕你可能运气不好。
话虽如此,如果您只是想用一个 ByteString 解析器去解析某个 Text 输入,可以先把 Text 输入转换成 ByteString。例如:
import Data.Text.Encoding
import Data.Attoparsec.ByteString.Char8
-- parse :: Parser a -> ByteString -> Result a
-- this is given by Attoparsec
-- | Run a 'ByteString'-based parser over 'Text' input.
--
-- The input is UTF-8 encoded with 'encodeUtf8' and the resulting bytes are
-- handed to 'parse'; the 'Result' is returned as produced by 'parse'.
parseText :: Parser a -> Text -> Result a
parseText parser input = parse parser (encodeUtf8 input)
同样,您可以使用 decodeUtf8
(或根据需要使用不同的 encoder/decoder)将 Text
解析器转换为 ByteString
解析器。
这在一般情况下是可行的,而且不需要 fork attoparsec。遗憾的是,attoparsec 没有充分暴露其内部结构,但这并不能阻止我们:
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
module Parsers where
import qualified Data.Attoparsec.ByteString as AB
import qualified Data.Attoparsec.Internal.Types as AIT
import qualified Data.Attoparsec.Text as AT
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BI
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import qualified Data.Text.Internal as TI
import Unsafe.TrueName
-- | Rebuild a 'ByteString' parser state as a 'Text' parser state.
--
-- The bytes referenced by the first three fields of the byte buffer are
-- decoded with 'decodeUtf8' and wrapped in a fresh Text buffer.
--
-- NOTE(review): 'decodeUtf8' throws on invalid UTF-8; presumably callers only
-- feed well-formed input -- confirm.
bsToTextState :: AIT.State ByteString -> AIT.State Text
bsToTextState bsState = wrapText (decodeUtf8 (unwrapBytes bsState))
  where
    -- View the buffer's pointer/offset/length as a plain ByteString; the two
    -- trailing buffer fields are ignored.
    unwrapBytes :: AIT.State ByteString -> ByteString
    unwrapBytes [truename| ''AIT.State
        Data.Attoparsec.ByteString.Buffer.Buffer
        Buf | ptr start size _ _ |] = BI.PS ptr start size

    -- Build a Text buffer whose fourth field equals the length and whose
    -- fifth field is 0 (presumably capacity and generation -- confirm against
    -- Data.Attoparsec.Text.Buffer).
    wrapText :: Text -> AIT.State Text
    wrapText (TI.Text array start size) = [truename| ''AIT.State
        Data.Attoparsec.Text.Buffer.Buffer
        Buf |] array start size size 0
-- | Rebuild a 'Text' parser state as a 'ByteString' parser state.
--
-- The text referenced by the first three fields of the Text buffer is encoded
-- with 'encodeUtf8' and wrapped in a fresh byte buffer.
textToBSState :: AIT.State Text -> AIT.State ByteString
textToBSState textState = wrapBytes (encodeUtf8 (unwrapText textState))
  where
    -- View the buffer's array/offset/length as a plain Text; the two trailing
    -- buffer fields are ignored.
    unwrapText :: AIT.State Text -> Text
    unwrapText [truename| ''AIT.State
        Data.Attoparsec.Text.Buffer.Buffer
        Buf | array start size _ _ |] = TI.Text array start size

    -- Build a byte buffer whose fourth field equals the length and whose
    -- fifth field is 0 (presumably capacity and generation -- confirm against
    -- Data.Attoparsec.ByteString.Buffer).
    wrapBytes :: ByteString -> AIT.State ByteString
    wrapBytes (BI.PS ptr start size) = [truename| ''AIT.State
        Data.Attoparsec.ByteString.Buffer.Buffer
        Buf |] ptr start size size 0
-- | Change the input type carried by an 'AIT.IResult'.
--
-- @f@ converts leftover input in 'AIT.Fail' and 'AIT.Done'; @g@ converts
-- freshly supplied input before it is passed to the continuation of an
-- 'AIT.Partial', whose eventual result is converted recursively.
mapIResult :: (i -> j) -> (j -> i) -> AIT.IResult i a -> AIT.IResult j a
mapIResult f g result = case result of
  AIT.Fail rest ctx msg -> AIT.Fail (f rest) ctx msg
  AIT.Partial resume    -> AIT.Partial (\more -> mapIResult f g (resume (g more)))
  AIT.Done rest value   -> AIT.Done (f rest) value
-- | Adapt a failure continuation over input type @i@ so it can be invoked
-- with a parser state over input type @j@.
--
-- The incoming state is converted with @h@ before the wrapped continuation
-- runs, and the continuation's result is converted with 'mapIResult'. The
-- position, 'More' flag, context and message are forwarded unchanged.
mapFailure :: (i -> j) -> (j -> i) -> (AIT.State j -> AIT.State i) ->
    AIT.Failure i (AIT.State i) r -> AIT.Failure j (AIT.State j) r
mapFailure f g h k = \st p m ctx msg ->
  mapIResult f g (k (h st) p m ctx msg)
-- | Adapt a success continuation over input type @i@ so it can be invoked
-- with a parser state over input type @j@.
--
-- The incoming state is converted with @h@ before the wrapped continuation
-- runs, and the continuation's result is converted with 'mapIResult'. The
-- position, 'More' flag and parsed value are forwarded unchanged.
mapSuccess :: (i -> j) -> (j -> i) -> (AIT.State j -> AIT.State i) ->
    AIT.Success i (AIT.State i) a r -> AIT.Success j (AIT.State j) a r
mapSuccess f g h k = \st p m a ->
  mapIResult f g (k (h st) p m a)
-- | Turn a 'ByteString' parser into a parser that consumes 'Text'.
--
-- The Text state is re-buffered as bytes via 'textToBSState', both
-- continuations are adapted so they receive Text states again, and the final
-- result is converted back with 'mapIResult'.
--
-- NOTE(review): the position is passed through unchanged between the Text and
-- byte buffers; this presumably only lines up when both index the same units
-- (e.g. ASCII input) -- confirm.
bsToTextParser :: AB.Parser a -> AT.Parser a
bsToTextParser (AIT.Parser runBytes) = AIT.Parser runText
  where
    runText textState pos more onFail onDone =
      let onFail' = mapFailure encodeUtf8 decodeUtf8 bsToTextState onFail
          onDone' = mapSuccess encodeUtf8 decodeUtf8 bsToTextState onDone
      in mapIResult decodeUtf8 encodeUtf8
           (runBytes (textToBSState textState) pos more onFail' onDone')
-- | Turn a 'Text' parser into a parser that consumes 'ByteString'.
--
-- The byte state is re-buffered as Text via 'bsToTextState', both
-- continuations are adapted so they receive byte states again, and the final
-- result is converted back with 'mapIResult'.
--
-- NOTE(review): the position is passed through unchanged between the byte and
-- Text buffers; this presumably only lines up when both index the same units
-- (e.g. ASCII input) -- confirm.
textToBSParser :: AT.Parser a -> AB.Parser a
textToBSParser (AIT.Parser runText) = AIT.Parser runBytes
  where
    runBytes bsState pos more onFail onDone =
      let onFail' = mapFailure decodeUtf8 encodeUtf8 textToBSState onFail
          onDone' = mapSuccess decodeUtf8 encodeUtf8 textToBSState onDone
      in mapIResult encodeUtf8 decodeUtf8
           (runText (bsToTextState bsState) pos more onFail' onDone')
{,un}buffer{BS,Text} 改编自各自的内部模块 Data.Attoparsec.{ByteString,Text}.Buffer。不过,这倒是个让我更新 true-name、使其支持较新版本 GHC 的好借口。取决于您的环境有多新,您可能需要使用 GitHub 上尚在开发中(WIP)的版本。
性能可能不算太差,只要您记住:每次使用 textToBSParser 时,整个输入都会经过 decodeUtf8,而剩余的输入会再经过 encodeUtf8 转换回来;bsToTextParser 则正好相反。如果您只在顶层转换一次 Parser,那么它与另一个回答所建议的直接转换输入应该没有太大区别。
PS:我还没有测试过
$ ghci -XOverloadedStrings parsers.hs
*Parsers> textToBSParser AT.scientific `AB.parseTest` "123 "
Done " " 123.0
PPS:对于您自己编写的解析器,也许可以借助 OverloadedStrings,把它写成 p :: IsString s => AIT.Parser s a,并配合 {-# SPECIALISE p :: AT.Parser a #-} 编译指示。我还没有探索过这个想法的可行性。
是否有一些“简单”的方法(例如,我在 Attoparsec 或其他库中缺少的东西)将定义的从 ByteString 解析的 Attoparsec 解析器转换为从 Text 解析的解析器?
例如我有:
import Data.Attoparsec.ByteString.Char8
myTypeByteStringParser :: Parser MyType
有什么方法可以把它转换成:
import Data.Attoparsec.Text
myTypeTextParser :: Parser MyType
它看起来确实像 contramap
(来自 hoogling 类型签名)但可能无法为解析器定义逆变?
我不确定这在一般情况下是否可行。 Attoparsec
中定义的 Parser
类型看起来不太适合修改输入类型。所以,如果你想将 Text
解析器与 ByteString
解析器结合起来,恐怕你可能运气不好。
话虽如此,如果您只是想用一个 ByteString 解析器去解析某个 Text 输入,可以先把 Text 输入转换成 ByteString。例如:
import Data.Text.Encoding
import Data.Attoparsec.ByteString.Char8
-- parse :: Parser a -> ByteString -> Result a
-- this is given by Attoparsec
-- | Run a 'ByteString'-based parser over 'Text' input.
--
-- The input is UTF-8 encoded with 'encodeUtf8' and the resulting bytes are
-- handed to 'parse'; the 'Result' is returned as produced by 'parse'.
parseText :: Parser a -> Text -> Result a
parseText parser input = parse parser (encodeUtf8 input)
同样,您可以使用 decodeUtf8
(或根据需要使用不同的 encoder/decoder)将 Text
解析器转换为 ByteString
解析器。
这在一般情况下是可行的,而且不需要 fork attoparsec。遗憾的是,attoparsec 没有充分暴露其内部结构,但这并不能阻止我们:
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
module Parsers where
import qualified Data.Attoparsec.ByteString as AB
import qualified Data.Attoparsec.Internal.Types as AIT
import qualified Data.Attoparsec.Text as AT
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BI
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import qualified Data.Text.Internal as TI
import Unsafe.TrueName
-- | Rebuild a 'ByteString' parser state as a 'Text' parser state.
--
-- The bytes referenced by the first three fields of the byte buffer are
-- decoded with 'decodeUtf8' and wrapped in a fresh Text buffer.
--
-- NOTE(review): 'decodeUtf8' throws on invalid UTF-8; presumably callers only
-- feed well-formed input -- confirm.
bsToTextState :: AIT.State ByteString -> AIT.State Text
bsToTextState bsState = wrapText (decodeUtf8 (unwrapBytes bsState))
  where
    -- View the buffer's pointer/offset/length as a plain ByteString; the two
    -- trailing buffer fields are ignored.
    unwrapBytes :: AIT.State ByteString -> ByteString
    unwrapBytes [truename| ''AIT.State
        Data.Attoparsec.ByteString.Buffer.Buffer
        Buf | ptr start size _ _ |] = BI.PS ptr start size

    -- Build a Text buffer whose fourth field equals the length and whose
    -- fifth field is 0 (presumably capacity and generation -- confirm against
    -- Data.Attoparsec.Text.Buffer).
    wrapText :: Text -> AIT.State Text
    wrapText (TI.Text array start size) = [truename| ''AIT.State
        Data.Attoparsec.Text.Buffer.Buffer
        Buf |] array start size size 0
-- | Rebuild a 'Text' parser state as a 'ByteString' parser state.
--
-- The text referenced by the first three fields of the Text buffer is encoded
-- with 'encodeUtf8' and wrapped in a fresh byte buffer.
textToBSState :: AIT.State Text -> AIT.State ByteString
textToBSState textState = wrapBytes (encodeUtf8 (unwrapText textState))
  where
    -- View the buffer's array/offset/length as a plain Text; the two trailing
    -- buffer fields are ignored.
    unwrapText :: AIT.State Text -> Text
    unwrapText [truename| ''AIT.State
        Data.Attoparsec.Text.Buffer.Buffer
        Buf | array start size _ _ |] = TI.Text array start size

    -- Build a byte buffer whose fourth field equals the length and whose
    -- fifth field is 0 (presumably capacity and generation -- confirm against
    -- Data.Attoparsec.ByteString.Buffer).
    wrapBytes :: ByteString -> AIT.State ByteString
    wrapBytes (BI.PS ptr start size) = [truename| ''AIT.State
        Data.Attoparsec.ByteString.Buffer.Buffer
        Buf |] ptr start size size 0
-- | Change the input type carried by an 'AIT.IResult'.
--
-- @f@ converts leftover input in 'AIT.Fail' and 'AIT.Done'; @g@ converts
-- freshly supplied input before it is passed to the continuation of an
-- 'AIT.Partial', whose eventual result is converted recursively.
mapIResult :: (i -> j) -> (j -> i) -> AIT.IResult i a -> AIT.IResult j a
mapIResult f g result = case result of
  AIT.Fail rest ctx msg -> AIT.Fail (f rest) ctx msg
  AIT.Partial resume    -> AIT.Partial (\more -> mapIResult f g (resume (g more)))
  AIT.Done rest value   -> AIT.Done (f rest) value
-- | Adapt a failure continuation over input type @i@ so it can be invoked
-- with a parser state over input type @j@.
--
-- The incoming state is converted with @h@ before the wrapped continuation
-- runs, and the continuation's result is converted with 'mapIResult'. The
-- position, 'More' flag, context and message are forwarded unchanged.
mapFailure :: (i -> j) -> (j -> i) -> (AIT.State j -> AIT.State i) ->
    AIT.Failure i (AIT.State i) r -> AIT.Failure j (AIT.State j) r
mapFailure f g h k = \st p m ctx msg ->
  mapIResult f g (k (h st) p m ctx msg)
-- | Adapt a success continuation over input type @i@ so it can be invoked
-- with a parser state over input type @j@.
--
-- The incoming state is converted with @h@ before the wrapped continuation
-- runs, and the continuation's result is converted with 'mapIResult'. The
-- position, 'More' flag and parsed value are forwarded unchanged.
mapSuccess :: (i -> j) -> (j -> i) -> (AIT.State j -> AIT.State i) ->
    AIT.Success i (AIT.State i) a r -> AIT.Success j (AIT.State j) a r
mapSuccess f g h k = \st p m a ->
  mapIResult f g (k (h st) p m a)
-- | Turn a 'ByteString' parser into a parser that consumes 'Text'.
--
-- The Text state is re-buffered as bytes via 'textToBSState', both
-- continuations are adapted so they receive Text states again, and the final
-- result is converted back with 'mapIResult'.
--
-- NOTE(review): the position is passed through unchanged between the Text and
-- byte buffers; this presumably only lines up when both index the same units
-- (e.g. ASCII input) -- confirm.
bsToTextParser :: AB.Parser a -> AT.Parser a
bsToTextParser (AIT.Parser runBytes) = AIT.Parser runText
  where
    runText textState pos more onFail onDone =
      let onFail' = mapFailure encodeUtf8 decodeUtf8 bsToTextState onFail
          onDone' = mapSuccess encodeUtf8 decodeUtf8 bsToTextState onDone
      in mapIResult decodeUtf8 encodeUtf8
           (runBytes (textToBSState textState) pos more onFail' onDone')
-- | Turn a 'Text' parser into a parser that consumes 'ByteString'.
--
-- The byte state is re-buffered as Text via 'bsToTextState', both
-- continuations are adapted so they receive byte states again, and the final
-- result is converted back with 'mapIResult'.
--
-- NOTE(review): the position is passed through unchanged between the byte and
-- Text buffers; this presumably only lines up when both index the same units
-- (e.g. ASCII input) -- confirm.
textToBSParser :: AT.Parser a -> AB.Parser a
textToBSParser (AIT.Parser runText) = AIT.Parser runBytes
  where
    runBytes bsState pos more onFail onDone =
      let onFail' = mapFailure decodeUtf8 encodeUtf8 textToBSState onFail
          onDone' = mapSuccess decodeUtf8 encodeUtf8 textToBSState onDone
      in mapIResult encodeUtf8 decodeUtf8
           (runText (bsToTextState bsState) pos more onFail' onDone')
{,un}buffer{BS,Text} 改编自各自的内部模块 Data.Attoparsec.{ByteString,Text}.Buffer。不过,这倒是个让我更新 true-name、使其支持较新版本 GHC 的好借口。取决于您的环境有多新,您可能需要使用 GitHub 上尚在开发中(WIP)的版本。
性能可能不算太差,只要您记住:每次使用 textToBSParser 时,整个输入都会经过 decodeUtf8,而剩余的输入会再经过 encodeUtf8 转换回来;bsToTextParser 则正好相反。如果您只在顶层转换一次 Parser,那么它与另一个回答所建议的直接转换输入应该没有太大区别。
PS:我还没有测试过
$ ghci -XOverloadedStrings parsers.hs
*Parsers> textToBSParser AT.scientific `AB.parseTest` "123 "
Done " " 123.0
PPS:对于您自己编写的解析器,也许可以借助 OverloadedStrings,把它写成 p :: IsString s => AIT.Parser s a,并配合 {-# SPECIALISE p :: AT.Parser a #-} 编译指示。我还没有探索过这个想法的可行性。