如何使用管道将双标签替换为单个标签?
How to replace double tabs with single tabs using pipes?
我需要用单个制表符替换字节串中的所有连续制表符,如下所示:
"___\t___\t\t___\t\t\t___"
变成
"___\t___\t___\t___"
我不知道怎么做。
经过半小时的摸索,我设法替换了第一次出现的双制表符,就像这样(甚至这实际上是错误的——它向空字符串添加了一个制表符):
import qualified Pipes.ByteString as PB
import qualified Data.ByteString as B
removeConsecutiveTabs =
PB.break (== tab) . mapped %~ \p -> do
yield (B.singleton tab)
PB.dropWhile (== tab) p
但是,我仍然不知道如何替换所有出现的连续制表符。
这是一个不使用 PB.break
但仅使用基本管道操作的解决方案。问题之一是数据以块的形式出现,您必须跟踪最后一个块是否以制表符结尾:
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}
module Lib5
where
import Pipes
import Pipes.ByteString
import qualified Data.ByteString.Char8 as BS
import Control.Monad
printAll = forever $ do a <- await; lift $ putStrLn $ "got: " ++ show a
endsWith bs ch = BS.length bs > 0 && BS.last bs == ch
convertDoubleTabs = await >>= go0
-- no precediing tab
go0 b = do
let (pre,post) = BS.breakSubstring "\t\t" b
yield pre
if BS.length post == 0
then if endsWith pre '\t'
then await >>= go1
else await >>= go0
else do yield "\t"
go0 (BS.drop 2 post)
-- last chunk ended in a tab
go1 b = do
if BS.length b == 0
then await >>= go1
else if BS.index b 0 == '\t'
then go0 (BS.drop 1 b)
else go0 b
example1 = runEffect $ each [ "this", "is\t an", "\t\texample\t", "\t."]
>-> convertDoubleTabs
>-> printAll
如果我找到使用 Pipes.ByteString
和镜头的解决方案,我会添加到这个答案中。
要转换所有连续的制表符:
convertTabs = await >>= go0
where
go0 b = do
let (pre,post) = BS.break (== '\t') b
yield pre
if BS.length post == 0
then await >>= go0
else do yield "\t"
go1 post
go1 b = do
let b' = BS.dropWhile (== '\t') b
if BS.null b'
then await >>= go1
else go0 b'
example2 = runEffect $ each [ "___\t___\t\t___\t\t\t___" ]
>-> convertTabs
>-> printAll
密钥正在字节流上运行,而不是字节串块之一。这可以通过管道字节串(和管道文本)中的 pack
来完成。这是一个不是特别复杂的演示:
{-# LANGUAGE OverloadedStrings #-}
import Pipes
import qualified Pipes.ByteString as PB
import qualified Data.ByteString as B
import Control.Monad
import Control.Lens (over)
test byst = runEffect $
removingConsecutiveTabs (PB.fromLazy byst) >-> PB.stdout
removingConsecutiveTabs :: Monad m
=> Producer B.ByteString m r
-> Producer B.ByteString m r
removingConsecutiveTabs = over PB.pack tabGatekeeper
tabGatekeeper :: Monad m => Producer PB.Word8 m r -> Producer PB.Word8 m r
tabGatekeeper = go False
where
go wasTab stream = do
ex <- lift $ next stream
case ex of
Left r -> return r
Right (x, stream') -> do
let thisIsATab = w8IsTab x
unless (wasTab && thisIsATab) $ yield x
go thisIsATab stream'
w8IsTab x = toEnum (fromIntegral x) == '\t'
GHCi> :set -XOverloadedStrings
GHCi> test "___\t___\t\t___\t\t\t___\n"
___ ___ ___ ___
试试这个:
{-# LANGUAGE OverloadedStrings #-}
import Pipes
import qualified Pipes.Prelude as P
import qualified Pipes.ByteString as PB
import Data.ByteString (ByteString)
import Control.Lens hiding (each)
cleanTabs p = do
p1 <- view (PB.span (/= 9)) p
x <- lift $ next p1
case x of
Left r -> return r
Right (a, p2) -> do
yield "\t"
let p3 = PB.dropWhile (== 9) (yield a >> p2)
cleanTabs p3
source :: Monad m => Producer ByteString m ()
source = each [ "this", "is\t an", "\t\texample\t", "\t.", "\t\tmiddle\t", "\there"]
example = do
putStrLn $ "input: " ++ (show $ P.toList source)
putStrLn $ "output:" ++ (show $ P.toList (cleanTabs source))
我需要用单个制表符替换字节串中的所有连续制表符,如下所示:
"___\t___\t\t___\t\t\t___"
变成
"___\t___\t___\t___"
我不知道怎么做。
经过半小时的摸索,我设法替换了第一次出现的双制表符,就像这样(甚至这实际上是错误的——它向空字符串添加了一个制表符):
import qualified Pipes.ByteString as PB
import qualified Data.ByteString as B
removeConsecutiveTabs =
PB.break (== tab) . mapped %~ \p -> do
yield (B.singleton tab)
PB.dropWhile (== tab) p
但是,我仍然不知道如何替换所有出现的连续制表符。
这是一个不使用 PB.break
但仅使用基本管道操作的解决方案。问题之一是数据以块的形式出现,您必须跟踪最后一个块是否以制表符结尾:
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}
module Lib5
where
import Pipes
import Pipes.ByteString
import qualified Data.ByteString.Char8 as BS
import Control.Monad
printAll = forever $ do a <- await; lift $ putStrLn $ "got: " ++ show a
endsWith bs ch = BS.length bs > 0 && BS.last bs == ch
convertDoubleTabs = await >>= go0
-- no precediing tab
go0 b = do
let (pre,post) = BS.breakSubstring "\t\t" b
yield pre
if BS.length post == 0
then if endsWith pre '\t'
then await >>= go1
else await >>= go0
else do yield "\t"
go0 (BS.drop 2 post)
-- last chunk ended in a tab
go1 b = do
if BS.length b == 0
then await >>= go1
else if BS.index b 0 == '\t'
then go0 (BS.drop 1 b)
else go0 b
example1 = runEffect $ each [ "this", "is\t an", "\t\texample\t", "\t."]
>-> convertDoubleTabs
>-> printAll
如果我找到使用 Pipes.ByteString
和镜头的解决方案,我会添加到这个答案中。
要转换所有连续的制表符:
convertTabs = await >>= go0
where
go0 b = do
let (pre,post) = BS.break (== '\t') b
yield pre
if BS.length post == 0
then await >>= go0
else do yield "\t"
go1 post
go1 b = do
let b' = BS.dropWhile (== '\t') b
if BS.null b'
then await >>= go1
else go0 b'
example2 = runEffect $ each [ "___\t___\t\t___\t\t\t___" ]
>-> convertTabs
>-> printAll
密钥正在字节流上运行,而不是字节串块之一。这可以通过管道字节串(和管道文本)中的 pack
来完成。这是一个不是特别复杂的演示:
{-# LANGUAGE OverloadedStrings #-}
import Pipes
import qualified Pipes.ByteString as PB
import qualified Data.ByteString as B
import Control.Monad
import Control.Lens (over)
test byst = runEffect $
removingConsecutiveTabs (PB.fromLazy byst) >-> PB.stdout
removingConsecutiveTabs :: Monad m
=> Producer B.ByteString m r
-> Producer B.ByteString m r
removingConsecutiveTabs = over PB.pack tabGatekeeper
tabGatekeeper :: Monad m => Producer PB.Word8 m r -> Producer PB.Word8 m r
tabGatekeeper = go False
where
go wasTab stream = do
ex <- lift $ next stream
case ex of
Left r -> return r
Right (x, stream') -> do
let thisIsATab = w8IsTab x
unless (wasTab && thisIsATab) $ yield x
go thisIsATab stream'
w8IsTab x = toEnum (fromIntegral x) == '\t'
GHCi> :set -XOverloadedStrings
GHCi> test "___\t___\t\t___\t\t\t___\n"
___ ___ ___ ___
试试这个:
{-# LANGUAGE OverloadedStrings #-}
import Pipes
import qualified Pipes.Prelude as P
import qualified Pipes.ByteString as PB
import Data.ByteString (ByteString)
import Control.Lens hiding (each)
cleanTabs p = do
p1 <- view (PB.span (/= 9)) p
x <- lift $ next p1
case x of
Left r -> return r
Right (a, p2) -> do
yield "\t"
let p3 = PB.dropWhile (== 9) (yield a >> p2)
cleanTabs p3
source :: Monad m => Producer ByteString m ()
source = each [ "this", "is\t an", "\t\texample\t", "\t.", "\t\tmiddle\t", "\there"]
example = do
putStrLn $ "input: " ++ (show $ P.toList source)
putStrLn $ "output:" ++ (show $ P.toList (cleanTabs source))