haskell: xml 过滤一个子树
haskell: xml filtering a subtree
我正在努力删除一个元素及其所有具有 haskell 的子元素。
任务是从给定的 xml 文档中删除所有 table-tags(也许我不理解游标的概念或者它是我缺少的其他东西)。
我尝试了三种不同的方法:
- 使用 traversing/filtering 的镜头并使用新元素设置过滤值 - 此处仅替换标签而不替换内容
- 使用游标访问 table 元素 - 重置那里的内容并通过遍历游标直至文档根目录再次获取文档根目录 - 未过滤任何内容
- 以递归方式过滤文档根目录的子项 - 未过滤任何内容
工具
xml-conduit
xml-lens
ghc-8.0.1
输入(test.xml
)/输出
INPUT EXPECTED OUTPUT (for the filtered cases)
<?xml version="1.0"?> | <?xml version="1.0"?>
<root> | <root>
<a> | <a>
... | ...
</a> | </a>
<b> | <b>
<table> | <bb>
<!--table entries--> | ...
</table> | </bb>
<bb> | </b>
... | <c>
</bb> | <cc>
</b> | ...
<c> | </cc>
<cc> | </c>
... | </root>
</cc>
</c>
</root>
最小非工作示例
{-# LANGUAGE OverloadedStrings #-}
module Minimal where
import Control.Lens
import Data.Conduit.Text as CT
import Data.Default
import qualified Data.Text.Lazy.IO as TIO
import Text.XML
import Text.XML.Cursor
import qualified Text.XML.Lens as L
import Data.Maybe (isNothing, isJust)
main :: IO ()
main = do test <- Text.XML.readFile def "./test.xml"
pput $ filterDocument test
let cursor = fromDocument test
pput $ docUp $ elemUp $ getRoot ((head $ cursor $// checkName (== "table")) {child = []} )
pput $ docUp $ elemUp $ filterChildren (checkName (/= "table")) cursor
return ()
filterChildren :: Axis -> Cursor -> Cursor
filterChildren pred c = c {child = map (filterChildren pred) (c $/ pred) }
filterDocument :: Document -> Document
filterDocument doc = doc & (L.root.L.entire.filtered (\e -> isJust $ e^?L.named "table") .~ emptyElemt)
where emptyElemt = Element "empty" mempty []
-- helper functions --
docUp :: Element -> Document
docUp e = Document {documentRoot = e, documentPrologue = Prologue [] Nothing [], documentEpilogue = [] }
elemUp :: Cursor -> Element
elemUp cursor = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = [node cursor]}
elemUp' :: [Cursor] -> Element
elemUp' cursors = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = map node cursors}
getRoot :: Cursor -> Cursor
getRoot c = let p = (c $| parent)
in if null p then c else getRoot $ head p
pput :: Document -> IO ()
pput = TIO.putStrLn . renderText pretty
where pretty = def {rsPretty = True}
输出
> stack ghci
. . .
Ok, modules loaded: Minimal.
λ > main
<?xml version="1.0" encoding="UTF-8"?>
<root>
<a>
...
</a>
<b>
<empty>
<!-- table entries -->
</empty>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
我不知道 Text.XML
,但这是 Text.XML.Light
的解决方案:
module Minimal where
import Data.Maybe(catMaybes)
import Text.XML.Light.Input
import Text.XML.Light.Output
import Text.XML.Light.Types
main :: IO ()
main = do
test <- parseXML <$> readFile "./test.xml"
mapM_ (putStrLn . ppContent) . catMaybes $ map cutTables test
cutTables :: Content -> Maybe Content
cutTables (Elem e) = if qName (elName e) == "table" then Nothing else
Just . Elem $ e { elContent = catMaybes . map cutTables $ elContent e }
cutTables x = Just x
此代码似乎可以根据 xml-conduit 执行您想要的操作。我是从yesod网络书入手example,通过一个简单的递归函数实现了变换
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Map as M
import Prelude hiding (readFile, writeFile)
import Text.XML
main :: IO ()
main = do
Document prologue root epilogue <- readFile def "test.xml"
let root' = transform root
writeFile def
{ rsPretty = True
} "output.html" $ Document prologue root' epilogue
transform :: Element -> Element
transform (Element _name attrs children) =
Element _name attrs (filterChildren children)
filterChildren :: [Node] -> [Node]
filterChildren = concatMap kickTable
where
kickTable :: Node -> [Node]
kickTable (NodeElement (Element "table" attrs children)) = -- Drop it
[ ]
kickTable (NodeElement (Element n attrs children)) = -- Recurse on
[ NodeElement (Element n attrs (filterChildren children)) ]
kickTable n = -- ok - whatever
[ n ]
我的 lens-foo 不够强大,无法说明为什么你的解决方案不起作用,但从文档中 - 你必须小心 filtered
不要违反遍历法则,虽然我不知道当你违反它们时会发生什么。
希望对您有所帮助。
我正在努力删除一个元素及其所有具有 haskell 的子元素。 任务是从给定的 xml 文档中删除所有 table-tags(也许我不理解游标的概念或者它是我缺少的其他东西)。
我尝试了三种不同的方法:
- 使用 traversing/filtering 的镜头并使用新元素设置过滤值 - 此处仅替换标签而不替换内容
- 使用游标访问 table 元素 - 重置那里的内容并通过遍历游标直至文档根目录再次获取文档根目录 - 未过滤任何内容
- 以递归方式过滤文档根目录的子项 - 未过滤任何内容
工具
xml-conduit
xml-lens
ghc-8.0.1
输入(test.xml
)/输出
INPUT EXPECTED OUTPUT (for the filtered cases)
<?xml version="1.0"?> | <?xml version="1.0"?>
<root> | <root>
<a> | <a>
... | ...
</a> | </a>
<b> | <b>
<table> | <bb>
<!--table entries--> | ...
</table> | </bb>
<bb> | </b>
... | <c>
</bb> | <cc>
</b> | ...
<c> | </cc>
<cc> | </c>
... | </root>
</cc>
</c>
</root>
最小非工作示例
{-# LANGUAGE OverloadedStrings #-}
module Minimal where
import Control.Lens
import Data.Conduit.Text as CT
import Data.Default
import qualified Data.Text.Lazy.IO as TIO
import Text.XML
import Text.XML.Cursor
import qualified Text.XML.Lens as L
import Data.Maybe (isNothing, isJust)
main :: IO ()
main = do test <- Text.XML.readFile def "./test.xml"
pput $ filterDocument test
let cursor = fromDocument test
pput $ docUp $ elemUp $ getRoot ((head $ cursor $// checkName (== "table")) {child = []} )
pput $ docUp $ elemUp $ filterChildren (checkName (/= "table")) cursor
return ()
filterChildren :: Axis -> Cursor -> Cursor
filterChildren pred c = c {child = map (filterChildren pred) (c $/ pred) }
filterDocument :: Document -> Document
filterDocument doc = doc & (L.root.L.entire.filtered (\e -> isJust $ e^?L.named "table") .~ emptyElemt)
where emptyElemt = Element "empty" mempty []
-- helper functions --
docUp :: Element -> Document
docUp e = Document {documentRoot = e, documentPrologue = Prologue [] Nothing [], documentEpilogue = [] }
elemUp :: Cursor -> Element
elemUp cursor = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = [node cursor]}
elemUp' :: [Cursor] -> Element
elemUp' cursors = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = map node cursors}
getRoot :: Cursor -> Cursor
getRoot c = let p = (c $| parent)
in if null p then c else getRoot $ head p
pput :: Document -> IO ()
pput = TIO.putStrLn . renderText pretty
where pretty = def {rsPretty = True}
输出
> stack ghci
. . .
Ok, modules loaded: Minimal.
λ > main
<?xml version="1.0" encoding="UTF-8"?>
<root>
<a>
...
</a>
<b>
<empty>
<!-- table entries -->
</empty>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
我不知道 Text.XML
,但这是 Text.XML.Light
的解决方案:
module Minimal where
import Data.Maybe(catMaybes)
import Text.XML.Light.Input
import Text.XML.Light.Output
import Text.XML.Light.Types
main :: IO ()
main = do
test <- parseXML <$> readFile "./test.xml"
mapM_ (putStrLn . ppContent) . catMaybes $ map cutTables test
cutTables :: Content -> Maybe Content
cutTables (Elem e) = if qName (elName e) == "table" then Nothing else
Just . Elem $ e { elContent = catMaybes . map cutTables $ elContent e }
cutTables x = Just x
此代码似乎可以根据 xml-conduit 执行您想要的操作。我是从yesod网络书入手example,通过一个简单的递归函数实现了变换
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Map as M
import Prelude hiding (readFile, writeFile)
import Text.XML
main :: IO ()
main = do
Document prologue root epilogue <- readFile def "test.xml"
let root' = transform root
writeFile def
{ rsPretty = True
} "output.html" $ Document prologue root' epilogue
transform :: Element -> Element
transform (Element _name attrs children) =
Element _name attrs (filterChildren children)
filterChildren :: [Node] -> [Node]
filterChildren = concatMap kickTable
where
kickTable :: Node -> [Node]
kickTable (NodeElement (Element "table" attrs children)) = -- Drop it
[ ]
kickTable (NodeElement (Element n attrs children)) = -- Recurse on
[ NodeElement (Element n attrs (filterChildren children)) ]
kickTable n = -- ok - whatever
[ n ]
我的 lens-foo 不够强大,无法说明为什么你的解决方案不起作用,但从文档中 - 你必须小心 filtered
不要违反遍历法则,虽然我不知道当你违反它们时会发生什么。
希望对您有所帮助。