表示类型和出现次数:(所以)易于理解,(因此)难以编码
Representing Types And Occurrences: (so) easy to understand, (so) difficult to code
通过实例简要介绍类型和出现情况。
Ex1. abbacb
a
, b
, c
are the types.
a
occurres 2 times; b
occurres 3 times; c
occurres 1 times.
这可以更简洁地表示为[('a',2),('b',3),('c',1)]
(事实上,顺序无关紧要)。
Ex2. abbacb
ab
, bb
, ba
, ac
, cb
are sequences of types
Each sequence occurs only once.
这可以表示为[("ab",1),("bb",1),("ba",1),("ac",1),("cb",1)]
以下图形结构与前两个具有相同的信息内容:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',2) -- 'b' occurs 2 times
('a',1) -- "ba" occurs 1 times
('b',1) -- "bb" occurs 1 times
('c',1) -- 'c' occurs 1 times
('b',1) -- "cb" occurs 1 times
在Haskell中:[(('a',2),[('b',1),('c',1)]),(('b',2),[('a',1),('b',1)]),(('c',1),[('b',1)])]
出现 3 个元素的序列:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('b',1) -- "abb" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',1) -- "acb" occurs 1 times
...
在Haskell中:
[
(('a',2), [(('b',1),[('b',1)]),(('c',1),[('b',1)])]),
(('b',2), [(('a',1),[('c',1)]),(('b',1),[('a',1)])])
]
类型为 [((Char, Int), [((Char, Int), [(Char, Int)])])]
即使只考虑两个和三个元素的序列,图形表示的可理解性也比Haskell中的要大得多。
此外,列表效率不高,所以我使用了 Data.Map
库,因此表示形式略有不同。
以下示例均以小派的数字为准。用小说的话可以得到有趣的结果
我的问题是:
三种类型的序列专用函数非常复杂。有可能大大简化它们吗?
我什至无法想象如何将函数泛化为 任意长度的序列 。有人知道怎么做吗?
使用以下数据类型递归应该更容易实现:
data TuplesTypesOccurences a = L (M.Map a Int) | B (M.Map a (Int,TuplesTypesOccurences a))
然而,通过这种方式不会失去对 Data.Map
库中所有函数的访问权限?
import qualified Data.Map as M
import Data.List (sortBy)
piDigits = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"
type TypesOccurrences a = M.Map a Int
toTypeOccurrences :: Ord k => [k] -> TypesOccurrences k -> TypesOccurrences k
toTypeOccurrences [] mp = mp
toTypeOccurrences (x:xs) mp = toTypeOccurrences xs $ M.insertWith (+) x 1 mp
-- ex. toTypeOccurrences piDigits M.empty
pprintTO :: Show a => TypesOccurrences a -> IO ()
pprintTO = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)). sortBy (\x y -> compare (snd y) (snd x)) . M.toList
-- ex. pprintTO . M.filter (> 22) . toTypeOccurrences piDigits $ M.empty
type Seq2TypeOccurrences a = M.Map a (Int,TypesOccurrences a)
toSQ2TO :: Ord a => [a] -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
toSQ2TO [] mp = mp
toSQ2TO [x] mp = mp
toSQ2TO (x:y:xs) mp = toSQ2TO (y:xs) $
case M.lookup x mp of
Nothing -> M.insert x (1,M.singleton y 1) mp
Just (_,mp2) -> case M.lookup y mp2 of
Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y 1 mp2)) x mp
Just _ -> M.update (\(n,mp2) -> Just (n+1,M.update (\m -> Just (m+1)) y mp2)) x mp
-- ex. toSQ2TO piDigits M.empty
pprintSQ2TO :: Show a => Seq2TypeOccurrences a -> IO ()
pprintSQ2TO = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")\n\t" ++ (drop 2 . concatMap (("\n\t" ++) . show) . M.toList $ mp)) . M.toList
-- ex. pprintSQ2TO (toSQ2TO piDigits M.empty)
greaterThanSQ2TO :: Ord a => Int -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
greaterThanSQ2TO n = M.filter (\(_,mp2) -> not . M.null $ mp2) . M.map (\(o,mp2) -> (o,M.filter (> n) mp2)) . M.filter (\(m,mp) -> m > n)
-- ex. pprintSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
descSortSQ2TO :: Ord a => Seq2TypeOccurrences a -> [([a], Int)]
descSortSQ2TO = sortBy (\xs ys -> compare (snd ys) (snd xs)) . concatMap (\(x,ys) -> zipWith (\x (y,n) -> ([x,y],n)) (repeat x) ys ) . map (\(x,(_,mp2)) -> (x,M.toList mp2)) . M.toList
-- mapM_ print . descSortSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
unionSQ2TO :: Ord a => Seq2TypeOccurrences a -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
unionSQ2TO = M.unionWith (\(n1,mp1) (n2,mp2) -> (n1+n2, M.unionWith (+) mp1 mp2))
type Seq3TypeOccurrences a = M.Map a (Int,Seq2TypeOccurrences a)
toSQ3TO :: Ord k => [k] -> Seq3TypeOccurrences k -> Seq3TypeOccurrences k
toSQ3TO [] mp = mp
toSQ3TO [x] mp = mp
toSQ3TO [x,y] mp = mp
toSQ3TO (x:y:z:xs) mp = toSQ3TO (y:z:xs) $
case M.lookup x mp of
Nothing -> M.insert x (1,M.singleton y (1,M.singleton z 1)) mp
Just (_,mp2) -> case M.lookup y mp2 of
Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y (1,M.singleton z 1) mp2)) x mp
Just (m,kns3) -> case M.lookup z kns3 of
Nothing -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.insert z 1 mp3)) y mp2)) x mp
Just _ -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.update (Just . (+1)) z mp3)) y mp2)) x mp
-- ex. toSQ3TO piDigits M.empty
pprint3 :: Show a => Seq3TypeOccurrences a -> IO ()
pprint3 = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")" ++ (concatMap (\(x2,(n2,mp2)) -> "\n\t(" ++ (show x2) ++ "," ++ (show n2) ++ ")" ++ (f mp2)) . M.toList $ mp)) . M.toList
where
f = concatMap (\(x,n) -> "\n\t\t(" ++ (show x) ++ "," ++ (show n) ++ ")") . M.toList
-- pprint3 . toSQ3TO piDigits $ M.empty
pprint3B :: Show a => Seq3TypeOccurrences a -> IO ()
pprint3B = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)) . concatMap (\(xs,mp) -> zipWith (\ys (z,n) -> (ys ++ [z],n)) (repeat xs) mp) . concatMap (\(x,mp) -> zipWith (\y (z,mp2) -> ([y,z],mp2)) (repeat x) mp) . map (\(x,(_,mp)) -> (x, map (\(y,(_,mp2)) -> (y, M.toList mp2)) $ M.toList mp)) . M.toList
-- pprint3B . toSQ3TO piDigits $ M.empty
greaterThan3Q2TO :: Ord a => Int -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a
greaterThan3Q2TO n = M.filter (\(_,mp) -> not . M.null $ mp)
. M.map (\(m,mp) -> (m,M.filter (\(o,mp2) -> not . M.null $ mp2) mp))
. M.map (\(m,mp) -> (m,M.map (\(o,mp2) -> (o,M.filter (>n) mp2)) mp))
. M.filter (\(_,mp) -> not. M.null $ mp)
. M.map (\(m,mp) -> (m,M.filter ((n <) . fst) mp))
. M.filter (\(m,mp) -> m > n)
-- ex. pprint3B . greaterThan3Q2TO 2 . toSQ3TO piDigits $ M.empty
unionSQ3TO :: Ord a => Seq3TypeOccurrences a -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a
unionSQ3TO = M.unionWith (\(n,mp2a) (m,mp2b) -> (n+m,unionSQ2TO mp2a mp2b))
您需要像这样定义一个递归数据结构:
data Trie = Nil | Trie (Map Char (Int, Trie))
这允许递归定义 show 和 add 函数。
这是一个实现。 运行 test3
查看其工作原理的示例。
import qualified Data.Map as M
import Text.PrettyPrint
import Data.List
data Trie = Nil | Trie (M.Map Char (Int, Trie))
showTrie :: String -> Trie -> Doc
showTrie _ Nil = empty
showTrie prefix (Trie m) =
vcat $
do (k,(count,t)) <- M.assocs m
let prefix' = prefix ++ [k]
return $
vcat [ lparen <> char '"' <> text prefix' <> char '"' <> comma <> int count <> rparen
, nest 4 (showTrie prefix' t)
]
-- add an element to a Trie
addTrie :: Trie -> String -> Trie
addTrie t [] = t
addTrie Nil xs = addTrie (Trie M.empty) xs
addTrie (Trie m) (x:xs) =
case M.lookup x m of
Nothing -> let t' = addTrie Nil xs
in Trie $ M.insert x (1,t') m
Just (c,t) -> let t' = addTrie t xs
in Trie $ M.insert x (c+1,t') m
test1 =
let t1 = addTrie Nil "abcd"
t2 = addTrie t1 "abce"
in putStrLn $ render $ showTrie "" t2
test2 n str =
putStrLn $ render $ showTrie "" $
foldr (flip addTrie) Nil (map (take n) (tails str))
test3 = test2 4 "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"
通过实例简要介绍类型和出现情况。
Ex1.
abbacb
a
,b
,c
are the types.
a
occurres 2 times;b
occurres 3 times;c
occurres 1 times.
这可以更简洁地表示为[('a',2),('b',3),('c',1)]
(事实上,顺序无关紧要)。
Ex2.
abbacb
ab
,bb
,ba
,ac
,cb
are sequences of typesEach sequence occurs only once.
这可以表示为[("ab",1),("bb",1),("ba",1),("ac",1),("cb",1)]
以下图形结构与前两个具有相同的信息内容:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',2) -- 'b' occurs 2 times
('a',1) -- "ba" occurs 1 times
('b',1) -- "bb" occurs 1 times
('c',1) -- 'c' occurs 1 times
('b',1) -- "cb" occurs 1 times
在Haskell中:[(('a',2),[('b',1),('c',1)]),(('b',2),[('a',1),('b',1)]),(('c',1),[('b',1)])]
出现 3 个元素的序列:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('b',1) -- "abb" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',1) -- "acb" occurs 1 times
...
在Haskell中:
[
(('a',2), [(('b',1),[('b',1)]),(('c',1),[('b',1)])]),
(('b',2), [(('a',1),[('c',1)]),(('b',1),[('a',1)])])
]
类型为 [((Char, Int), [((Char, Int), [(Char, Int)])])]
即使只考虑两个和三个元素的序列,图形表示的可理解性也比Haskell中的要大得多。
此外,列表效率不高,所以我使用了 Data.Map
库,因此表示形式略有不同。
以下示例均以小派的数字为准。用小说的话可以得到有趣的结果
我的问题是:
三种类型的序列专用函数非常复杂。有可能大大简化它们吗?
我什至无法想象如何将函数泛化为 任意长度的序列 。有人知道怎么做吗?
使用以下数据类型递归应该更容易实现:
data TuplesTypesOccurences a = L (M.Map a Int) | B (M.Map a (Int,TuplesTypesOccurences a))
然而,通过这种方式不会失去对
Data.Map
库中所有函数的访问权限?import qualified Data.Map as M import Data.List (sortBy) piDigits = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756" type TypesOccurrences a = M.Map a Int toTypeOccurrences :: Ord k => [k] -> TypesOccurrences k -> TypesOccurrences k toTypeOccurrences [] mp = mp toTypeOccurrences (x:xs) mp = toTypeOccurrences xs $ M.insertWith (+) x 1 mp -- ex. toTypeOccurrences piDigits M.empty pprintTO :: Show a => TypesOccurrences a -> IO () pprintTO = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)). sortBy (\x y -> compare (snd y) (snd x)) . M.toList -- ex. pprintTO . M.filter (> 22) . toTypeOccurrences piDigits $ M.empty type Seq2TypeOccurrences a = M.Map a (Int,TypesOccurrences a) toSQ2TO :: Ord a => [a] -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a toSQ2TO [] mp = mp toSQ2TO [x] mp = mp toSQ2TO (x:y:xs) mp = toSQ2TO (y:xs) $ case M.lookup x mp of Nothing -> M.insert x (1,M.singleton y 1) mp Just (_,mp2) -> case M.lookup y mp2 of Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y 1 mp2)) x mp Just _ -> M.update (\(n,mp2) -> Just (n+1,M.update (\m -> Just (m+1)) y mp2)) x mp -- ex. toSQ2TO piDigits M.empty pprintSQ2TO :: Show a => Seq2TypeOccurrences a -> IO () pprintSQ2TO = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")\n\t" ++ (drop 2 . concatMap (("\n\t" ++) . show) . M.toList $ mp)) . M.toList -- ex. pprintSQ2TO (toSQ2TO piDigits M.empty) greaterThanSQ2TO :: Ord a => Int -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a greaterThanSQ2TO n = M.filter (\(_,mp2) -> not . M.null $ mp2) . M.map (\(o,mp2) -> (o,M.filter (> n) mp2)) . M.filter (\(m,mp) -> m > n) -- ex. pprintSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty descSortSQ2TO :: Ord a => Seq2TypeOccurrences a -> [([a], Int)] descSortSQ2TO = sortBy (\xs ys -> compare (snd ys) (snd xs)) . concatMap (\(x,ys) -> zipWith (\x (y,n) -> ([x,y],n)) (repeat x) ys ) . map (\(x,(_,mp2)) -> (x,M.toList mp2)) . M.toList -- mapM_ print . descSortSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty unionSQ2TO :: Ord a => Seq2TypeOccurrences a -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a unionSQ2TO = M.unionWith (\(n1,mp1) (n2,mp2) -> (n1+n2, M.unionWith (+) mp1 mp2)) type Seq3TypeOccurrences a = M.Map a (Int,Seq2TypeOccurrences a) toSQ3TO :: Ord k => [k] -> Seq3TypeOccurrences k -> Seq3TypeOccurrences k toSQ3TO [] mp = mp toSQ3TO [x] mp = mp toSQ3TO [x,y] mp = mp toSQ3TO (x:y:z:xs) mp = toSQ3TO (y:z:xs) $ case M.lookup x mp of Nothing -> M.insert x (1,M.singleton y (1,M.singleton z 1)) mp Just (_,mp2) -> case M.lookup y mp2 of Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y (1,M.singleton z 1) mp2)) x mp Just (m,kns3) -> case M.lookup z kns3 of Nothing -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.insert z 1 mp3)) y mp2)) x mp Just _ -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.update (Just . (+1)) z mp3)) y mp2)) x mp -- ex. toSQ3TO piDigits M.empty pprint3 :: Show a => Seq3TypeOccurrences a -> IO () pprint3 = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")" ++ (concatMap (\(x2,(n2,mp2)) -> "\n\t(" ++ (show x2) ++ "," ++ (show n2) ++ ")" ++ (f mp2)) . M.toList $ mp)) . M.toList where f = concatMap (\(x,n) -> "\n\t\t(" ++ (show x) ++ "," ++ (show n) ++ ")") . M.toList -- pprint3 . toSQ3TO piDigits $ M.empty pprint3B :: Show a => Seq3TypeOccurrences a -> IO () pprint3B = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)) . concatMap (\(xs,mp) -> zipWith (\ys (z,n) -> (ys ++ [z],n)) (repeat xs) mp) . concatMap (\(x,mp) -> zipWith (\y (z,mp2) -> ([y,z],mp2)) (repeat x) mp) . map (\(x,(_,mp)) -> (x, map (\(y,(_,mp2)) -> (y, M.toList mp2)) $ M.toList mp)) . M.toList -- pprint3B . toSQ3TO piDigits $ M.empty greaterThan3Q2TO :: Ord a => Int -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a greaterThan3Q2TO n = M.filter (\(_,mp) -> not . M.null $ mp) . M.map (\(m,mp) -> (m,M.filter (\(o,mp2) -> not . M.null $ mp2) mp)) . M.map (\(m,mp) -> (m,M.map (\(o,mp2) -> (o,M.filter (>n) mp2)) mp)) . M.filter (\(_,mp) -> not. M.null $ mp) . M.map (\(m,mp) -> (m,M.filter ((n <) . fst) mp)) . M.filter (\(m,mp) -> m > n) -- ex. pprint3B . greaterThan3Q2TO 2 . toSQ3TO piDigits $ M.empty unionSQ3TO :: Ord a => Seq3TypeOccurrences a -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a unionSQ3TO = M.unionWith (\(n,mp2a) (m,mp2b) -> (n+m,unionSQ2TO mp2a mp2b))
您需要像这样定义一个递归数据结构:
data Trie = Nil | Trie (Map Char (Int, Trie))
这允许递归定义 show 和 add 函数。
这是一个实现。 运行 test3
查看其工作原理的示例。
import qualified Data.Map as M
import Text.PrettyPrint
import Data.List
data Trie = Nil | Trie (M.Map Char (Int, Trie))
showTrie :: String -> Trie -> Doc
showTrie _ Nil = empty
showTrie prefix (Trie m) =
vcat $
do (k,(count,t)) <- M.assocs m
let prefix' = prefix ++ [k]
return $
vcat [ lparen <> char '"' <> text prefix' <> char '"' <> comma <> int count <> rparen
, nest 4 (showTrie prefix' t)
]
-- add an element to a Trie
addTrie :: Trie -> String -> Trie
addTrie t [] = t
addTrie Nil xs = addTrie (Trie M.empty) xs
addTrie (Trie m) (x:xs) =
case M.lookup x m of
Nothing -> let t' = addTrie Nil xs
in Trie $ M.insert x (1,t') m
Just (c,t) -> let t' = addTrie t xs
in Trie $ M.insert x (c+1,t') m
test1 =
let t1 = addTrie Nil "abcd"
t2 = addTrie t1 "abce"
in putStrLn $ render $ showTrie "" t2
test2 n str =
putStrLn $ render $ showTrie "" $
foldr (flip addTrie) Nil (map (take n) (tails str))
test3 = test2 4 "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"