表示类型和出现次数:(所以)易于理解,(因此)难以编码

Representing Types And Occurrences: (so) easy to understand, (so) difficult to code

通过实例简要介绍类型和出现情况。

Ex1. abbacb

a, b, c are the types.

a occurres 2 times; b occurres 3 times; c occurres 1 times.

这可以更简洁地表示为[('a',2),('b',3),('c',1)](事实上,顺序无关紧要)。

Ex2. abbacb

ab, bb, ba, ac, cb are sequences of types

Each sequence occurs only once.

这可以表示为[("ab",1),("bb",1),("ba",1),("ac",1),("cb",1)]

以下图形结构与前两个具有相同的信息内容:

('a',2)          -- 'a'  occurs 2 times
     ('b',1)     -- "ab" occurs 1 times
     ('c',1)     -- "ac" occurs 1 times
('b',2)          -- 'b'  occurs 2 times
     ('a',1)     -- "ba" occurs 1 times
     ('b',1)     -- "bb" occurs 1 times
('c',1)          -- 'c'  occurs 1 times
     ('b',1)     -- "cb" occurs 1 times

在Haskell中:[(('a',2),[('b',1),('c',1)]),(('b',2),[('a',1),('b',1)]),(('c',1),[('b',1)])]

出现 3 个元素的序列:

('a',2)              -- 'a'   occurs 2 times
     ('b',1)         -- "ab"  occurs 1 times
          ('b',1)    -- "abb" occurs 1 times
     ('c',1)         -- "ac"  occurs 1 times
          ('b',1)    -- "acb" occurs 1 times
...

在Haskell中:

[
    (('a',2), [(('b',1),[('b',1)]),(('c',1),[('b',1)])]),
    (('b',2), [(('a',1),[('c',1)]),(('b',1),[('a',1)])])
]

类型为 [((Char, Int), [((Char, Int), [(Char, Int)])])]

即使只考虑两个和三个元素的序列,图形表示的可理解性也比Haskell中的要大得多。

此外,列表效率不高,所以我使用了 Data.Map 库,因此表示形式略有不同。

以下示例均以小派的数字为准。用小说的话可以得到有趣的结果

我的问题是:

  1. 三种类型的序列专用函数非常复杂。有可能大大简化它们吗?

  2. 我什至无法想象如何将函数泛化为 任意长度的序列 。有人知道怎么做吗?

  3. 使用以下数据类型递归应该更容易实现:

    data TuplesTypesOccurences a = L (M.Map a Int) | B  (M.Map a (Int,TuplesTypesOccurences a))
    

    然而,通过这种方式不会失去对 Data.Map 库中所有函数的访问权限?

    import qualified Data.Map as M
    import Data.List (sortBy)
    
    piDigits = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"
    
    type TypesOccurrences a = M.Map a Int
    
    toTypeOccurrences :: Ord k => [k] -> TypesOccurrences k -> TypesOccurrences k  
    toTypeOccurrences [] mp = mp
    toTypeOccurrences (x:xs) mp = toTypeOccurrences xs $ M.insertWith (+) x 1 mp
    -- ex. toTypeOccurrences piDigits M.empty
    
    pprintTO :: Show a => TypesOccurrences a -> IO ()
    pprintTO = mapM_ putStrLn . map (\(xs,n) -> show xs ++ "  " ++ (show n)). sortBy (\x y -> compare (snd y) (snd x)) . M.toList
    -- ex. pprintTO . M.filter (> 22) . toTypeOccurrences piDigits $ M.empty
    
    type Seq2TypeOccurrences a = M.Map a (Int,TypesOccurrences a)
    
    toSQ2TO :: Ord a => [a] -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
    toSQ2TO []       mp = mp
    toSQ2TO [x]      mp = mp
    toSQ2TO (x:y:xs) mp = toSQ2TO (y:xs) $
      case M.lookup x mp of
        Nothing       -> M.insert x (1,M.singleton y 1) mp
        Just (_,mp2) -> case M.lookup y mp2 of
            Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y 1 mp2)) x mp
            Just _  -> M.update (\(n,mp2) -> Just (n+1,M.update (\m -> Just (m+1)) y mp2)) x mp
    -- ex. toSQ2TO piDigits M.empty
    
    pprintSQ2TO :: Show a => Seq2TypeOccurrences a -> IO ()
    pprintSQ2TO = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")\n\t" ++ (drop 2 . concatMap (("\n\t" ++) . show) . M.toList $ mp)) . M.toList
    -- ex. pprintSQ2TO (toSQ2TO piDigits  M.empty)
    
    greaterThanSQ2TO :: Ord a => Int -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
    greaterThanSQ2TO n =   M.filter (\(_,mp2) -> not . M.null $ mp2) . M.map (\(o,mp2) -> (o,M.filter (> n) mp2)) . M.filter (\(m,mp) -> m > n)
    -- ex. pprintSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
    
    descSortSQ2TO :: Ord a => Seq2TypeOccurrences a -> [([a], Int)]
    descSortSQ2TO = sortBy (\xs ys -> compare (snd ys) (snd xs)) . concatMap (\(x,ys) -> zipWith (\x (y,n) -> ([x,y],n)) (repeat x) ys ) . map (\(x,(_,mp2)) -> (x,M.toList mp2)) . M.toList
    -- mapM_ print . descSortSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
    
    unionSQ2TO :: Ord a => Seq2TypeOccurrences a -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
    unionSQ2TO = M.unionWith (\(n1,mp1) (n2,mp2) -> (n1+n2, M.unionWith (+) mp1 mp2))
    
    type Seq3TypeOccurrences a = M.Map a (Int,Seq2TypeOccurrences a)
    
    toSQ3TO :: Ord k => [k] -> Seq3TypeOccurrences k -> Seq3TypeOccurrences k
    toSQ3TO []    mp = mp
    toSQ3TO [x]   mp = mp
    toSQ3TO [x,y] mp = mp
    toSQ3TO (x:y:z:xs) mp = toSQ3TO (y:z:xs) $
      case M.lookup x mp of
        Nothing -> M.insert x (1,M.singleton y (1,M.singleton z 1)) mp
        Just (_,mp2) -> case M.lookup y mp2 of
           Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y (1,M.singleton z 1) mp2)) x mp
           Just (m,kns3)  -> case M.lookup z kns3 of
                Nothing -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.insert z 1    mp3)) y mp2)) x mp
                Just _  -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.update (Just . (+1)) z mp3)) y mp2)) x mp
    -- ex. toSQ3TO piDigits M.empty
    
    pprint3 :: Show a => Seq3TypeOccurrences a -> IO ()
    pprint3 = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")" ++ (concatMap (\(x2,(n2,mp2)) -> "\n\t(" ++ (show x2) ++ "," ++ (show n2) ++ ")" ++ (f mp2)) . M.toList $ mp)) . M.toList
      where
      f = concatMap (\(x,n) -> "\n\t\t(" ++ (show x) ++ "," ++ (show n) ++ ")") . M.toList
    -- pprint3 . toSQ3TO piDigits $ M.empty  
    
    pprint3B :: Show a => Seq3TypeOccurrences a -> IO ()
    pprint3B = mapM_ putStrLn . map (\(xs,n) -> show xs ++ "  " ++ (show n)) . concatMap (\(xs,mp) -> zipWith (\ys (z,n) -> (ys ++ [z],n)) (repeat xs) mp) . concatMap (\(x,mp) -> zipWith (\y (z,mp2) -> ([y,z],mp2)) (repeat x) mp) . map (\(x,(_,mp)) -> (x, map (\(y,(_,mp2)) -> (y, M.toList mp2)) $ M.toList mp)) . M.toList
    -- pprint3B . toSQ3TO piDigits $ M.empty 
    
    greaterThan3Q2TO :: Ord a => Int -> Seq3TypeOccurrences a ->  Seq3TypeOccurrences a
    greaterThan3Q2TO n = M.filter (\(_,mp) -> not . M.null $ mp)
      . M.map (\(m,mp) -> (m,M.filter (\(o,mp2) -> not . M.null $ mp2) mp))
      . M.map (\(m,mp) -> (m,M.map (\(o,mp2) -> (o,M.filter (>n) mp2)) mp))
      . M.filter (\(_,mp) -> not. M.null $ mp)
      . M.map (\(m,mp) -> (m,M.filter ((n <) . fst) mp))
      . M.filter (\(m,mp) -> m > n)
    -- ex. pprint3B . greaterThan3Q2TO 2 . toSQ3TO piDigits $ M.empty
    
    unionSQ3TO :: Ord a => Seq3TypeOccurrences a -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a
    unionSQ3TO = M.unionWith (\(n,mp2a) (m,mp2b) -> (n+m,unionSQ2TO mp2a mp2b))
    

您需要像这样定义一个递归数据结构:

data Trie = Nil | Trie (Map Char (Int, Trie))

这允许递归定义 show 和 add 函数。

这是一个实现。 运行 test3 查看其工作原理的示例。

import qualified Data.Map as M
import Text.PrettyPrint
import Data.List

data Trie = Nil | Trie (M.Map Char (Int, Trie))

showTrie :: String -> Trie -> Doc
showTrie _ Nil = empty
showTrie prefix (Trie m) =
  vcat $ 
    do (k,(count,t)) <- M.assocs m
       let prefix' = prefix ++ [k]
       return $
         vcat [ lparen <> char '"' <> text prefix' <> char '"' <> comma <> int count <> rparen
              , nest 4 (showTrie prefix' t)
              ]

-- add an element to a Trie
addTrie :: Trie -> String -> Trie
addTrie t [] = t
addTrie Nil xs = addTrie (Trie M.empty) xs
addTrie (Trie m) (x:xs) = 
  case M.lookup x m of
    Nothing     -> let t' = addTrie Nil xs
                   in Trie $ M.insert x (1,t') m
    Just (c,t)  -> let t' = addTrie t xs
                   in Trie $ M.insert x (c+1,t') m

test1 = 
  let t1 = addTrie Nil "abcd"
      t2 = addTrie t1  "abce"
  in putStrLn $ render $ showTrie "" t2

test2 n str =
  putStrLn $ render $ showTrie "" $
      foldr (flip addTrie) Nil (map (take n) (tails str))

test3 = test2 4 "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"