{-# LANGUAGE BangPatterns #-}
module AhoCorasick
( TrieAC
, emptyTAC
, insertTAC
, fromListTAC
, ACRoot
, fromTrieAC
, matchAC
) where
import Control.Applicative
import Control.DeepSeq
import Data.List
import Data.Maybe
import qualified Data.ByteString as B
import qualified Data.IntMap.Strict as IM
data ACRoot a = ACRoot !(IM.IntMap (ACNode a)) [a]
data ACNode a = ACNode !(IM.IntMap (ACNode a)) (Maybe (ACNode a)) [a]
fromTrieAC :: TrieAC a -> ACRoot a
fromTrieAC :: TrieAC a -> ACRoot a
fromTrieAC (TrieAC IntMap (TrieAC a)
tm [a]
routs) = IntMap (ACNode a) -> [a] -> ACRoot a
forall a. IntMap (ACNode a) -> [a] -> ACRoot a
ACRoot IntMap (ACNode a)
rmp [a]
routs where
rmp :: IntMap (ACNode a)
rmp = (TrieAC a -> ACNode a) -> IntMap (TrieAC a) -> IntMap (ACNode a)
forall a b. (a -> b) -> IntMap a -> IntMap b
IM.map TrieAC a -> ACNode a
go1 IntMap (TrieAC a)
tm
go1 :: TrieAC a -> ACNode a
go1 (TrieAC IntMap (TrieAC a)
m [a]
vs) = IntMap (ACNode a) -> Maybe (ACNode a) -> [a] -> ACNode a
forall a. IntMap (ACNode a) -> Maybe (ACNode a) -> [a] -> ACNode a
ACNode ((Key -> TrieAC a -> ACNode a)
-> IntMap (TrieAC a) -> IntMap (ACNode a)
forall a b. (Key -> a -> b) -> IntMap a -> IntMap b
IM.mapWithKey (Maybe (ACNode a) -> Key -> TrieAC a -> ACNode a
go Maybe (ACNode a)
forall a. Maybe a
Nothing) IntMap (TrieAC a)
m) Maybe (ACNode a)
forall a. Maybe a
Nothing ([a]
vs [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
routs)
go :: Maybe (ACNode a) -> Key -> TrieAC a -> ACNode a
go Maybe (ACNode a)
psuf !Key
c (TrieAC IntMap (TrieAC a)
m [a]
vs) = IntMap (ACNode a) -> Maybe (ACNode a) -> [a] -> ACNode a
forall a. IntMap (ACNode a) -> Maybe (ACNode a) -> [a] -> ACNode a
ACNode ((Key -> TrieAC a -> ACNode a)
-> IntMap (TrieAC a) -> IntMap (ACNode a)
forall a b. (Key -> a -> b) -> IntMap a -> IntMap b
IM.mapWithKey (Maybe (ACNode a) -> Key -> TrieAC a -> ACNode a
go Maybe (ACNode a)
suf) IntMap (TrieAC a)
m) Maybe (ACNode a)
suf [a]
outs where
suf :: Maybe (ACNode a)
suf = Maybe (ACNode a) -> Maybe (ACNode a)
getSuf Maybe (ACNode a)
psuf
getSuf :: Maybe (ACNode a) -> Maybe (ACNode a)
getSuf Maybe (ACNode a)
Nothing = Key -> IntMap (ACNode a) -> Maybe (ACNode a)
forall a. Key -> IntMap a -> Maybe a
IM.lookup Key
c IntMap (ACNode a)
rmp
getSuf (Just (ACNode IntMap (ACNode a)
mp' Maybe (ACNode a)
suf' [a]
_)) = Key -> IntMap (ACNode a) -> Maybe (ACNode a)
forall a. Key -> IntMap a -> Maybe a
IM.lookup Key
c IntMap (ACNode a)
mp' Maybe (ACNode a) -> Maybe (ACNode a) -> Maybe (ACNode a)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe (ACNode a) -> Maybe (ACNode a)
getSuf Maybe (ACNode a)
suf'
outs :: [a]
outs = [a]
vs [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a] -> (ACNode a -> [a]) -> Maybe (ACNode a) -> [a]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [a]
routs (\(ACNode IntMap (ACNode a)
_ Maybe (ACNode a)
_ [a]
outs') -> [a]
outs') Maybe (ACNode a)
suf
matchAC :: ACRoot a -> B.ByteString -> [[a]]
matchAC :: ACRoot a -> ByteString -> [[a]]
matchAC (ACRoot IntMap (ACNode a)
rmp [a]
routs) !ByteString
s0 = [a]
routs [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: ByteString -> [[a]]
gor ByteString
s0 where
gor :: ByteString -> [[a]]
gor ByteString
s = case ByteString -> Maybe (Word8, ByteString)
B.uncons ByteString
s of
Maybe (Word8, ByteString)
Nothing -> []
Just (Word8
c,ByteString
s') -> case Key -> IntMap (ACNode a) -> Maybe (ACNode a)
forall a. Key -> IntMap a -> Maybe a
IM.lookup (Word8 -> Key
forall a. Enum a => a -> Key
fromEnum Word8
c) IntMap (ACNode a)
rmp of
Maybe (ACNode a)
Nothing -> [a]
routs [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: ByteString -> [[a]]
gor ByteString
s'
Just (ACNode IntMap (ACNode a)
mp Maybe (ACNode a)
suf [a]
outs) -> [a]
outs [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: IntMap (ACNode a) -> Maybe (ACNode a) -> ByteString -> [[a]]
go IntMap (ACNode a)
mp Maybe (ACNode a)
suf ByteString
s'
go :: IntMap (ACNode a) -> Maybe (ACNode a) -> ByteString -> [[a]]
go IntMap (ACNode a)
mp Maybe (ACNode a)
suf ByteString
s = case ByteString -> Maybe (Word8, ByteString)
B.uncons ByteString
s of
Maybe (Word8, ByteString)
Nothing -> []
Just (Word8
c, ByteString
s') -> case Key -> IntMap (ACNode a) -> Maybe (ACNode a)
forall a. Key -> IntMap a -> Maybe a
IM.lookup (Word8 -> Key
forall a. Enum a => a -> Key
fromEnum Word8
c) IntMap (ACNode a)
mp of
Maybe (ACNode a)
Nothing -> (ByteString -> [[a]])
-> (ACNode a -> ByteString -> [[a]])
-> Maybe (ACNode a)
-> ByteString
-> [[a]]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteString -> [[a]]
gor (\(ACNode IntMap (ACNode a)
mp' Maybe (ACNode a)
suf' [a]
_) -> IntMap (ACNode a) -> Maybe (ACNode a) -> ByteString -> [[a]]
go IntMap (ACNode a)
mp' Maybe (ACNode a)
suf') Maybe (ACNode a)
suf ByteString
s
Just (ACNode IntMap (ACNode a)
mp' Maybe (ACNode a)
suf' [a]
outs) -> [a]
outs [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: IntMap (ACNode a) -> Maybe (ACNode a) -> ByteString -> [[a]]
go IntMap (ACNode a)
mp' Maybe (ACNode a)
suf' ByteString
s'
data TrieAC a = TrieAC !(IM.IntMap (TrieAC a)) ![a] deriving Key -> TrieAC a -> ShowS
[TrieAC a] -> ShowS
TrieAC a -> String
(Key -> TrieAC a -> ShowS)
-> (TrieAC a -> String) -> ([TrieAC a] -> ShowS) -> Show (TrieAC a)
forall a. Show a => Key -> TrieAC a -> ShowS
forall a. Show a => [TrieAC a] -> ShowS
forall a. Show a => TrieAC a -> String
forall a.
(Key -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TrieAC a] -> ShowS
$cshowList :: forall a. Show a => [TrieAC a] -> ShowS
show :: TrieAC a -> String
$cshow :: forall a. Show a => TrieAC a -> String
showsPrec :: Key -> TrieAC a -> ShowS
$cshowsPrec :: forall a. Show a => Key -> TrieAC a -> ShowS
Show
emptyTAC :: TrieAC a
emptyTAC :: TrieAC a
emptyTAC = IntMap (TrieAC a) -> [a] -> TrieAC a
forall a. IntMap (TrieAC a) -> [a] -> TrieAC a
TrieAC IntMap (TrieAC a)
forall a. IntMap a
IM.empty []
insertTAC :: B.ByteString -> a -> TrieAC a -> TrieAC a
insertTAC :: ByteString -> a -> TrieAC a -> TrieAC a
insertTAC ByteString
s a
v = ByteString -> TrieAC a -> TrieAC a
go ByteString
s where
go :: ByteString -> TrieAC a -> TrieAC a
go ByteString
cs (TrieAC IntMap (TrieAC a)
m [a]
vs) = case ByteString -> Maybe (Word8, ByteString)
B.uncons ByteString
cs of
Maybe (Word8, ByteString)
Nothing -> IntMap (TrieAC a) -> [a] -> TrieAC a
forall a. IntMap (TrieAC a) -> [a] -> TrieAC a
TrieAC IntMap (TrieAC a)
m (a
va -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
vs)
Just (Word8
c, ByteString
cs') -> IntMap (TrieAC a) -> [a] -> TrieAC a
forall a. IntMap (TrieAC a) -> [a] -> TrieAC a
TrieAC IntMap (TrieAC a)
m' [a]
vs where
m' :: IntMap (TrieAC a)
m' = (Maybe (TrieAC a) -> Maybe (TrieAC a))
-> Key -> IntMap (TrieAC a) -> IntMap (TrieAC a)
forall a. (Maybe a -> Maybe a) -> Key -> IntMap a -> IntMap a
IM.alter ((TrieAC a -> Maybe (TrieAC a)
forall a. a -> Maybe a
Just (TrieAC a -> Maybe (TrieAC a)) -> TrieAC a -> Maybe (TrieAC a)
forall a b. (a -> b) -> a -> b
$!) (TrieAC a -> Maybe (TrieAC a))
-> (Maybe (TrieAC a) -> TrieAC a)
-> Maybe (TrieAC a)
-> Maybe (TrieAC a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> TrieAC a -> TrieAC a
go ByteString
cs' (TrieAC a -> TrieAC a)
-> (Maybe (TrieAC a) -> TrieAC a) -> Maybe (TrieAC a) -> TrieAC a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TrieAC a -> Maybe (TrieAC a) -> TrieAC a
forall a. a -> Maybe a -> a
fromMaybe TrieAC a
forall a. TrieAC a
emptyTAC) (Word8 -> Key
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c) IntMap (TrieAC a)
m
fromListTAC :: [(B.ByteString, a)] -> TrieAC a
fromListTAC :: [(ByteString, a)] -> TrieAC a
fromListTAC = (TrieAC a -> (ByteString, a) -> TrieAC a)
-> TrieAC a -> [(ByteString, a)] -> TrieAC a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\TrieAC a
t (ByteString
s, a
v) -> ByteString -> a -> TrieAC a -> TrieAC a
forall a. ByteString -> a -> TrieAC a -> TrieAC a
insertTAC ByteString
s a
v TrieAC a
t) TrieAC a
forall a. TrieAC a
emptyTAC
instance NFData a => NFData (ACNode a) where
rnf :: ACNode a -> ()
rnf (ACNode IntMap (ACNode a)
mp Maybe (ACNode a)
_outs [a]
suf) = [a]
suf [a] -> () -> ()
`seq` IntMap (ACNode a) -> ()
forall a. NFData a => a -> ()
rnf IntMap (ACNode a)
mp
instance NFData a => NFData (ACRoot a) where
rnf :: ACRoot a -> ()
rnf (ACRoot IntMap (ACNode a)
mp [a]
_outs) = IntMap (ACNode a) -> ()
forall a. NFData a => a -> ()
rnf IntMap (ACNode a)
mp