Mercurial > hg > toys
changeset 19:060e561c973a
Add levenshtein fun
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Fri, 17 Apr 2015 15:29:23 -0400 |
parents | 95ba61c40f5b |
children | 2b8cbff9f9ce |
files | haskell/levenshtein/bbq.hs haskell/levenshtein/brb.hs haskell/levenshtein/lel.hs haskell/levenshtein/lol.hs haskell/levenshtein/omg.hs haskell/levenshtein/wtf.hs |
diffstat | 6 files changed, 155 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/bbq.hs @@ -0,0 +1,33 @@ +import Data.List (intercalate) + +dist :: Eq a => [a] -> [a] -> Int +dist a b + = last (if lab == 0 then mainDiag + else if lab > 0 + then lowers !! (lab - 1) + else uppers !! (-1 - lab)) + where mainDiag = oneDiag a b (head uppers) (-1 : head lowers) + uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals + lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals + eachDiag a [] diags = [] + eachDiag a (bch:bs) (lastDiag:diags) = + oneDiag a bs nextDiag lastDiag : eachDiag a bs diags + where nextDiag = head (tail diags) + oneDiag a b diagAbove diagBelow = thisdiag + where doDiag [] b nw n w = [] + doDiag a [] nw n w = [] + doDiag (ach:as) (bch:bs) nw n w = + me : (doDiag as bs me (tail n) (tail w)) + where me = if ach == bch + then nw + else 1 + min3 (head w) nw (head n) + firstelt = 1 + head diagBelow + thisdiag = + firstelt : doDiag a b firstelt diagAbove (tail diagBelow) + lab = length a - length b + min3 x y z = if x < y then x else min y z + +main = do + let omg = "[" ++ intercalate "," (map show [1..1000]) ++ "]" + let lol = "[" ++ intercalate "," (map show [2..1001]) ++ "]" + print (dist lol omg)
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/brb.hs @@ -0,0 +1,10 @@ +levenshtein :: Eq a => [a] -> [a] -> Int +levenshtein xs ys = go [0..length xs] ys where + go ts [] = ts !! length xs + go (t:ts) (y:ys) = go (t+1 : go' t (t+1) ts xs) ys where + go' t t' (t'' : ts) (x : xs) = + let t''' = if x == y then t else 1 + min (min t t') t'' + in t''' `seq` t''' : go' t'' t''' ts xs + go' _ _ _ _ = [] + +main = print $ levenshtein (show [1..1000]) (show [2..1001]) \ No newline at end of file
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/lel.hs @@ -0,0 +1,29 @@ +-- <Cale> JordiGH: When you do, lev1 is the continuation of the C++ code at the top of the outer for loop, and lev2 is the continuation of a point just after the loop test + +import qualified Data.Vector.Unboxed as V +import Data.Vector.Unboxed (Vector, (!)) + +levenshtein :: String -> String -> Int +levenshtein as bs = levenshteinV a b + where a = V.fromList as + b = V.fromList bs + +levenshteinV a b = lev1 a b 1 row + where row = V.generate (V.length b + 1) (\i -> i) + +lev1 a b i row | i <= V.length a = lev2 a b i row + | otherwise = row ! V.length b + +lev2 a b i row = lev1 a b (i+1) row' + where row' = V.unfoldrN (V.length b + 1) + (\(p,j) -> let p' = min (row ! (j-1) + + if a ! (i-1) == b ! (j-1) + then 0 + else 1) + (1 + min (row ! j) p) + in Just (p,(p',j+1))) + (i,1) + +main = print $ levenshtein lol omg + where lol = show [1..1000] + omg = show [2..1001] \ No newline at end of file
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/lol.hs @@ -0,0 +1,15 @@ +distance :: String -> String -> Int +distance s1 s2 = iter s1 s2 [0..length s2] where + iter (c:cs) s2 row@(e:es) = + iter cs s2 (e' : rest e' c s2 row) where + e' = e + 1 + iter [] _ row = last row + iter _ _ _ = error "iter (distance): unexpected arguments" + rest e c (c2:c2s) (e1:es@(e2:es')) = + seq k (k : rest k c c2s es) where + k = (min (e1 + if c == c2 then 0 else 1) $ + min (e+1) (e2+1)) + rest _ _ [] _ = [] + rest _ _ _ _ = error "rest (distance): unexpected arguments" + +main = print $ distance (show [1..1000]) (show [2..1001]) \ No newline at end of file
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/omg.hs @@ -0,0 +1,32 @@ +import Control.Monad.ST +import Control.Monad +import Data.STRef +import Data.Array.ST + +levenshtein :: String -> String -> Int +levenshtein a b = runST $ do + let asize = length a + let bsize = length b + prevrow <- newArray_ (0, bsize) :: ST s (STArray s Int Int) + forM_ [0 .. bsize] $ \i -> do + writeArray prevrow i i + thisrow <- newArray (0, bsize) 0 + prevrowp <- newSTRef prevrow + thisrowp <- newSTRef thisrow + forM_ [1 .. asize] $ \i -> do + thisrow <- readSTRef thisrowp + prevrow <- readSTRef prevrowp + writeArray thisrow i 0 + forM_ [1 .. bsize] $ \j -> do + x <- readArray prevrow (j - 1) + y <- readArray prevrow j + z <- readArray thisrow (j - 1) + let t = if (a !! (i - 1)) /= (b !! (j - 1)) then 1 else 0 + writeArray thisrow j $ min (x + t) (1 + min y z) + return () + writeSTRef prevrowp thisrow + writeSTRef thisrowp prevrow + prevrow <- readSTRef prevrowp + readArray prevrow bsize + +main = print $ levenshtein (show [1..1000]) (show [2..1001]) \ No newline at end of file
new file mode 100644 --- /dev/null +++ b/haskell/levenshtein/wtf.hs @@ -0,0 +1,36 @@ +import Control.Applicative +import Control.Monad +import Control.Monad.ST +import Control.Monad.State + +import qualified Data.Vector.Unboxed.Mutable as V +import qualified Data.ByteString as BS +import qualified Data.ByteString.Unsafe as BS +import qualified Data.ByteString.Char8 as BSC + +levenshtein :: BS.ByteString -> BS.ByteString -> Int +levenshtein a b = runST $ do + prevrowInit <- V.new (BS.length b + 1) + thisrowInit <- V.replicate (BS.length b + 1) 0 + + forM_ [0..BS.length b] $ \i -> V.unsafeWrite prevrowInit i i + (_, prevrow) <- flip execStateT (thisrowInit,prevrowInit) $ forM_ [1..BS.length a] $ + \i -> StateT $ \(thisrow, prevrow) -> do + V.unsafeWrite thisrow 0 i + forM_ [1..BS.length b] $ \j -> V.unsafeWrite thisrow j =<< do + let add + | BS.unsafeIndex a (i-1) /= BS.unsafeIndex b (j-1) = 1 + | otherwise = 0 + min <$> fmap (+ add) (V.read prevrow (j-1)) + <*> fmap (+ 1) (min <$> V.unsafeRead prevrow j <*> V.unsafeRead thisrow (j-1)) + return ((), (prevrow, thisrow)) + V.unsafeRead prevrow (BS.length b) + +lol :: BS.ByteString +lol = BSC.pack (show [1..1000]) + +omg :: BS.ByteString +omg = BSC.pack (show [2..1001]) + +main :: IO () +main = print $ levenshtein lol omg \ No newline at end of file