changeset 19:060e561c973a

Add levenshtein fun
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Fri, 17 Apr 2015 15:29:23 -0400
parents 95ba61c40f5b
children 2b8cbff9f9ce
files haskell/levenshtein/bbq.hs haskell/levenshtein/brb.hs haskell/levenshtein/lel.hs haskell/levenshtein/lol.hs haskell/levenshtein/omg.hs haskell/levenshtein/wtf.hs
diffstat 6 files changed, 155 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/bbq.hs
@@ -0,0 +1,33 @@
+import Data.List (intercalate)
+
+dist :: Eq a => [a] -> [a] -> Int
+dist a b
+    = last (if lab == 0 then mainDiag
+            else if lab > 0 
+                 then lowers !! (lab - 1)
+                 else uppers !! (-1 - lab))
+    where mainDiag = oneDiag a b (head uppers) (-1 : head lowers)
+          uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals
+          lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals
+          eachDiag a [] diags = []
+          eachDiag a (bch:bs) (lastDiag:diags) = 
+            oneDiag a bs nextDiag lastDiag : eachDiag a bs diags
+              where nextDiag = head (tail diags)
+          oneDiag a b diagAbove diagBelow = thisdiag
+              where doDiag [] b nw n w = []
+                    doDiag a [] nw n w = []
+                    doDiag (ach:as) (bch:bs) nw n w = 
+                      me : (doDiag as bs me (tail n) (tail w))
+                        where me = if ach == bch 
+                                   then nw 
+                                   else 1 + min3 (head w) nw (head n)
+                    firstelt = 1 + head diagBelow
+                    thisdiag = 
+                      firstelt : doDiag a b firstelt diagAbove (tail diagBelow)
+          lab = length a - length b
+          min3 x y z = if x < y then x else min y z
+
+main = do
+        let omg = "[" ++ intercalate "," (map show [1..1000]) ++ "]"
+        let lol = "[" ++ intercalate "," (map show [2..1001]) ++ "]"
+        print (dist lol omg)
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/brb.hs
@@ -0,0 +1,10 @@
+levenshtein :: Eq a => [a] -> [a] -> Int
+levenshtein xs ys = go [0..length xs] ys where
+    go ts [] = ts !! length xs
+    go (t:ts) (y:ys) = go (t+1 : go' t (t+1) ts xs) ys where
+        go' t t' (t'' : ts) (x : xs) =
+             let t''' = if x == y then t else 1 + min (min t t') t''
+             in  t''' `seq` t''' : go' t'' t''' ts xs
+        go' _ _ _ _ = []
+
+main = print $ levenshtein (show [1..1000]) (show [2..1001])
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/lel.hs
@@ -0,0 +1,29 @@
+-- <Cale> JordiGH: When you do, lev1 is the continuation of the C++ code at the top of the outer for loop, and lev2 is the continuation of a point just after the loop test
+
+import qualified Data.Vector.Unboxed as V
+import Data.Vector.Unboxed (Vector, (!))
+
+levenshtein :: String -> String -> Int
+levenshtein as bs = levenshteinV a b
+  where a = V.fromList as
+        b = V.fromList bs
+
+levenshteinV a b = lev1 a b 1 row
+  where row = V.generate (V.length b + 1) (\i -> i)
+
+lev1 a b i row | i <= V.length a = lev2 a b i row
+               | otherwise       = row ! V.length b
+
+lev2 a b i row = lev1 a b (i+1) row'
+  where row' = V.unfoldrN (V.length b + 1)
+               (\(p,j) -> let p' = min (row ! (j-1) + 
+                                        if a ! (i-1) == b ! (j-1) 
+                                        then 0 
+                                        else 1)
+                                   (1 + min (row ! j) p)
+                          in Just (p,(p',j+1)))
+        (i,1)
+
+main = print $ levenshtein lol omg
+  where lol = show [1..1000]
+        omg = show [2..1001]
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/lol.hs
@@ -0,0 +1,15 @@
+distance :: String -> String -> Int
+distance s1 s2 = iter s1 s2 [0..length s2] where
+        iter (c:cs) s2 row@(e:es) =
+                iter cs s2 (e' : rest e' c s2 row) where
+                        e' = e + 1
+        iter [] _ row = last row
+        iter _ _ _ = error "iter (distance): unexpected arguments"
+        rest e c (c2:c2s) (e1:es@(e2:es')) =
+                seq k (k : rest k c c2s es) where
+                        k = (min (e1 + if c == c2 then 0 else 1) $
+                                min (e+1) (e2+1))
+        rest _ _ [] _ = []
+        rest _ _ _ _ = error "rest (distance): unexpected arguments"
+        
+main = print $ distance (show [1..1000]) (show [2..1001])
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/omg.hs
@@ -0,0 +1,32 @@
+import Control.Monad.ST
+import Control.Monad
+import Data.STRef
+import Data.Array.ST
+
+levenshtein :: String -> String -> Int
+levenshtein a b = runST $ do
+    let asize = length a
+    let bsize = length b
+    prevrow <- newArray_ (0, bsize) :: ST s (STArray s Int Int)
+    forM_ [0 .. bsize] $ \i -> do
+        writeArray prevrow i i
+    thisrow <- newArray (0, bsize) 0
+    prevrowp <- newSTRef prevrow
+    thisrowp <- newSTRef thisrow
+    forM_ [1 .. asize] $ \i -> do
+        thisrow <- readSTRef thisrowp
+        prevrow <- readSTRef prevrowp
+        writeArray thisrow i 0
+        forM_ [1 .. bsize] $ \j -> do
+            x <- readArray prevrow (j - 1)
+            y <- readArray prevrow j
+            z <- readArray thisrow (j - 1)
+            let t = if (a !! (i - 1)) /= (b !! (j - 1)) then 1 else 0
+            writeArray thisrow j $ min (x + t) (1 + min y z)
+            return ()
+        writeSTRef prevrowp thisrow
+        writeSTRef thisrowp prevrow
+    prevrow <- readSTRef prevrowp
+    readArray prevrow bsize
+    
+main = print $ levenshtein (show [1..1000]) (show [2..1001]) 
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/haskell/levenshtein/wtf.hs
@@ -0,0 +1,36 @@
+import Control.Applicative
+import Control.Monad
+import Control.Monad.ST
+import Control.Monad.State
+
+import qualified Data.Vector.Unboxed.Mutable as V
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
+import qualified Data.ByteString.Char8 as BSC
+
+levenshtein :: BS.ByteString -> BS.ByteString -> Int
+levenshtein a b = runST $ do
+  prevrowInit <- V.new (BS.length b + 1)
+  thisrowInit <- V.replicate (BS.length b + 1) 0
+  
+  forM_ [0..BS.length b] $ \i -> V.unsafeWrite prevrowInit i i
+  (_, prevrow) <- flip execStateT (thisrowInit,prevrowInit) $ forM_ [1..BS.length a] $
+   \i -> StateT $ \(thisrow, prevrow) -> do     
+    V.unsafeWrite thisrow 0 i
+    forM_ [1..BS.length b] $ \j -> V.unsafeWrite thisrow j =<< do
+      let add
+            | BS.unsafeIndex a (i-1) /= BS.unsafeIndex b (j-1) = 1
+            | otherwise = 0
+      min <$> fmap (+ add) (V.read prevrow (j-1))
+          <*> fmap (+ 1) (min <$> V.unsafeRead prevrow j <*> V.unsafeRead thisrow (j-1))
+    return ((), (prevrow, thisrow))
+  V.unsafeRead prevrow (BS.length b)
+
+lol :: BS.ByteString
+lol = BSC.pack (show [1..1000])
+
+omg :: BS.ByteString
+omg = BSC.pack (show [2..1001])
+
+main :: IO ()
+main = print $ levenshtein lol omg
\ No newline at end of file