?

Log in

А ежели так?

Jan. 26th, 2010

01:36 am - Spellchecker in Haskell

Inspired by Peter Norvig's Pyton implementation (and discussion in Haskell-Cafe )

Here is my naive String based implementation in Haskell:

{-# LANGUAGE NoMonomorphismRestriction #-}
module Main where

import Data.Maybe
import Data.Char
import Data.List
import Control.Arrow
import qualified Data.Set as S
import Control.Monad
import System.Environment


edits1 = dels `cmb` perms `cmb` reps `cmb` ins
    where
        dels = proc tail (\h t -> [h ++ t])
        perms = proc (init . init) (\hs (t1:t2:ts) -> [hs ++ t2:t1:ts])
        reps = proc init (\hs (_:ts) -> [hs ++ a : ts | a <- alf])
        ins = proc id (\hs ts -> [hs ++ a : ts | a <- alf])

        proc r f = concat . uncurry (zipWith f) <<< inits &&& r . tails
        cmb l r s = l s ++ r s

        alf = ['a'..'z']


edits = con edits1 <<< con edits1 <<< return
    where con f s = s ++ concatMap f s

correct dict w = fromMaybe w $ (`S.member` dict) `find` edits w

main = do
    big:inp:_ <- getArgs
    cont <- readFile big
    test <- readFile inp
    let dict = S.fromList $ words cont
    print $ map (correct dict) $ words test
Runs in 2.5 sec: ./spell big.txt inp.txt +RTS -s ["spelling","became","correct","alms","territory","gutenberg","bowel"] 802,749,812 bytes allocated in the heap 245,621,948 bytes copied during GC 9,474,140 bytes maximum residency (18 sample(s)) 94,160 bytes maximum slop 27 MB total memory in use (0 MB lost due to fragmentation) Generation 0: 1514 collections, 0 parallel, 0.56s, 0.57s elapsed Generation 1: 18 collections, 0 parallel, 0.21s, 0.23s elapsed INIT time 0.00s ( 0.00s elapsed) MUT time 1.75s ( 1.80s elapsed) GC time 0.77s ( 0.80s elapsed) EXIT time 0.00s ( 0.00s elapsed) Total time 2.52s ( 2.60s elapsed) %GC time 30.4% (30.9% elapsed) Alloc rate 458,644,726 bytes per MUT second Productivity 69.5% of total user, 67.2% of total elapsed Can be blindly rewritten using ByteString:
{-# LANGUAGE NoMonomorphismRestriction, OverloadedStrings #-}
module Main where

import Data.Maybe
import Data.Char
import Data.List
import Control.Arrow
import qualified Data.Set as S
import Control.Monad
import System.Environment
import qualified Data.ByteString.Char8 as B


edits1 = dels `cmb` perms `cmb` reps `cmb` ins
    where
        dels = proc tail (\h t -> [h `B.append` t])
        perms = proc (init . init) (\hs ts -> [hs `B.append` ((B.index ts 1) `B.cons` ((B.head ts) `B.cons` ts))])
        reps = proc init (\hs ts -> [hs `B.append` (a `B.cons` (B.tail ts)) | a <- alf])
        ins = proc id (\hs ts -> [hs `B.append` (a `B.cons` ts) | a <- alf])

        proc r f = concat . uncurry (zipWith f) <<< B.inits &&& r . B.tails
        cmb l r s = l s ++ r s

        alf = ['a'..'z']

edits = con edits1 <<< con edits1 <<< return
    where con f s = s ++ concatMap f s

correct dict w = w `fromMaybe` find (`S.member` dict) (edits w)

main = do
    big:inp:_ <- getArgs
    cont <- B.readFile big
    test <- B.readFile inp
    let dict = S.fromList $ B.words cont
    print $ map (correct dict) $ B.words test
Runs slightly faster now (1.2 sec): ./spellB big.txt inp.txt +RTS -s ["spelling","became","correct","alms","territory","gutenberg","bowel"] 387,438,712 bytes allocated in the heap 89,389,660 bytes copied during GC 9,511,888 bytes maximum residency (5 sample(s)) 953,052 bytes maximum slop 23 MB total memory in use (0 MB lost due to fragmentation) Generation 0: 720 collections, 0 parallel, 0.22s, 0.23s elapsed Generation 1: 5 collections, 0 parallel, 0.03s, 0.03s elapsed INIT time 0.00s ( 0.00s elapsed) MUT time 0.99s ( 1.00s elapsed) GC time 0.25s ( 0.27s elapsed) EXIT time 0.00s ( 0.00s elapsed) Total time 1.23s ( 1.27s elapsed) %GC time 20.0% (20.9% elapsed) Alloc rate 392,926,719 bytes per MUT second Productivity 79.9% of total user, 77.6% of total elapsed

Nov. 30th, 2007

02:59 pm - Мое решение задачи "K" на Haskell

Условие задачи "K": CircRef

module Main
 where

import List
import Monad
import Char
import Control.Monad.Identity
import Control.Monad.State
import Control.Monad.Error
import Control.Monad.Reader
import System
import qualified Data.Map as Map
import Text.ParserCombinators.Parsec

type BinOp a = a -> a -> (Either String a)

type Env k n s = Map.Map k (Expr k n s)
                                                
data (Num n, Ord k) => Expr k n s = Lit n
					  | Ref k
					  | Fun2 (BinOp n) (Expr k n s) (Expr k n s)
					  | Const s
					  | Error String

data (Num n) => Result n s = Value n
			   | Str s

type Eval n s = ReaderT (Env String n s) (ErrorT String (StateT [String] Identity)) (Result n s)

runEval env st ev = fst $ runIdentity (runStateT (runErrorT (runReaderT ev env)) st)

eval :: (Num n) => (Expr String n s) -> (Eval n s)

eval (Lit v) = return $ Value v

eval (Const c) = return $ Str c

eval (Error e) = throwError e

eval (Ref r) = do env <- ask
		  case Map.lookup r env of
			Nothing   -> throwError ("NotFound:" ++ r)
			(Just rr) -> do s <- get
					case elem r s of
						True  -> throwError ("CircRef:" ++ (foldr1 ((++).(++"-")) (r:s)))
						False -> do modify (r:)
							    st <- get
							    case runEval env st (eval $ rr) of
								Left e -> throwError e
								Right v -> return v

eval (Fun2 op expr1 expr2) = do (Value arg1) <- toValue expr1
				(Value arg2) <- toValue expr2
				case arg1 `op` arg2 of
					Left e  -> throwError e
					Right v -> return $ Value v

toValue :: (Num n) => (Expr String n s) -> (Eval n s)
toValue expr = do env <-ask
		  st  <- get
	 	  case runEval env st $ eval expr of
			Left e -> throwError e
			Right v@(Value r) -> return v
			_ -> throwError "ArgError"

cell = do{ v <- value <|> constant <|> do{ char '=' ; expression } ; eof ; return v }

expression =  chainl1 term fun <|> term
	
term = cellRef <|> value <|> constant

value = do{ ds <- (many1 digit);return (Lit $ read ds) }

constant = do{ char '\''; s <- (many1 anyChar); return (Const s) }

fun::GenParser Char st (Expr [Char] Integer [Char] -> Expr [Char] Integer [Char] -> Expr [Char] Integer [Char])
fun = 	do f <- oneOf "+-*"
	   case lookup f [('+',(+)),('-',(-)),('*',(*))] of
		Just op -> return $ Fun2 (\x y -> Right (x `op` y))
	<|> do{ oneOf "/" ; return $ Fun2 (\x y -> if y == 0 then (Left "DivByZero") else Right (x `div` y))}

cellTag = do{ l <- (many1 letter);n <- many1 digit;return (l++n) }

cellRef = do{ r <- cellTag;return (Ref r)}



parseCell s = case parse cell s s of
		Left e -> Error "ParseError"
		Right c -> c

evalExpr e s = case runEval s [] $ eval e of
			Left e -> '#':e
			Right r -> case r of
					Value s -> show s
					Str s -> s

main = do flnm <- getArgs
	  cont <- readFile $ head flnm
	  let inp = map (\(n,l)-> zip [[a,n]|a<-['A'..]] $ map parseCell (words l)) $ zip ['1'..] $ tail $ lines cont
	  let evt = Map.fromList $ concat inp
	  putStr $ concatMap (\c -> if c==' ' then "\t\t" else [c]) $ unlines $ map unwords $ map (\l -> map (\(t,e) -> evalExpr e evt) l) inp

Update: Причесал немного код и подправил парсинг (использует теперь buildExpressionParser)