{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | See <http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html> for the
-- full awk grammar.
module Test.Tasty.Patterns.Parser
  ( Parser
  , runParser
  , ParseResult(..)
  , expr
  , parseAwkExpr
  )
  where

import Prelude hiding (Ordering(..))
import Text.ParserCombinators.ReadP hiding (many, optional)
import Text.ParserCombinators.ReadPrec (readPrec_to_P, minPrec)
import Text.Read (readPrec)
import Data.Functor
import Data.Char
import Control.Applicative
import Control.Monad
import Test.Tasty.Patterns.Types
import Test.Tasty.Patterns.Expr

type Token = ReadP

-- | A separate 'Parser' data type ensures that we don't forget to skip
-- spaces.
newtype Parser a = Parser (ReadP a)
  deriving (forall a b. a -> Parser b -> Parser a
forall a b. (a -> b) -> Parser a -> Parser b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Parser b -> Parser a
$c<$ :: forall a b. a -> Parser b -> Parser a
fmap :: forall a b. (a -> b) -> Parser a -> Parser b
$cfmap :: forall a b. (a -> b) -> Parser a -> Parser b
Functor, Functor Parser
forall a. a -> Parser a
forall a b. Parser a -> Parser b -> Parser a
forall a b. Parser a -> Parser b -> Parser b
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall a b c. (a -> b -> c) -> Parser a -> Parser b -> Parser c
forall (f :: * -> *).
Functor f
-> (forall a. a -> f a)
-> (forall a b. f (a -> b) -> f a -> f b)
-> (forall a b c. (a -> b -> c) -> f a -> f b -> f c)
-> (forall a b. f a -> f b -> f b)
-> (forall a b. f a -> f b -> f a)
-> Applicative f
<* :: forall a b. Parser a -> Parser b -> Parser a
$c<* :: forall a b. Parser a -> Parser b -> Parser a
*> :: forall a b. Parser a -> Parser b -> Parser b
$c*> :: forall a b. Parser a -> Parser b -> Parser b
liftA2 :: forall a b c. (a -> b -> c) -> Parser a -> Parser b -> Parser c
$cliftA2 :: forall a b c. (a -> b -> c) -> Parser a -> Parser b -> Parser c
<*> :: forall a b. Parser (a -> b) -> Parser a -> Parser b
$c<*> :: forall a b. Parser (a -> b) -> Parser a -> Parser b
pure :: forall a. a -> Parser a
$cpure :: forall a. a -> Parser a
Applicative, Applicative Parser
forall a. Parser a
forall a. Parser a -> Parser [a]
forall a. Parser a -> Parser a -> Parser a
forall (f :: * -> *).
Applicative f
-> (forall a. f a)
-> (forall a. f a -> f a -> f a)
-> (forall a. f a -> f [a])
-> (forall a. f a -> f [a])
-> Alternative f
many :: forall a. Parser a -> Parser [a]
$cmany :: forall a. Parser a -> Parser [a]
some :: forall a. Parser a -> Parser [a]
$csome :: forall a. Parser a -> Parser [a]
<|> :: forall a. Parser a -> Parser a -> Parser a
$c<|> :: forall a. Parser a -> Parser a -> Parser a
empty :: forall a. Parser a
$cempty :: forall a. Parser a
Alternative, Applicative Parser
forall a. a -> Parser a
forall a b. Parser a -> Parser b -> Parser b
forall a b. Parser a -> (a -> Parser b) -> Parser b
forall (m :: * -> *).
Applicative m
-> (forall a b. m a -> (a -> m b) -> m b)
-> (forall a b. m a -> m b -> m b)
-> (forall a. a -> m a)
-> Monad m
return :: forall a. a -> Parser a
$creturn :: forall a. a -> Parser a
>> :: forall a b. Parser a -> Parser b -> Parser b
$c>> :: forall a b. Parser a -> Parser b -> Parser b
>>= :: forall a b. Parser a -> (a -> Parser b) -> Parser b
$c>>= :: forall a b. Parser a -> (a -> Parser b) -> Parser b
Monad, Monad Parser
Alternative Parser
forall a. Parser a
forall a. Parser a -> Parser a -> Parser a
forall (m :: * -> *).
Alternative m
-> Monad m
-> (forall a. m a)
-> (forall a. m a -> m a -> m a)
-> MonadPlus m
mplus :: forall a. Parser a -> Parser a -> Parser a
$cmplus :: forall a. Parser a -> Parser a -> Parser a
mzero :: forall a. Parser a
$cmzero :: forall a. Parser a
MonadPlus)

data ParseResult a = Success a | Invalid | Ambiguous [a]
  deriving (ParseResult a -> ParseResult a -> Bool
forall a. Eq a => ParseResult a -> ParseResult a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ParseResult a -> ParseResult a -> Bool
$c/= :: forall a. Eq a => ParseResult a -> ParseResult a -> Bool
== :: ParseResult a -> ParseResult a -> Bool
$c== :: forall a. Eq a => ParseResult a -> ParseResult a -> Bool
Eq, Int -> ParseResult a -> ShowS
forall a. Show a => Int -> ParseResult a -> ShowS
forall a. Show a => [ParseResult a] -> ShowS
forall a. Show a => ParseResult a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ParseResult a] -> ShowS
$cshowList :: forall a. Show a => [ParseResult a] -> ShowS
show :: ParseResult a -> String
$cshow :: forall a. Show a => ParseResult a -> String
showsPrec :: Int -> ParseResult a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> ParseResult a -> ShowS
Show)

token :: Token a -> Parser a
token :: forall a. Token a -> Parser a
token Token a
a = forall a. Token a -> Parser a
Parser (Token a
a forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ReadP ()
skipSpaces)

sym :: Char -> Parser ()
sym :: Char -> Parser ()
sym = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Token a -> Parser a
token forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ReadP Char
char

str :: String -> Parser ()
str :: String -> Parser ()
str = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Token a -> Parser a
token forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ReadP String
string

-- | Run a parser
runParser
  :: Parser a
  -> String -- ^ text to parse
  -> ParseResult a
runParser :: forall a. Parser a -> String -> ParseResult a
runParser (Parser ReadP a
p) String
s =
  case forall a. (a -> Bool) -> [a] -> [a]
filter (forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) forall a b. (a -> b) -> a -> b
$ forall a. ReadP a -> ReadS a
readP_to_S (ReadP ()
skipSpaces forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ReadP a
p) String
s of
    [(a
a, String
_)] -> forall a. a -> ParseResult a
Success a
a
    [] -> forall a. ParseResult a
Invalid
    [(a, String)]
as -> forall a. [a] -> ParseResult a
Ambiguous (forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(a, String)]
as)

intP :: Parser Int
intP :: Parser Int
intP = forall a. Token a -> Parser a
token forall a b. (a -> b) -> a -> b
$
  -- we cannot use the standard Int ReadP parser because it recognizes
  -- negative numbers, making -1 ambiguous
  forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> ReadP String
munch1 Char -> Bool
isDigit

strP :: Parser String
strP :: Parser String
strP = forall a. Token a -> Parser a
token forall a b. (a -> b) -> a -> b
$ forall a. ReadPrec a -> Int -> ReadP a
readPrec_to_P forall a. Read a => ReadPrec a
readPrec Int
minPrec
  -- this deviates somewhat from the awk string literals, by design

-- | An awk ERE token such as @/foo/@. No special characters are recognized
-- at the moment, except @\@ as an escape character for @/@ and itself.
patP :: Parser String
patP :: Parser String
patP = forall a. Token a -> Parser a
token forall a b. (a -> b) -> a -> b
$ Char -> ReadP Char
char Char
'/' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ReadP Char
ch forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ReadP Char
char Char
'/'
  where
    ch :: ReadP Char
ch =
      (Char -> Bool) -> ReadP Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"/\\") forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
      (Char -> ReadP Char
char Char
'\\' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Char -> Bool) -> ReadP Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"/\\"))

nfP :: Parser ()
nfP :: Parser ()
nfP = forall a. Token a -> Parser a
token forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ String -> ReadP String
string String
"NF"

-- | Built-in functions
builtin :: Parser Expr
builtin :: Parser Expr
builtin = forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum
  [ forall a. String -> Parser a -> Parser a
fn String
"length" forall a b. (a -> b) -> a -> b
$ Maybe Expr -> Expr
LengthFn forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Expr
expr
    -- we don't support length without parentheses at all,
    -- because that makes length($1) ambiguous
    -- (we don't require spaces for concatenation)
  , forall a. String -> Parser a -> Parser a
fn String
"toupper" forall a b. (a -> b) -> a -> b
$ Expr -> Expr
ToUpperFn forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Expr
expr
  , forall a. String -> Parser a -> Parser a
fn String
"tolower" forall a b. (a -> b) -> a -> b
$ Expr -> Expr
ToLowerFn forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Expr
expr
  , forall a. String -> Parser a -> Parser a
fn String
"match" forall a b. (a -> b) -> a -> b
$ Expr -> String -> Expr
MatchFn forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Expr
expr forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser ()
sym Char
',' forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser String
patP
  , forall a. String -> Parser a -> Parser a
fn String
"substr" forall a b. (a -> b) -> a -> b
$ Expr -> Expr -> Maybe Expr -> Expr
SubstrFn forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Expr
expr forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser ()
sym Char
',' forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Expr
expr forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*>
      forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> Parser ()
sym Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Expr
expr)
  ]
  where
    fn :: String -> Parser a -> Parser a
    fn :: forall a. String -> Parser a -> Parser a
fn String
name Parser a
args = forall a. Token a -> Parser a
token (String -> ReadP String
string String
name) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ()
sym Char
'(' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
args forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser ()
sym Char
')'

-- | Atomic expressions
expr0 :: Parser Expr
expr0 :: Parser Expr
expr0 =
  (Char -> Parser ()
sym Char
'(' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Expr
expr forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser ()
sym Char
')') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
  (Int -> Expr
IntLit forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Int
intP) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
  (String -> Expr
StringLit forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser String
strP) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
  (String -> Expr
ERE forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser String
patP) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
  (Expr
NF forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ()
nfP) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
  Parser Expr
builtin

-- | Arguments to unary operators: atomic expressions and field
-- expressions
expr1 :: Parser Expr
expr1 :: Parser Expr
expr1 = forall (m :: * -> *) a.
MonadPlus m =>
m a -> [[Operator m a]] -> m a
makeExprParser Parser Expr
expr0
  [ [ forall (m :: * -> *) a. m (a -> a) -> Operator m a
Prefix  (Expr -> Expr
Field forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'$') ] ]

-- | Whether a parser is unary or non-unary.
--
-- This roughly corresponds to the @unary_expr@ and @non_unary_expr@
-- non-terminals in the awk grammar.
-- (Why roughly? See 'expr2'.)
data Unary = Unary | NonUnary

-- | Arithmetic expressions.
--
-- Unlike awk, non-unary expressions disallow unary operators everywhere,
-- not just in the leading position, to avoid extra complexity in
-- 'makeExprParser'.
--
-- For example, the expression
--
-- >1 3 + -4
--
-- is valid in awk because @3 + -4@ is non-unary, but we disallow it here
-- because 'makeExprParser' does not allow us to distinguish it from
--
-- >1 -4 + 3
--
-- which is ambiguous.
expr2 :: Unary -> Parser Expr
expr2 :: Unary -> Parser Expr
expr2 Unary
unary = forall (m :: * -> *) a.
MonadPlus m =>
m a -> [[Operator m a]] -> m a
makeExprParser Parser Expr
expr1
  [ [ forall (m :: * -> *) a. m (a -> a) -> Operator m a
Prefix  (Expr -> Expr
Not forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'!') ] forall a. [a] -> [a] -> [a]
++
    (case Unary
unary of
      Unary
Unary -> [ forall (m :: * -> *) a. m (a -> a) -> Operator m a
Prefix  (Expr -> Expr
Neg forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'-') ]
      Unary
NonUnary -> []
    )
  , [ forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixL  (Expr -> Expr -> Expr
Add forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'+')
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixL  (Expr -> Expr -> Expr
Sub forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'-')
    ]
  ]

-- | Expressions that may include string concatenation
expr3 :: Parser Expr
expr3 :: Parser Expr
expr3 = Parser Expr
concatExpr forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Unary -> Parser Expr
expr2 Unary
Unary
  where
    -- The awk spec mandates that concatenation associates to the left.
    -- But concatenation is associative, so why would we care.
    concatExpr :: Parser Expr
concatExpr = Expr -> Expr -> Expr
Concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Expr
nonUnary forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Expr
nonUnary forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Expr
concatExpr)
    nonUnary :: Parser Expr
nonUnary = Unary -> Parser Expr
expr2 Unary
NonUnary

-- | Everything with lower precedence than concatenation
expr4 :: Parser Expr
expr4 :: Parser Expr
expr4 = forall (m :: * -> *) a.
MonadPlus m =>
m a -> [[Operator m a]] -> m a
makeExprParser Parser Expr
expr3
  [ [ forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
LT forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'<')
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
GT forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'>')
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
LE forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"<=")
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
GE forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
">=")
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
EQ forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"==")
    , forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixN (Expr -> Expr -> Expr
NE forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"!=")
    ]
  , [ forall (m :: * -> *) a. m (a -> a) -> Operator m a
Postfix (forall a b c. (a -> b -> c) -> b -> a -> c
flip Expr -> String -> Expr
Match forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'~' forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser String
patP)
    , forall (m :: * -> *) a. m (a -> a) -> Operator m a
Postfix (forall a b c. (a -> b -> c) -> b -> a -> c
flip Expr -> String -> Expr
NoMatch forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"!~" forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser String
patP)
    ]
  , [ forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixL (Expr -> Expr -> Expr
And forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"&&") ]
  , [ forall (m :: * -> *) a. m (a -> a -> a) -> Operator m a
InfixL (Expr -> Expr -> Expr
Or  forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ String -> Parser ()
str String
"||") ]
  , [ forall (m :: * -> *) a. m (m (a -> a -> a -> a)) -> Operator m a
TernR  ((Expr -> Expr -> Expr -> Expr
If forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
':') forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ()
sym Char
'?') ]
  ]

-- | The awk-like expression parser
expr :: Parser Expr
expr :: Parser Expr
expr = Parser Expr
expr4

-- | Parse an awk expression
parseAwkExpr :: String -> Maybe Expr
parseAwkExpr :: String -> Maybe Expr
parseAwkExpr String
s =
  case forall a. Parser a -> String -> ParseResult a
runParser Parser Expr
expr String
s of
    Success Expr
e -> forall a. a -> Maybe a
Just Expr
e
    ParseResult Expr
_ -> forall a. Maybe a
Nothing