Bug Vanquisher

23 October 2009

Story of a Parser

Filed under: Dev inside!, Tips — Tanveer Badar @ 8:31 AM

A quick and dirty, precedence parser I wrote in about 2 hours. Minimal error handling, (may be) able to parse expressions of the form

expr := expr combiner expr | expr op ( expr | list ) | term
combiner := AND | OR
op := ‘=’ | in
list := ( term ) | ( term , list )
term := ‘identifer or literal’

op will include usual comparison operators soon. List can only be specified if op is ‘in’. Code follows, you should be able to add the missing pieces yourself as not much more can be revealed.:

public static partial class Parser
{
    static Expression BuildTree( string expr )
    {
        int startIndex = 0;
        Stack<Expression> stack = new Stack<Expression>( );
        BuildTreeRecursive( expr , ref startIndex , stack );
        return stack.Pop( );
    }

    private static void BuildTreeRecursive( string expr , ref int startIndex , Stack<Expression> stack )
    {
        string combiner = null;
        while( startIndex < expr.Length )
        {
            Expression expr1 = null;
            if( string.IsNullOrEmpty( combiner ) )
            {
                expr1 = RecognizeExpression( expr , ref startIndex );
                EatWhiteSpace( expr , ref startIndex );
                combiner = RecognizeOperator( expr , ref startIndex );
            }
            Expression expr2 = null;
            switch( combiner.ToUpper( ) )
            {
                case "AND":
                    expr2 = RecognizeExpression( expr , ref startIndex );
                    stack.Push( new BinaryExpression
                    {
                        LeftSide = expr1 ,
                        RightSide = expr2 ,
                        Type = Expression.Operator.And
                    } );
                    EatWhiteSpace( expr , ref startIndex );
                    combiner = RecognizeOperator( expr , ref startIndex );
                    EatWhiteSpace( expr , ref startIndex );
                    break;
                case "OR":
                    BuildTreeRecursive( expr , ref startIndex , stack );
                    if( stack.Count > 0 )
                        expr2 = stack.Pop( );
                    if( stack.Count > 0 )
                        expr1 = stack.Pop( );
                    stack.Push( new BinaryExpression
                    {
                        LeftSide = expr2 ,
                        RightSide = expr1 ,
                        Type = Expression.Operator.Or
                    } );
                    EatWhiteSpace( expr , ref startIndex );
                    combiner = RecognizeOperator( expr , ref startIndex );
                    EatWhiteSpace( expr , ref startIndex );
                    break;
            }
        }
    }

    private static Expression RecognizeExpression( string expr , ref int startIndex )
    {
        EatWhiteSpace( expr , ref startIndex );
        string attrib = RecognizeAttribute( expr , ref startIndex );
        EatWhiteSpace( expr , ref startIndex );
        string op = RecognizeOperator( expr , ref startIndex );
        EatWhiteSpace( expr , ref startIndex );
        switch( op.ToUpper( ) )
        {
            case "=":
                string value = RecognizeWord( expr , ref startIndex );
                return new BinaryExpression
                {
                    LeftSide = new ConstantExpression( attrib ) ,
                    RightSide = new ConstantExpression( value ) ,
                    Type = Expression.Operator.Equal
                };
            case "IN":
                List<string> values = RecognizeList( expr , ref startIndex );
                return new BinaryExpression
                {
                    LeftSide = new ConstantExpression( attrib ) ,
                    Type = Expression.Operator.In
                };
        }
        return new Expression( );
    }

    private static List<string> RecognizeList( string expr , ref int startIndex )
    {
        List<string> values = new List<string>( );
        if( expr.Length <= startIndex || expr [ startIndex ] != ‘(‘ )
            return values;

        ++startIndex;

        do
        {
            EatWhiteSpace( expr , ref startIndex );
            values.Add( RecognizeWord( expr , ref startIndex ) );
            if( expr.Length <= startIndex || expr [ startIndex ] != ‘,’ )
                break;
        } while( startIndex < expr.Length );

        EatWhiteSpace( expr , ref startIndex );

        if( expr.Length <= startIndex || expr [ startIndex ] != ‘)’ )
            return values;

        ++startIndex;

        return values;
    }

    private static string RecognizeAttribute( string expr , ref int startIndex )
    {
        return RecognizeWord( expr , ref startIndex );
    }

    private static string RecognizeOperator( string expr , ref int startIndex )
    {
        int start = startIndex;
        while( startIndex < expr.Length && !char.IsWhiteSpace( expr [ startIndex ] ) )
            ++startIndex;
        if( start != startIndex )
            return expr.Substring( start , startIndex – start );
        return string.Empty;
    }

    private static void EatWhiteSpace( string expr , ref int startIndex )
    {
        while( startIndex < expr.Length && char.IsWhiteSpace( expr [ startIndex ] ) )
            ++startIndex;
    }

    private static string RecognizeWord( string expr , ref int startIndex )
    {
        int first = expr.IndexOf( "’" , startIndex ) , second = first != -1 ? expr.IndexOf( "’" , first + 1 ) : -1;
        if( first != -1 )
            if( second != -1 )
            {
                startIndex = second + 1;
                return expr.Substring( first , second – first );
            }
            else
            {
                startIndex = expr.Length;
                return expr.Substring( first );
            }
        return string.Empty;
    }
}

This is what you should pass.

string expr = "’attribute1′ = ‘value1’ and ‘attribute2’ = ‘value2’ or ‘attribute3’ in (‘value3’) or ‘attribute4’ = ‘value4’";

Expression expression = Parser.BuildTree( expr );

Advertisements

Leave a Comment »

No comments yet.

RSS feed for comments on this post. TrackBack URI

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Blog at WordPress.com.

%d bloggers like this: