Mysql
 sql >> Datenbank >  >> RDS >> Mysql

Analysieren von vom Benutzer eingegebenen Volltextsuchabfragen in die WHERE-Klausel von MySQL mithilfe von PHP

Der folgende Code besteht aus den Klassen Tokenizer, Token und QueryBuilder. Es ist wahrscheinlich nicht die eleganteste Lösung aller Zeiten, aber es tut tatsächlich, was Sie wollten:

<?
// QueryBuilder Grammar:
// =====================
// SearchRule       := SimpleSearchRule { KeyWord }
// SimpleSearchRule := Expression | SimpleSearchRule { 'OR' Expression }
// Expression       := SimpleExpression | Expression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression

$input = 'from:me AND (to:john OR to:jenny) dinner party';

$fieldMapping = array(
    'id' => 'id',
    'from' => 'from',
    'to' => 'to',
    'title' => 'title',
    'description' => 'description',
    'time_created' => 'time_created'
);
$fullTextFields = array('title','description');

$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
    echo $qb->parseSearchRule($input);
} catch(Exception $error) {
    echo 'Error occurred while parsing search query: <br/>'.$error->getMessage();
}

class Token {
    const   KEYWORD = 'KEYWORD',
            OPEN_PAR='OPEN_PAR',
            CLOSE_PAR='CLOSE_PAR',
            FIELD='FIELD',
            AND_OP='AND_OP',
            OR_OP='OR_OP';
    public $type;
    public $chars;
    public $position;

    function __construct($type,$chars,$position) {
        $this->type = $type;
        $this->chars = $chars;
        $this->position = $position;
    }

    function __toString() {
        return 'Token[ type='.$this->type.', chars='.$this->chars.', position='.$this->position.' ]';
    }
}

class Tokenizer {
    private $tokens = array();
    private $input;
    private $currentPosition;

    function __construct($input) {
        $this->input = trim($input);
        $this->currentPosition = 0;
    }

    /**
     * @return Token
     */
    function getToken() {
        if(count($this->tokens)==0) {
            $token = $this->nextToken();
            if($token==null) {
                return null;
            }
            array_push($this->tokens, $token);
        }
        return $this->tokens[0];
    }

    function consumeToken() {
        $token = $this->getToken();
        if($token==null) {
            return null;
        }
        array_shift($this->tokens);
        return $token;
    }

    protected function nextToken() {
        $reservedCharacters = '\:\s\(\)';
        $fieldExpr = '/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
        $keyWord = '/^([^'.$reservedCharacters.']+)/';
        $andOperator = '/^AND\s/';
        $orOperator = '/^OR\s/';
        // Remove whitespaces ..
        $whiteSpaces = '/^\s+/';
        $remaining = substr($this->input,$this->currentPosition);
        if(preg_match($whiteSpaces, $remaining, $matches)) {
            $this->currentPosition += strlen($matches[0]);
            $remaining = substr($this->input,$this->currentPosition);
        }
        if($remaining=='') {
            return null;
        }
        switch(substr($remaining,0,1)) {
            case '(':
                return new Token(Token::OPEN_PAR,'(',$this->currentPosition++);
            case ')':
                return new Token(Token::CLOSE_PAR,')',$this->currentPosition++);
        }
        if(preg_match($fieldExpr, $remaining, $matches)) {
            $token = new Token(Token::FIELD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else if(preg_match($andOperator, $remaining, $matches)) {
            $token = new Token(Token::AND_OP, 'AND', $this->currentPosition);
            $this->currentPosition += 3;
        } else if(preg_match($orOperator, $remaining, $matches)) {
            $token = new Token(Token::OR_OP, 'OR', $this->currentPosition);
            $this->currentPosition += 2;
        } else if(preg_match($keyWord, $remaining, $matches)) {
            $token = new Token(Token::KEYWORD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else throw new Exception('Unable to tokenize: '.$remaining);
        return $token;
    }
}

class QueryBuilder {
    private $fieldMapping;
    private $fulltextFields;

    function __construct($fieldMapping, $fulltextFields) {
        $this->fieldMapping = $fieldMapping;
        $this->fulltextFields = $fulltextFields;
    }

    function parseSearchRule($input) {
        $t = new Tokenizer($input);
        $token = $t->getToken();
        if($token==null) {
            return '';
        }
        $token = $t->getToken();
        if($token->type!=Token::KEYWORD) {
            $searchRule = $this->parseSimpleSearchRule($t);
        } else {
            $searchRule = '';
        }
        $keywords = '';
        while($token = $t->consumeToken()) {
            if($token->type!=Token::KEYWORD) {
                throw new Exception('Only keywords allowed at end of search rule.');
            }
            if($keywords!='') {
                $keywords .= ' ';
            }
            $keywords .= $token->chars;
        }
        if($keywords!='') {
            $matchClause = 'MATCH (`'.(implode('`,`',$this->fulltextFields)).'`) AGAINST (';
            $keywords = $matchClause.'\''.mysql_real_escape_string($keywords).'\' IN BOOLEAN MODE)';
            if($searchRule=='') {
                $searchRule = $keywords;
            } else {
                $searchRule = '('.$searchRule.') AND ('.$keywords.')';
            }
        }
        return $searchRule;
    }

    protected function parseSimpleSearchRule(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseExpression($t);
            $token = $t->getToken();
            if($token->type==Token::OR_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' OR ', $expressions);
    }

    protected function parseExpression(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseSimpleExpression($t);
            $token = $t->getToken();
            if($token->type==Token::AND_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' AND ', $expressions);
    }

    protected function parseSimpleExpression(Tokenizer $t) {
        $token = $t->consumeToken();
        if($token->type==Token::OPEN_PAR) {
            $spr = $this->parseSimpleSearchRule($t);
            $token = $t->consumeToken();
            if($token==null || $token->type!=Token::CLOSE_PAR) {
                throw new Exception('Expected closing parenthesis, found: '.$token->chars);
            }
            return '('.$spr.')';
        } else if($token->type==Token::FIELD) {
            $fieldVal = explode(':', $token->chars,2);
            if(isset($this->fieldMapping[$fieldVal[0]])) {
                return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.mysql_real_escape_string($fieldVal[1]).'\'';
            }
            throw new Exception('Unknown field selected: '.$token->chars);
        } else {
            throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
        }
    }
}
?>

Eine geeignetere Lösung würde zuerst einen Analysebaum erstellen und ihn dann nach weiterer Analyse in eine Abfrage umwandeln.