Grammar换成Lexer,Grammar用作生成AST Tree

This commit is contained in:
Jerry Yan 2021-03-17 12:09:38 +08:00
parent e60b5f0f78
commit 42df1352d2
9 changed files with 135 additions and 21 deletions

View File

@ -12,9 +12,9 @@
## src目录解析 ## src目录解析
- `AST` 语法树 - `AST` 语法树
- `Grammar` 语法器,提供Token识别的规则及种类 - `Grammar` 语法器,解析`Token`,供`Tokenizer`读取使用
- `Output` 输出器,将解析的内容输出(并不执行) - `Output` 输出器,将解析的内容输出(并不执行)
- `Lexer` 词法分析器,二次解析`Token`,供`Tokenizer`读取使用 - `Lexer` 词法分析器,提供Token识别的规则及种类
- `Parser` 语法解析器,生成语法树 - `Parser` 语法解析器,生成语法树
- `Reader` 读取器,供`Lexer`读取使用 - `Reader` 读取器,供`Lexer`读取使用
- `Token` 所有的Token - `Token` 所有的Token

View File

@ -0,0 +1,67 @@
<?php
/**
* @filename DefaultLexer.php
* @author Jerry Yan <792602257@qq.com>
* @date 2021/3/17 11:56
*/
namespace JerryYan\DSL\Lexer;
use JerryYan\DSL\Token\{
Token,
TokenCurry,
TokenDefine,
TokenFake,
TokenLogicAnd,
TokenLogicEqual,
TokenLogicFake,
TokenLogicGreater,
TokenLogicLess,
TokenLogicNot,
TokenLogicNotEqual,
TokenLogicOr,
TokenNumber,
TokenUseVariable,
TokenVariable
};
/**
 * Default lexer configuration.
 *
 * Holds the built-in table that associates each `Token::*` type constant with
 * the token class registered for it, plus the token class reported for names
 * that are not present in that table.
 */
class DefaultLexer implements LexerInterface
{
    /**
     * Token type table: `Token::*` constant => token class name.
     *
     * @var array<string, class-string>
     */
    protected $tokens = [
        Token::FAKE            => TokenFake::class,
        Token::CURRY           => TokenCurry::class,
        Token::LOGIC_AND       => TokenLogicAnd::class,
        Token::LOGIC_OR        => TokenLogicOr::class,
        Token::LOGIC_NOT       => TokenLogicNot::class,
        Token::LOGIC_EQUAL     => TokenLogicEqual::class,
        Token::LOGIC_NOT_EQUAL => TokenLogicNotEqual::class,
        Token::LOGIC_GREATER   => TokenLogicGreater::class,
        Token::LOGIC_LESS      => TokenLogicLess::class,
        Token::LOGIC_FAKE      => TokenLogicFake::class,
        Token::VARIABLE        => TokenVariable::class,
        Token::NUMBER          => TokenNumber::class,
        Token::DEFINE          => TokenDefine::class,
        Token::USE_VARIABLE    => TokenUseVariable::class,
    ];

    /**
     * Token class reported as the "undefined" (fallback) token type.
     *
     * @var class-string
     */
    protected $undefinedTokenType = TokenVariable::class;

    /**
     * Returns the token type table exactly as stored on this instance.
     *
     * @inheritDoc
     */
    public function getTokenTypes(): array
    {
        return $this->tokens;
    }

    /**
     * Returns the class name stored as the fallback token type.
     *
     * @inheritDoc
     */
    public function getUndefinedTokenType(): string
    {
        return $this->undefinedTokenType;
    }
}

View File

@ -0,0 +1,27 @@
<?php
/**
* @filename LexerInterface.php
* @author Jerry Yan <792602257@qq.com>
* @date 2021/3/17 11:55
*/
namespace JerryYan\DSL\Lexer;
/**
 * Contract for a lexer definition: it exposes the set of known token types
 * and the token type to use when a name is not among them.
 */
interface LexerInterface
{
    /**
     * Lists the token types this lexer knows about.
     *
     * @return array<string, class-string> token type key => token class name
     * @author Jerry Yan <792602257@qq.com>
     * @date 2021/1/26 16:27
     */
    public function getTokenTypes(): array;

    /**
     * Names the token class used for input that matches no known token type.
     *
     * @return class-string
     * @author Jerry Yan <792602257@qq.com>
     * @date 2021/1/26 16:27
     */
    public function getUndefinedTokenType(): string;
}

View File

@ -9,6 +9,12 @@
namespace JerryYan\DSL\Token; namespace JerryYan\DSL\Token;
/**
* Token链表基础结构
* @package JerryYan\DSL\Token
* @author Jerry Yan <792602257@qq.com>
* @date 2021/2/22 14:00
*/
abstract class TokenInterface abstract class TokenInterface
{ {
/** @var ?TokenInterface 上一个Token */ /** @var ?TokenInterface 上一个Token */
@ -27,11 +33,23 @@ abstract class TokenInterface
$this->_raw = $original; $this->_raw = $original;
} }
public function setPrevToken(?TokenInterface $token): void /**
* 设置链表上级
* @param self|null $token
* @author Jerry Yan <792602257@qq.com>
* @date 2021/2/22 14:01
*/
public function setPrevToken(?self $token): void
{ {
$this->prevToken = $token; $this->prevToken = $token;
} }
public function setNextToken(TokenInterface $token): void /**
* 设置链表下级
* @param self|null $token
* @author Jerry Yan <792602257@qq.com>
* @date 2021/2/22 14:02
*/
public function setNextToken(?self $token): void
{ {
$this->nextToken = $token; $this->nextToken = $token;
} }

View File

@ -9,7 +9,7 @@
namespace JerryYan\DSL\Tokenizer; namespace JerryYan\DSL\Tokenizer;
use JerryYan\DSL\Grammar\GrammarInterface; use JerryYan\DSL\Lexer\LexerInterface;
use JerryYan\DSL\Reader\ReaderInterface; use JerryYan\DSL\Reader\ReaderInterface;
use JerryYan\DSL\Token\TokenInterface; use JerryYan\DSL\Token\TokenInterface;
@ -21,17 +21,17 @@ class Tokenizer extends TokenizerInterface
protected $tokenNameMapping = []; protected $tokenNameMapping = [];
/** @var array<string, class-string<TokenInterface>> Token别名映射 */ /** @var array<string, class-string<TokenInterface>> Token别名映射 */
protected $regexNameMapping = []; protected $regexNameMapping = [];
/** @var GrammarInterface 语法器 */ /** @var LexerInterface 词法分析器 */
protected $grammar; protected $lexer;
public function __construct(GrammarInterface $grammar) public function __construct(LexerInterface $lexer)
{ {
$this->grammar = $grammar; $this->lexer = $lexer;
/** /**
* @var string $key * @var string $key
* @var TokenInterface $token * @var TokenInterface $token
*/ */
foreach ($grammar->getTokenTypes() as $key=>$token) { foreach ($lexer->getTokenTypes() as $key=> $token) {
$this->tokenMapping[$key] = $token; $this->tokenMapping[$key] = $token;
foreach ($token::$alias as $name) { foreach ($token::$alias as $name) {
$this->tokenNameMapping[$name] = $key; $this->tokenNameMapping[$name] = $key;
@ -81,7 +81,7 @@ class Tokenizer extends TokenizerInterface
} }
} }
if (!isset($this->tokenMapping[$name])) { if (!isset($this->tokenMapping[$name])) {
$undefinedType = $this->grammar->getUndefinedTokenType(); $undefinedType = $this->lexer->getUndefinedTokenType();
return new $undefinedType($originalName); return new $undefinedType($originalName);
} else { } else {
return new $this->tokenMapping[$name]($originalName); return new $this->tokenMapping[$name]($originalName);

View File

@ -7,7 +7,7 @@
namespace JerryYan\DSL\Test\Output; namespace JerryYan\DSL\Test\Output;
use JerryYan\DSL\Grammar\DefaultGrammar; use JerryYan\DSL\Lexer\DefaultLexer;
use JerryYan\DSL\Output\RawOutput; use JerryYan\DSL\Output\RawOutput;
use JerryYan\DSL\Reader\StringReader; use JerryYan\DSL\Reader\StringReader;
use JerryYan\DSL\Tokenizer\Tokenizer; use JerryYan\DSL\Tokenizer\Tokenizer;
@ -15,12 +15,13 @@ use PHPUnit\Framework\TestCase;
class RawOutputTest extends TestCase class RawOutputTest extends TestCase
{ {
private $text = "当 另外那个 与 另外一个 不相等时 或者 那个 和 这个 等于 -0.5 的时候"; private $text = " 当 另外那个 与 另外一个 不相等时 或者 那个 和 这个 等于 -0.5 的时候";
private $expect = "当 另外那个 与 另外一个 不相等时 或者 那个 和 这个 等于 -0.5 的时候";
private $output; private $output;
protected function setUp(): void protected function setUp(): void
{ {
$tokenizer = new Tokenizer(new DefaultGrammar()); $tokenizer = new Tokenizer(new DefaultLexer());
$reader = new StringReader($this->text); $reader = new StringReader($this->text);
$token = $tokenizer->tokenize($reader); $token = $tokenizer->tokenize($reader);
$this->output = new RawOutput($token); $this->output = new RawOutput($token);
@ -28,6 +29,6 @@ class RawOutputTest extends TestCase
public function testOutput() public function testOutput()
{ {
$this->assertEquals($this->text, $this->output->output(), '输出与预期不一致'); $this->assertEquals($this->expect, $this->output->output(), '输出与预期不一致');
} }
} }

View File

@ -7,7 +7,7 @@
namespace JerryYan\DSL\Test\Tokenizer; namespace JerryYan\DSL\Test\Tokenizer;
use JerryYan\DSL\Grammar\DefaultGrammar; use JerryYan\DSL\Lexer\DefaultLexer;
use JerryYan\DSL\Reader\StringReader; use JerryYan\DSL\Reader\StringReader;
use JerryYan\DSL\Token\TokenCurry; use JerryYan\DSL\Token\TokenCurry;
use JerryYan\DSL\Token\TokenLogicAnd; use JerryYan\DSL\Token\TokenLogicAnd;
@ -41,9 +41,10 @@ class TokenizerTest extends TestCase
TokenNumber::class, TokenNumber::class,
TokenLogicFake::class, TokenLogicFake::class,
]; ];
protected function setUp(): void protected function setUp(): void
{ {
$this->tokenizer = new Tokenizer(new DefaultGrammar()); $this->tokenizer = new Tokenizer(new DefaultLexer());
$this->reader = new StringReader($this->text); $this->reader = new StringReader($this->text);
} }
@ -55,7 +56,7 @@ class TokenizerTest extends TestCase
do { do {
$this->assertInstanceOf($this->textTokenType[$index], $tokens); $this->assertInstanceOf($this->textTokenType[$index], $tokens);
$tokens = $tokens->getNextToken(); $tokens = $tokens->getNextToken();
$index ++; $index++;
} while ($tokens->hasNextToken()); } while ($tokens->hasNextToken());
} }
} }