From f3b66aeac3028dbd116ae2952437e81c3f06d12c Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Tue, 26 Jan 2021 16:53:37 +0800 Subject: [PATCH] =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 9 +-- .../DefaultGrammar.php} | 28 +++++++-- src/Grammar/Grammar.php | 15 ----- src/Grammar/GrammarInterface.php | 14 ++++- src/Token/Factory/FactoryInterface.php | 62 ------------------- src/Tokenizer/Tokenizer.php | 59 ++++++++++++++++-- tests/Tokenizer/TokenizerTest.php | 4 +- 7 files changed, 95 insertions(+), 96 deletions(-) rename src/{Token/Factory/DefaultFactory.php => Grammar/DefaultGrammar.php} (73%) delete mode 100644 src/Grammar/Grammar.php delete mode 100644 src/Token/Factory/FactoryInterface.php diff --git a/README.md b/README.md index 07d5b7f..163bf83 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,11 @@ ## src目录解析 -- `Grammar` 语法解析器,验证语法及拆分语法 +- `AST` 语法树 +- `Grammar` 语法器,提供Token识别的规则及种类 - `Output` 输出器,将解析的内容输出(并不执行) -- `Lexer` 词法解析器,将语义转换成正常的语法供`Grammar`使用 -- `Reader` 读取器,供`Tokenizer`读取使用 +- `Lexer` 词法分析器,二次解析`Token`供`Tokenizer`读取使用 +- `Parser` 语法解析器,生成语法树 +- `Reader` 读取器,供`Lexer`读取使用 - `Token` 所有的Token -- `Token/Factory` Token工厂,生成Token用的 - `Tokenizer` 转换成Token用的 diff --git a/src/Token/Factory/DefaultFactory.php b/src/Grammar/DefaultGrammar.php similarity index 73% rename from src/Token/Factory/DefaultFactory.php rename to src/Grammar/DefaultGrammar.php index 51a6764..b28be9f 100644 --- a/src/Token/Factory/DefaultFactory.php +++ b/src/Grammar/DefaultGrammar.php @@ -1,12 +1,12 @@ - * @date 2020/12/17 14:48 + * @date 2020/12/17 14:13 */ -namespace JerryYan\DSL\Token\Factory; +namespace JerryYan\DSL\Grammar; use JerryYan\DSL\Token\Token; @@ -25,9 +25,9 @@ use JerryYan\DSL\Token\TokenNumber; use JerryYan\DSL\Token\TokenUseVariable; use JerryYan\DSL\Token\TokenVariable; -class DefaultFactory extends FactoryInterface +class DefaultGrammar implements GrammarInterface { - protected $tokenMap = [ + protected $tokens = [ Token::FAKE => TokenFake::class, Token::CURRY => TokenCurry::class, Token::LOGIC_AND => TokenLogicAnd::class, @@ -44,5 +44,21 @@ class DefaultFactory extends FactoryInterface Token::USE_VARIABLE => TokenUseVariable::class, ]; - protected $undefinedTokenClass = TokenVariable::class; + protected $undefinedTokenType = TokenVariable::class; + + /** + * @inheritDoc + */ + public function getTokenTypes(): array + { + return $this->tokens; + } + + /** + * @inheritDoc + */ + public function getUndefinedTokenType(): string + { + return $this->undefinedTokenType; + } } \ No newline at end of file diff --git a/src/Grammar/Grammar.php b/src/Grammar/Grammar.php deleted file mode 100644 index fcb279e..0000000 --- a/src/Grammar/Grammar.php +++ /dev/null @@ -1,15 +0,0 @@ - - * @date 2020/12/17 14:13 - */ - - -namespace JerryYan\DSL\Grammar; - - -class Grammar extends GrammarInterface -{ - -} \ No newline at end of file diff --git a/src/Grammar/GrammarInterface.php b/src/Grammar/GrammarInterface.php index 79ff66a..43ab48a 100644 --- a/src/Grammar/GrammarInterface.php +++ b/src/Grammar/GrammarInterface.php @@ -9,7 +9,19 @@ namespace JerryYan\DSL\Grammar; -abstract class GrammarInterface +interface GrammarInterface { + /** + * @return array + * @author Jerry Yan <792602257@qq.com> + * @date 2021/1/26 16:27 + */ + public function getTokenTypes(): array; + /** + * @return class-string + * @author Jerry Yan <792602257@qq.com> + * @date 2021/1/26 16:27 + */ + public function getUndefinedTokenType(): string; } \ No newline at end of file diff --git a/src/Token/Factory/FactoryInterface.php b/src/Token/Factory/FactoryInterface.php deleted file mode 100644 index dd36590..0000000 --- a/src/Token/Factory/FactoryInterface.php +++ /dev/null @@ -1,62 +0,0 @@ - - * @date 2021/1/22 13:42 - */ - - -namespace JerryYan\DSL\Token\Factory; - - - -use JerryYan\DSL\Token\TokenInterface; -use JerryYan\DSL\Token\TokenUndefined; - -abstract class FactoryInterface -{ - /** @var array> Token类型及映射类 */ - protected $tokenMap = []; - /** @var array> Token别名映射 */ - protected $tokenNameMap = []; - /** @var array> Token别名映射 */ - protected $regexNameMap = []; - /** @var class-string 默认Token类 */ - protected $undefinedTokenClass = TokenUndefined::class; - - public function __construct() - { - /** - * @var string $key - * @var TokenInterface $token - */ - foreach ($this->tokenMap as $key=>$token) { - foreach ($token::$alias as $name) { - $this->tokenNameMap[$name] = $key; - } - foreach ($token::$regexAlias as $name) { - $this->regexNameMap[$name] = $key; - } - } - } - - public function getTokenByName(string $name): TokenInterface - { - $originalName = $name; - if (isset($this->tokenNameMap[$name])) { - $name = $this->tokenNameMap[$name]; - } else { - foreach ($this->regexNameMap as $regex => $newName) { - if (preg_match($regex, $name) === 1) { - $name = $newName; break; - } - } - } - if (!isset($this->tokenMap[$name])) { - return new $this->undefinedTokenClass($originalName); - } else { - return new $this->tokenMap[$name]($originalName); - } - } - -} \ No newline at end of file diff --git a/src/Tokenizer/Tokenizer.php b/src/Tokenizer/Tokenizer.php index 9430c1e..2b4257b 100644 --- a/src/Tokenizer/Tokenizer.php +++ b/src/Tokenizer/Tokenizer.php @@ -9,17 +9,37 @@ namespace JerryYan\DSL\Tokenizer; +use JerryYan\DSL\Grammar\GrammarInterface; use JerryYan\DSL\Reader\ReaderInterface; -use JerryYan\DSL\Token\Factory\FactoryInterface; use JerryYan\DSL\Token\TokenInterface; class Tokenizer extends TokenizerInterface { - /** @var FactoryInterface token工厂 */ - protected $tokenFactory; - public function __construct(FactoryInterface $tokenFactory) + /** @var array> Token类型及映射类 */ + protected $tokenMapping = []; + /** @var array> Token别名映射 */ + protected $tokenNameMapping = []; + /** @var array> Token别名映射 */ + protected $regexNameMapping = []; + /** @var GrammarInterface 语法器 */ + protected $grammar; + + public function __construct(GrammarInterface $grammar) { - $this->tokenFactory = $tokenFactory; + $this->grammar = $grammar; + /** + * @var string $key + * @var TokenInterface $token + */ + foreach ($grammar->getTokenTypes() as $key=>$token) { + $this->tokenMapping[$key] = $token; + foreach ($token::$alias as $name) { + $this->tokenNameMapping[$name] = $key; + } + foreach ($token::$regexAlias as $name) { + $this->regexNameMapping[$name] = $key; + } + } } /** @@ -33,11 +53,38 @@ class Tokenizer extends TokenizerInterface do { $currentTokenName = $reader->getCurrentToken(); - $currentToken = $this->tokenFactory->getTokenByName($currentTokenName); + $currentToken = $this->getTokenByName($currentTokenName); $currentToken->setPrevToken($lastToken); if ($lastToken !== NULL) $lastToken->setNextToken($currentToken); $lastToken = $currentToken; }while($reader->moveToNextToken()); return $lastToken->getFirstToken(); } + + /** + * 根据文字获取Token + * @param string $name + * @return TokenInterface + * @author Jerry Yan <792602257@qq.com> + * @date 2021/1/26 16:41 + */ + protected function getTokenByName(string $name): TokenInterface + { + $originalName = $name; + if (isset($this->tokenNameMapping[$name])) { + $name = $this->tokenNameMapping[$name]; + } else { + foreach ($this->regexNameMapping as $regex => $newName) { + if (preg_match($regex, $name) === 1) { + $name = $newName; break; + } + } + } + if (!isset($this->tokenMapping[$name])) { + $undefinedType = $this->grammar->getUndefinedTokenType(); + return new $undefinedType($originalName); + } else { + return new $this->tokenMapping[$name]($originalName); + } + } } \ No newline at end of file diff --git a/tests/Tokenizer/TokenizerTest.php b/tests/Tokenizer/TokenizerTest.php index 8a18a2b..3dc7154 100644 --- a/tests/Tokenizer/TokenizerTest.php +++ b/tests/Tokenizer/TokenizerTest.php @@ -7,8 +7,8 @@ namespace JerryYan\DSL\Test\Tokenizer; +use JerryYan\DSL\Grammar\DefaultGrammar; use JerryYan\DSL\Reader\StringReader; -use JerryYan\DSL\Token\Factory\DefaultFactory; use JerryYan\DSL\Token\TokenCurry; use JerryYan\DSL\Token\TokenLogicAnd; use JerryYan\DSL\Token\TokenInterface; @@ -43,7 +43,7 @@ class TokenizerTest extends TestCase ]; protected function setUp(): void { - $this->tokenizer = new Tokenizer(new DefaultFactory()); + $this->tokenizer = new Tokenizer(new DefaultGrammar()); $this->reader = new StringReader($this->text); }