From 9a0e925f6e2f0fa8ae2c4873a2c2cbade7050f75 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Fri, 18 Dec 2020 18:12:47 +0800 Subject: [PATCH] =?UTF-8?q?StringReader=E9=83=A8=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 +- composer.json | 9 ++ src/Reader/FileReader.php | 5 +- src/Reader/ReaderInterface.php | 66 ++++++++++++-- src/Reader/StreamReader.php | 15 ---- src/Reader/StringReader.php | 124 +++++++++++++++++++++++++++ src/Token/TokenAnd.php | 2 +- src/Token/TokenFactory.php | 22 ++++- src/Token/TokenOr.php | 15 ++++ src/Token/TokenUndefined.php | 15 ++++ src/Tokenizer/Tokenizer.php | 24 ++++++ src/Tokenizer/TokenizerInterface.php | 13 +++ tests/Reader/StringReaderTest.php | 54 ++++++++++++ 13 files changed, 338 insertions(+), 28 deletions(-) delete mode 100644 src/Reader/StreamReader.php create mode 100644 src/Reader/StringReader.php create mode 100644 src/Token/TokenOr.php create mode 100644 src/Token/TokenUndefined.php create mode 100644 tests/Reader/StringReaderTest.php diff --git a/.gitignore b/.gitignore index d76e12e..9828c02 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ composer.phar vendor/ composer.lock - +.phpunit* diff --git a/composer.json b/composer.json index faac855..4bcf393 100644 --- a/composer.json +++ b/composer.json @@ -13,9 +13,18 @@ "php": ">=7.1", "ext-mbstring": "*" }, + "require-dev": { + "phpunit/phpunit": "^9.0", + "mockery/mockery": "^1.4" + }, "autoload": { "psr-4": { "JerryYan\\DSL\\": "src" } + }, + "autoload-dev": { + "psr-4": { + "JerryYan\\DSL\\Test\\": "tests" + } } } \ No newline at end of file diff --git a/src/Reader/FileReader.php b/src/Reader/FileReader.php index 044f8bd..09328d5 100644 --- a/src/Reader/FileReader.php +++ b/src/Reader/FileReader.php @@ -9,7 +9,10 @@ namespace JerryYan\DSL\Reader; -class FileReader extends ReaderInterface +class FileReader /** extends ReaderInterface */ { + public function __construct(string $fileName) + { + } } \ No newline at end of file diff --git a/src/Reader/ReaderInterface.php b/src/Reader/ReaderInterface.php index cfda073..febb39e 100644 --- a/src/Reader/ReaderInterface.php +++ b/src/Reader/ReaderInterface.php @@ -17,13 +17,42 @@ namespace JerryYan\DSL\Reader; */ abstract class ReaderInterface { - protected $currentLine = 0; + protected $currentLine = 1; protected $currentPosition = 0; protected $currentLinePosition = 0; - #abstract public function getNextChar(): ?string; - #abstract public function getCurrentToken(): ?string; - #abstract public function getNextToken(): ?string; - #abstract public function moveToNextToken(): ?string; + protected $nextPosition = 0; + + /** + * 获取下一个字符 + * @return string + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:06 + */ + abstract public function getNextChar(): string; + + /** + * 获取当前识别符 + * @return string + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:06 + */ + abstract public function getCurrentToken(): string; + + /** + * 获取下一个识别符 + * @return string + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:06 + */ + abstract public function getNextToken(): string; + + /** + * 移动至下一个识别符 + * @return bool + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:06 + */ + abstract public function moveToNextToken(): bool; /** * 跳过当前行 @@ -31,7 +60,7 @@ abstract class ReaderInterface * @author Jerry Yan <792602257@qq.com> * @date 2020/12/17 15:43 */ - #abstract public function skipCurrentLine(): bool; + abstract public function skipCurrentLine(): bool; /** * 从当前位置跳到结束位置 @@ -40,5 +69,28 @@ abstract class ReaderInterface * @author Jerry Yan <792602257@qq.com> * @date 2020/12/17 15:43 */ - #abstract public function skipUntil(string $end="*/"): bool; + abstract public function skipUntil(string $end="*/"): bool; + + /** + * 重置读取器 + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:01 + */ + public function reset(): void + { + $this->currentLine = 1; + $this->currentPosition = 0; + $this->nextPosition = 0; + $this->moveToNextToken(); + } + + /** + * 重置读取器 + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 12:01 + */ + public function resetCursor(): void + { + $this->reset(); + } } \ No newline at end of file diff --git a/src/Reader/StreamReader.php b/src/Reader/StreamReader.php deleted file mode 100644 index e747621..0000000 --- a/src/Reader/StreamReader.php +++ /dev/null @@ -1,15 +0,0 @@ - - * @date 2020/12/17 14:58 - */ - - -namespace JerryYan\DSL\Reader; - - -class StreamReader extends ReaderInterface -{ - -} \ No newline at end of file diff --git a/src/Reader/StringReader.php b/src/Reader/StringReader.php new file mode 100644 index 0000000..6e6112b --- /dev/null +++ b/src/Reader/StringReader.php @@ -0,0 +1,124 @@ + + * @date 2020/12/17 14:58 + */ + + +namespace JerryYan\DSL\Reader; + + +class StringReader extends ReaderInterface +{ + protected $string; + protected $currentToken; + + public function __construct(string $string) + { + $this->string = $string; + $this->moveToNextToken(); + } + + /** + * @inheritDoc + */ + public function getNextChar(int $startAt = null): string + { + if ($startAt === null) $startAt = $this->currentPosition; + return mb_substr($this->string, $startAt, 1); + } + + /** + * @inheritDoc + */ + public function getCurrentToken(): string + { + return $this->currentToken; + } + + /** + * @inheritDoc + */ + public function getNextToken(): string + { + $curToken = ""; + $curPos = $this->nextPosition; + while ($curChar = $this->getNextChar($curPos)) { + switch ($curChar) { + case " ": + // 如果开始的时候就有空白,跳过它 + if (empty($curToken)) { + continue 2; + } + // 否则就结束(已经匹配完成) + break 2; + case "\r": + case "\n": + break 2; + default: + $curToken .= $curChar; + } + } + return $curToken; + } + + /** + * @inheritDoc + */ + public function moveToNextToken(): bool + { + $curToken = ""; + $this->currentPosition = $this->nextPosition; + while ($curChar = $this->getNextChar($this->nextPosition)) { + $this->nextPosition++; + $this->currentLinePosition++; + switch ($curChar) { + case " ": + // 如果开始的时候就有空白,跳过它 + if (empty($curToken)) { + $this->currentPosition++; + continue 2; + } + // 否则就结束(已经匹配完成) + break 2; + case "\r": + if ($this->getNextChar($this->nextPosition+1) === "\n") { + // CRLF换行 + $this->nextPosition+=2; + } + // CR换行 + $this->currentLine++; + $this->currentLinePosition=0; + break 2; + case "\n": + // LF换行 + $this->currentLine++; + $this->currentLinePosition=0; + break 2; + default: + $curToken .= $curChar; + } + } + $this->currentToken = $curToken; + return true; + } + + /** + * @inheritDoc + */ + public function skipCurrentLine(): bool + { + // TODO: Implement skipCurrentLine() method. + return true; + } + + /** + * @inheritDoc + */ + public function skipUntil(string $end = "*/"): bool + { + // TODO: Implement skipUntil() method. + return true; + } +} \ No newline at end of file diff --git a/src/Token/TokenAnd.php b/src/Token/TokenAnd.php index 19192ff..c592f95 100644 --- a/src/Token/TokenAnd.php +++ b/src/Token/TokenAnd.php @@ -9,7 +9,7 @@ namespace JerryYan\DSL\Token; -class TokenAnd +class TokenAnd extends TokenInterface { } \ No newline at end of file diff --git a/src/Token/TokenFactory.php b/src/Token/TokenFactory.php index 9fe4825..5dfd04d 100644 --- a/src/Token/TokenFactory.php +++ b/src/Token/TokenFactory.php @@ -12,8 +12,24 @@ namespace JerryYan\DSL\Token; class TokenFactory { /** @var array Token类型及映射类 */ - #private $tokenMap = [ - # Token::AND => TokenAnd::class, - #]; + private $tokenMap = [ + Token::AND => TokenAnd::class, + Token::OR => TokenOr::class, + ]; + protected $tokenNameMap = [ + + ]; + + public function getTokenByName(string $name): TokenInterface + { + if (!isset($this->tokenNameMap[$name])) { + return new TokenUndefined(); + } + $tokenType = $this->tokenNameMap[$name]; + if (!isset($this->tokenMap[$tokenType])) { + return new TokenUndefined(); + } + return new $this->tokenMap[$tokenType]; + } } \ No newline at end of file diff --git a/src/Token/TokenOr.php b/src/Token/TokenOr.php new file mode 100644 index 0000000..16d1516 --- /dev/null +++ b/src/Token/TokenOr.php @@ -0,0 +1,15 @@ + + * @date 2020/12/17 15:20 + */ + + +namespace JerryYan\DSL\Token; + + +class TokenOr extends TokenInterface +{ + +} \ No newline at end of file diff --git a/src/Token/TokenUndefined.php b/src/Token/TokenUndefined.php new file mode 100644 index 0000000..0c9344f --- /dev/null +++ b/src/Token/TokenUndefined.php @@ -0,0 +1,15 @@ + + * @date 2020/12/18 12:15 + */ + + +namespace JerryYan\DSL\Token; + + +class TokenUndefined extends TokenInterface +{ + +} \ No newline at end of file diff --git a/src/Tokenizer/Tokenizer.php b/src/Tokenizer/Tokenizer.php index c7b1958..a165ba2 100644 --- a/src/Tokenizer/Tokenizer.php +++ b/src/Tokenizer/Tokenizer.php @@ -9,7 +9,31 @@ namespace JerryYan\DSL\Tokenizer; +use ArrayIterator; +use JerryYan\DSL\Reader\ReaderInterface; +use JerryYan\DSL\Token\TokenFactory; + class Tokenizer extends TokenizerInterface { + public function __construct(TokenFactory $tokenFactory) + { + $this->tokenFactory = $tokenFactory; + } + + /** + * @inheritDoc + */ + function tokenize(ReaderInterface $reader): ArrayIterator + { + $reader->resetCursor(); + $tokens = []; + while($reader->moveToNextToken()) + { + $currentTokenName = $reader->getCurrentToken(); + $currentToken = $this->tokenFactory->getTokenByName($currentTokenName); + $tokens[] = $currentToken; + } + return new ArrayIterator($tokens); + } } \ No newline at end of file diff --git a/src/Tokenizer/TokenizerInterface.php b/src/Tokenizer/TokenizerInterface.php index 729b11e..45aeb39 100644 --- a/src/Tokenizer/TokenizerInterface.php +++ b/src/Tokenizer/TokenizerInterface.php @@ -9,7 +9,20 @@ namespace JerryYan\DSL\Tokenizer; +use ArrayIterator; +use JerryYan\DSL\Reader\ReaderInterface; +use JerryYan\DSL\Token\TokenInterface; + abstract class TokenizerInterface { + protected $tokenFactory; + /** + * + * @param ReaderInterface $reader + * @return ArrayIterator + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 11:43 + */ + abstract function tokenize(ReaderInterface $reader): ArrayIterator; } \ No newline at end of file diff --git a/tests/Reader/StringReaderTest.php b/tests/Reader/StringReaderTest.php new file mode 100644 index 0000000..304cbea --- /dev/null +++ b/tests/Reader/StringReaderTest.php @@ -0,0 +1,54 @@ + + * @date 2020/12/18 12:19 + */ + +namespace JerryYan\DSL\Test\Reader; + +use JerryYan\DSL\Reader\StringReader; +use PHPUnit\Framework\TestCase; + +class StringReaderTest extends TestCase +{ + protected $readerWithCn; + protected $reader; + protected function setUp(): void + { + $this->reader = new StringReader(" Ahhh This Is 一个 新的 TOken"); + $this->readerWithCn = new StringReader(" 中文 这是 Is 一个 新的 TOken"); + } + + public function testGetNextChar() + { + $this->reader->reset(); + $this->assertEquals('A', $this->reader->getNextChar(), "不匹配"); + $this->readerWithCn->reset(); + $this->assertEquals('中', $this->readerWithCn->getNextChar(), "不匹配"); + } + + public function testGetCurrentToken() + { + $this->reader->reset(); + $this->assertEquals('Ahhh', $this->reader->getCurrentToken(), "不匹配"); + $this->readerWithCn->reset(); + $this->assertEquals('中文', $this->readerWithCn->getCurrentToken(), "不匹配"); + } + + /** + * @author Jerry Yan <792602257@qq.com> + * @date 2020/12/18 14:16 + * @depends testGetNextChar + * @depends testGetCurrentToken + */ + public function testMoveToNextToken() + { + $this->reader->reset(); + $this->reader->moveToNextToken(); + $this->assertEquals('This', $this->reader->getCurrentToken(), "不匹配"); + $this->readerWithCn->reset(); + $this->readerWithCn->moveToNextToken(); + $this->assertEquals('这是', $this->readerWithCn->getCurrentToken(), "不匹配"); + } +}