From 4c4be41f8b741ece501b3ababf6b2d1383df0cc5 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Sat, 19 Dec 2020 12:46:43 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=B8=8B=E4=B8=80=E8=A1=8C?= =?UTF-8?q?=E7=A7=BB=E5=8A=A8=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Reader/ReaderInterface.php | 8 +- src/Reader/StringReader.php | 22 ++--- tests/Reader/StringReaderTest.php | 142 +++++++++++++++++------------- 3 files changed, 99 insertions(+), 73 deletions(-) diff --git a/src/Reader/ReaderInterface.php b/src/Reader/ReaderInterface.php index 3570aa5..224b805 100644 --- a/src/Reader/ReaderInterface.php +++ b/src/Reader/ReaderInterface.php @@ -19,9 +19,9 @@ abstract class ReaderInterface { protected $currentLine = 1; protected $currentPosition = 0; - protected $currentLinePosition = 0; protected $nextPosition = 0; protected $currentToken = ""; + protected $currentLineDelta = 0; /** * 获取下一个字符 @@ -73,6 +73,7 @@ abstract class ReaderInterface { $this->currentLine = 1; $this->currentPosition = 0; + $this->currentLineDelta = 0; $this->nextPosition = 0; $this->moveToNextToken(); } @@ -104,7 +105,7 @@ abstract class ReaderInterface public function getCurrentLinePosition(): int { - return $this->currentLinePosition; + return $this->currentPosition - $this->currentLineDelta; } public function getCurrentToken(): string @@ -115,13 +116,12 @@ abstract class ReaderInterface protected function moveCursorToNextChar(): void { $this->currentPosition++; - $this->currentLinePosition++; } protected function moveCursorToNextLine(int $chars = 1): void { $this->currentPosition += $chars; - $this->currentLinePosition = 0; + $this->currentLineDelta = $this->currentPosition; $this->currentLine++; } } \ No newline at end of file diff --git a/src/Reader/StringReader.php b/src/Reader/StringReader.php index 2b918c8..aedafaa 100644 --- a/src/Reader/StringReader.php +++ b/src/Reader/StringReader.php @@ -25,6 +25,7 @@ class StringReader extends ReaderInterface public function getNextChar(int $startAt = null): string { if ($startAt === null) $startAt = $this->currentPosition; + if (mb_strlen($this->string) <= $startAt) return ""; return mb_substr($this->string, $startAt, 1); } @@ -35,7 +36,7 @@ class StringReader extends ReaderInterface { $curToken = ""; $curPos = $this->nextPosition; - while ($curChar = $this->getNextChar($curPos)) { + while (mb_strlen($curChar = $this->getNextChar($curPos)) > 0) { $curPos++; switch ($curChar) { case " ": @@ -65,7 +66,7 @@ class StringReader extends ReaderInterface { $curToken = ""; $this->currentPosition = $this->nextPosition; - while ($curChar = $this->getNextChar($this->nextPosition)) { + while (mb_strlen($curChar = $this->getNextChar($this->nextPosition)) > 0) { $this->nextPosition++; switch ($curChar) { // TODO: 注释跳过 @@ -78,22 +79,24 @@ class StringReader extends ReaderInterface // 否则就结束(已经匹配完成) break 2; case "\r": - if ($this->getNextChar($this->nextPosition + 1) === "\n") { + if ($this->getNextChar($this->nextPosition) === "\n") { // CRLF换行 - $this->moveCursorToNextChar(); + if (empty($curToken)) { + $this->moveCursorToNextChar(); + } $this->nextPosition++; } // CR换行 - $this->moveCursorToNextLine(); if (empty($curToken)) { + $this->moveCursorToNextLine(); continue 2; } else { break 2; } case "\n": // LF换行 - $this->moveCursorToNextLine(); if (empty($curToken)) { + $this->moveCursorToNextLine(); continue 2; } else { break 2; @@ -103,7 +106,7 @@ class StringReader extends ReaderInterface } } $this->currentToken = $curToken; - return true; + return $curChar !== ""; } /** @@ -116,7 +119,7 @@ class StringReader extends ReaderInterface $curPos++; switch ($curChar) { case "\r": - if ($this->getNextChar($this->nextPosition + 1) === "\n") { + if ($this->getNextChar($curPos) === "\n") { // CRLF换行 $curPos++; } @@ -126,8 +129,7 @@ class StringReader extends ReaderInterface } } $this->nextPosition = $curPos; - $this->currentLine++; - $this->currentLinePosition = 0; + $this->moveCursorToNextLine($curPos - $this->currentPosition); return $this->moveToNextToken(); } diff --git a/tests/Reader/StringReaderTest.php b/tests/Reader/StringReaderTest.php index 428de43..e7faf2a 100644 --- a/tests/Reader/StringReaderTest.php +++ b/tests/Reader/StringReaderTest.php @@ -13,33 +13,52 @@ use PHPUnit\Framework\TestCase; class StringReaderTest extends TestCase { protected $readerWithCrLf; - protected $readerWithCn; protected $reader; + protected $things; + protected $thingsWithCrLf; + protected function setUp(): void { - $this->reader = new StringReader(" Ahhh This Is 一个 新的 TOken"); - $this->readerWithCn = new StringReader(" 中文 这是 Is 一个 新的 TOken"); - $this->readerWithCrLf = new StringReader(" 中文 \r\n\r 这是 \r Is \n 一个 新的 TOken"); + $this->things = [ + 'original' => " Ahhh This Is 一个 新的 TOken", + 'tokens' => ["Ahhh", "This", "Is", "一个", "新的", "TOken"], + 'nextTokens' => ["This", "Is", "一个", "新的", "TOken", ""], + 'positions' => [1, 7, 12, 15, 18, 21], + 'lines' => [1, 1, 1, 1, 1, 1], + 'linePositions' => [1, 7, 12, 15, 18, 21], + 'moveToNextLines' => [], + ]; + $this->thingsWithCrLf = [ + 'original' => " 中文 \r\n\r 这是 \r Is \n\n 一个 新的 TOken", + 'tokens' => ["中文", "这是", "Is", "一个", "新的", "TOken"], + 'nextTokens' => ["这是", "Is", "一个", "新的", "TOken", ""], + 'positions' => [1, 8, 13, 19, 22, 25], + 'lines' => [1, 3, 4, 6, 6, 6], + 'linePositions' => [1, 1, 1, 1, 4, 7], + 'moveToNextLines' => [1, 2, 3], + ]; + $this->reader = new StringReader($this->things['original']); + $this->readerWithCrLf = new StringReader($this->thingsWithCrLf['original']); } public function testGetNextChar() { $this->reader->reset(); - $this->assertEquals('A', $this->reader->getNextChar(), "不匹配"); - $this->assertEquals(1, $this->reader->getCurrentPosition(), "CurPos与预计不符"); - $this->readerWithCn->reset(); - $this->assertEquals('中', $this->readerWithCn->getNextChar(), "不匹配"); - $this->assertEquals(1, $this->readerWithCn->getCurrentPosition(), "CurPos与预计不符"); + $this->assertEquals(mb_substr(trim($this->things['original']), 0, 1), $this->reader->getNextChar(), "不匹配"); + $this->assertEquals($this->things['positions'][0], $this->reader->getCurrentPosition(), "CurPos与预计不符"); + $this->readerWithCrLf->reset(); + $this->assertEquals(mb_substr(trim($this->thingsWithCrLf['original']), 0, 1), $this->readerWithCrLf->getNextChar(), "不匹配"); + $this->assertEquals($this->thingsWithCrLf['positions'][0], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符"); } public function testGetCurrentToken() { $this->reader->reset(); - $this->assertEquals('Ahhh', $this->reader->getCurrentToken(), "不匹配"); - $this->assertEquals(6, $this->reader->getNextPosition(), "NextPos与预计不符"); - $this->readerWithCn->reset(); - $this->assertEquals('中文', $this->readerWithCn->getCurrentToken(), "不匹配"); - $this->assertEquals(4, $this->readerWithCn->getNextPosition(), "NextPos与预计不符"); + $this->assertEquals($this->things['positions'][0], $this->reader->getCurrentPosition(), "CurPos与预计不符"); + $this->assertEquals($this->things['tokens'][0], $this->reader->getCurrentToken(), "不匹配"); + $this->readerWithCrLf->reset(); + $this->assertEquals($this->thingsWithCrLf['positions'][0], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符"); + $this->assertEquals($this->thingsWithCrLf['tokens'][0], $this->readerWithCrLf->getCurrentToken(), "不匹配"); } /** @@ -53,25 +72,38 @@ class StringReaderTest extends TestCase public function testMoveToNextToken() { $this->reader->reset(); - $oldCurToken = $this->reader->getCurrentToken(); - $oldNextPos = $this->reader->getNextPosition(); - $oldNextToken = $this->reader->getNextToken(); - $this->reader->moveToNextToken(); - $this->assertNotEquals($oldCurToken, $this->reader->getCurrentPosition(), "CurToken与旧CurToken相同"); - $this->assertNotEquals($oldNextPos, $this->reader->getNextPosition(), "NextPos与旧NextPos相同"); - $this->assertEquals($oldNextToken, $this->reader->getCurrentToken(), "不匹配"); - $this->assertEquals(7, $this->reader->getCurrentPosition(), "CurPos与预计不符"); - $this->assertNotEquals($this->reader->getNextPosition(), $this->reader->getCurrentPosition(), "CurPos与NextPos相同"); - // CJK Support - $this->readerWithCn->reset(); - $oldCurTokenCn = $this->readerWithCn->getCurrentToken(); - $oldNextPosCn = $this->readerWithCn->getNextPosition(); - $oldNextTokenCn = $this->readerWithCn->getNextToken(); - $this->readerWithCn->moveToNextToken(); - $this->assertNotEquals($oldCurTokenCn, $this->readerWithCn->getCurrentPosition(), "CurToken与旧CurToken相同"); - $this->assertNotEquals($oldNextPosCn, $this->readerWithCn->getNextPosition(), "NextPos与旧NextPos相同"); - $this->assertEquals($oldNextTokenCn, $this->readerWithCn->getCurrentToken(), "不匹配"); - $this->assertEquals(5, $this->readerWithCn->getCurrentPosition(), "CurPos与预计不符"); + foreach ($this->things['nextTokens'] as $key=> $nextToken){ + $oldCurToken = $this->reader->getCurrentToken(); + $oldNextPos = $this->reader->getNextPosition(); + $oldNextToken = $this->reader->getNextToken(); + $this->assertEquals($nextToken, $oldNextToken, "不匹配"); + $this->assertEquals($this->things['positions'][$key], $this->reader->getCurrentPosition(), "CurPos与预计不符"); + $this->assertEquals($this->things['lines'][$key], $this->reader->getCurrentLine(), "CurLine与预计不符"); + $this->assertEquals($this->things['linePositions'][$key], $this->reader->getCurrentLinePosition(), "CLPos与预计不符"); + $hasNext = $this->reader->moveToNextToken(); + if ($hasNext) { + $this->assertNotEquals($oldCurToken, $this->reader->getCurrentToken(), "CurToken与旧CurToken相同"); + $this->assertNotEquals($oldNextPos, $this->reader->getNextPosition(), "NextPos与旧NextPos相同"); + $this->assertNotEquals($this->reader->getNextPosition(), $this->reader->getCurrentPosition(), "CurPos与NextPos相同"); + } + } + // Cr/LF Support + $this->readerWithCrLf->reset(); + foreach ($this->thingsWithCrLf['nextTokens'] as $key=> $nextToken){ + $oldCurToken = $this->readerWithCrLf->getCurrentToken(); + $oldNextPos = $this->readerWithCrLf->getNextPosition(); + $oldNextToken = $this->readerWithCrLf->getNextToken(); + $this->assertEquals($nextToken, $oldNextToken, "不匹配"); + $this->assertEquals($this->thingsWithCrLf['positions'][$key], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符"); + $this->assertEquals($this->thingsWithCrLf['lines'][$key], $this->readerWithCrLf->getCurrentLine(), "CurLine与预计不符"); + $this->assertEquals($this->thingsWithCrLf['linePositions'][$key], $this->readerWithCrLf->getCurrentLinePosition(), "CLPos与预计不符"); + $hasNext = $this->readerWithCrLf->moveToNextToken(); + if ($hasNext) { + $this->assertNotEquals($oldCurToken, $this->readerWithCrLf->getCurrentToken(), "CurToken与旧CurToken相同"); + $this->assertNotEquals($oldNextPos, $this->readerWithCrLf->getNextPosition(), "NextPos与旧NextPos相同"); + $this->assertNotEquals($this->readerWithCrLf->getNextPosition(), $this->readerWithCrLf->getCurrentPosition(), "CurPos与NextPos相同"); + } + } } /** @@ -82,45 +114,37 @@ class StringReaderTest extends TestCase public function testGetNextToken() { $this->reader->reset(); - $curPos = $this->readerWithCn->getCurrentPosition(); $nextPos = $this->reader->getNextPosition(); $string = $this->reader->getNextToken(); $this->assertEquals($string, $this->reader->getNextToken(), "不匹配"); $this->assertEquals($this->reader->getNextToken(), $this->reader->getNextToken(), "不匹配"); - $this->assertEquals($curPos, $this->reader->getCurrentPosition(), "CurPos不可以发生变化"); $this->assertEquals($nextPos, $this->reader->getNextPosition(), "NextPos不可以发生变化"); } public function testSkipCurrentLine() { $this->readerWithCrLf->resetCursor(); - $this->readerWithCrLf->skipCurrentLine(); - // moveToNextToken又移动了 - $this->assertEquals(3, $this->readerWithCrLf->getCurrentLine(), "行号不匹配"); - $this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配"); - $this->assertEquals("这是", $this->readerWithCrLf->getCurrentToken(), "Token不匹配"); - $this->assertEquals(8, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配"); - $this->readerWithCrLf->skipCurrentLine(); - $this->assertEquals(4, $this->readerWithCrLf->getCurrentLine(), "行号不匹配"); - $this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配"); - $this->assertEquals("Is", $this->readerWithCrLf->getCurrentToken(), "Token不匹配"); - $this->assertEquals(13, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配"); - $this->readerWithCrLf->skipCurrentLine(); - $this->assertEquals(5, $this->readerWithCrLf->getCurrentLine(), "行号不匹配"); - $this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配"); - $this->assertEquals("一个", $this->readerWithCrLf->getCurrentToken(), "Token不匹配"); - $this->assertEquals(18, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配"); + foreach ($this->thingsWithCrLf['moveToNextLines'] as $key){ + $this->readerWithCrLf->skipCurrentLine(); + $this->assertEquals($this->thingsWithCrLf['lines'][$key], $this->readerWithCrLf->getCurrentLine(), "行号不匹配"); + $this->assertEquals($this->thingsWithCrLf['linePositions'][$key], $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配"); + $this->assertEquals($this->thingsWithCrLf['tokens'][$key], $this->readerWithCrLf->getCurrentToken(), "Token不匹配"); + $this->assertEquals($this->thingsWithCrLf['positions'][$key], $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配"); + $this->assertEquals($this->thingsWithCrLf['nextTokens'][$key], $this->readerWithCrLf->getNextToken(), "NextToken不匹配"); + } } public function testResetCursor() { - $this->reader->moveToNextToken(); - $curPos = $this->reader->getCurrentPosition(); - $nextPos = $this->reader->getNextPosition(); - $string = $this->reader->getCurrentToken(); - $this->reader->resetCursor(); - $this->assertNotEquals($curPos, $this->reader->getCurrentPosition(), "CurPos未发生变化"); - $this->assertNotEquals($nextPos, $this->reader->getNextPosition(), "NextPos未发生变化"); - $this->assertNotEquals($string, $this->reader->getCurrentToken(), "CurToken未发生变化"); + $this->readerWithCrLf->moveToNextToken(); + $curPos = $this->readerWithCrLf->getCurrentPosition(); + $nextPos = $this->readerWithCrLf->getNextPosition(); + $curLine = $this->readerWithCrLf->getCurrentLine(); + $string = $this->readerWithCrLf->getCurrentToken(); + $this->readerWithCrLf->resetCursor(); + $this->assertNotEquals($curPos, $this->readerWithCrLf->getCurrentPosition(), "CurPos未发生变化"); + $this->assertNotEquals($nextPos, $this->readerWithCrLf->getNextPosition(), "NextPos未发生变化"); + $this->assertNotEquals($curLine, $this->readerWithCrLf->getCurrentLine(), "CurLine未发生变化"); + $this->assertNotEquals($string, $this->readerWithCrLf->getCurrentToken(), "CurToken未发生变化"); } }