优化下一行移动逻辑

This commit is contained in:
Jerry Yan 2020-12-19 12:46:43 +08:00
parent f995b932e4
commit 4c4be41f8b
3 changed files with 99 additions and 73 deletions

View File

@ -19,9 +19,9 @@ abstract class ReaderInterface
{
protected $currentLine = 1;
protected $currentPosition = 0;
protected $currentLinePosition = 0;
protected $nextPosition = 0;
protected $currentToken = "";
protected $currentLineDelta = 0;
/**
* 获取下一个字符
@ -73,6 +73,7 @@ abstract class ReaderInterface
{
$this->currentLine = 1;
$this->currentPosition = 0;
$this->currentLineDelta = 0;
$this->nextPosition = 0;
$this->moveToNextToken();
}
@ -104,7 +105,7 @@ abstract class ReaderInterface
/**
 * Returns the cursor's position within the current line.
 *
 * NOTE(review): two return statements appear back to back, so only the first
 * one can execute and the `currentPosition - currentLineDelta` computation is
 * unreachable as written. This looks like a rendered diff in which the first
 * return is the removed line and the second is its replacement — confirm
 * against the real file before relying on either.
 *
 * @return int The in-line position of the cursor.
 */
public function getCurrentLinePosition(): int
{
return $this->currentLinePosition;
return $this->currentPosition - $this->currentLineDelta;
}
public function getCurrentToken(): string
@ -115,13 +116,12 @@ abstract class ReaderInterface
/**
 * Advances the cursor by one character.
 *
 * Increments both the absolute position and the in-line position counter.
 */
protected function moveCursorToNextChar(): void
{
$this->currentPosition++;
$this->currentLinePosition++;
}
/**
 * Moves the cursor forward onto the next line.
 *
 * @param int $chars Number of characters to add to the absolute position
 *                   (defaults to 1).
 */
protected function moveCursorToNextLine(int $chars = 1): void
{
$this->currentPosition += $chars;
// NOTE(review): the in-line counter reset and the line-delta bookkeeping both
// appear here; presumably one replaces the other in the real file — confirm.
$this->currentLinePosition = 0;
// Remember the absolute position at which the new line begins.
$this->currentLineDelta = $this->currentPosition;
$this->currentLine++;
}
}

View File

@ -25,6 +25,7 @@ class StringReader extends ReaderInterface
/**
 * Returns the single character at the given offset, counted in multibyte
 * characters rather than bytes.
 *
 * @param int|null $startAt Character offset to read from; null means the
 *                          current cursor position.
 * @return string The character at that offset, or "" when the offset is at or
 *                past the end of the string.
 */
public function getNextChar(?int $startAt = null): string
{
    // The nullable type is spelled out explicitly: relying on a `= null`
    // default to make `int` nullable is deprecated as of PHP 8.4.
    $startAt = $startAt ?? $this->currentPosition;

    // Report end-of-input as an empty string.
    if ($startAt >= mb_strlen($this->string)) {
        return "";
    }

    return mb_substr($this->string, $startAt, 1);
}
@ -35,7 +36,7 @@ class StringReader extends ReaderInterface
{
$curToken = "";
$curPos = $this->nextPosition;
while ($curChar = $this->getNextChar($curPos)) {
while (mb_strlen($curChar = $this->getNextChar($curPos)) > 0) {
$curPos++;
switch ($curChar) {
case " ":
@ -65,7 +66,7 @@ class StringReader extends ReaderInterface
{
$curToken = "";
$this->currentPosition = $this->nextPosition;
while ($curChar = $this->getNextChar($this->nextPosition)) {
while (mb_strlen($curChar = $this->getNextChar($this->nextPosition)) > 0) {
$this->nextPosition++;
switch ($curChar) {
// TODO: 注释跳过
@ -78,22 +79,24 @@ class StringReader extends ReaderInterface
// 否则就结束(已经匹配完成)
break 2;
case "\r":
if ($this->getNextChar($this->nextPosition + 1) === "\n") {
if ($this->getNextChar($this->nextPosition) === "\n") {
// CRLF换行
$this->moveCursorToNextChar();
if (empty($curToken)) {
$this->moveCursorToNextChar();
}
$this->nextPosition++;
}
// CR换行
$this->moveCursorToNextLine();
if (empty($curToken)) {
$this->moveCursorToNextLine();
continue 2;
} else {
break 2;
}
case "\n":
// LF换行
$this->moveCursorToNextLine();
if (empty($curToken)) {
$this->moveCursorToNextLine();
continue 2;
} else {
break 2;
@ -103,7 +106,7 @@ class StringReader extends ReaderInterface
}
}
$this->currentToken = $curToken;
return true;
return $curChar !== "";
}
/**
@ -116,7 +119,7 @@ class StringReader extends ReaderInterface
$curPos++;
switch ($curChar) {
case "\r":
if ($this->getNextChar($this->nextPosition + 1) === "\n") {
if ($this->getNextChar($curPos) === "\n") {
// CRLF换行
$curPos++;
}
@ -126,8 +129,7 @@ class StringReader extends ReaderInterface
}
}
$this->nextPosition = $curPos;
$this->currentLine++;
$this->currentLinePosition = 0;
$this->moveCursorToNextLine($curPos - $this->currentPosition);
return $this->moveToNextToken();
}

View File

@ -13,33 +13,52 @@ use PHPUnit\Framework\TestCase;
class StringReaderTest extends TestCase
{
protected $readerWithCrLf;
protected $readerWithCn;
protected $reader;
protected $things;
protected $thingsWithCrLf;
/**
 * Builds the fixtures shared by the tests: the input strings, the tables of
 * expected tokens/positions/lines, and the readers constructed from them.
 *
 * NOTE(review): `reader` and `readerWithCrLf` are each assigned twice; the
 * assignments at the end of the method are the ones that take effect. This
 * looks like removed and added diff lines shown together — confirm.
 */
protected function setUp(): void
{
$this->reader = new StringReader(" Ahhh This Is 一个 新的 TOken");
$this->readerWithCn = new StringReader(" 中文 这是 Is 一个 新的 TOken");
// This literal has a single "\n" before the fourth token, whereas the
// 'original' string in thingsWithCrLf below has "\n\n".
$this->readerWithCrLf = new StringReader(" 中文 \r\n\r 这是 \r Is \n 一个 新的 TOken");
// Expected values for the single-line input, indexed by token order.
$this->things = [
'original' => " Ahhh This Is 一个 新的 TOken",
'tokens' => ["Ahhh", "This", "Is", "一个", "新的", "TOken"],
'nextTokens' => ["This", "Is", "一个", "新的", "TOken", ""],
'positions' => [1, 7, 12, 15, 18, 21],
'lines' => [1, 1, 1, 1, 1, 1],
'linePositions' => [1, 7, 12, 15, 18, 21],
'moveToNextLines' => [],
];
// Expected values for the input that mixes CRLF, CR and LF line breaks.
$this->thingsWithCrLf = [
'original' => " 中文 \r\n\r 这是 \r Is \n\n 一个 新的 TOken",
'tokens' => ["中文", "这是", "Is", "一个", "新的", "TOken"],
'nextTokens' => ["这是", "Is", "一个", "新的", "TOken", ""],
'positions' => [1, 8, 13, 19, 22, 25],
'lines' => [1, 3, 4, 6, 6, 6],
'linePositions' => [1, 1, 1, 1, 4, 7],
// Indices into the tables above that testSkipCurrentLine iterates over.
'moveToNextLines' => [1, 2, 3],
];
// Readers built from the fixture strings; these override the assignments above.
$this->reader = new StringReader($this->things['original']);
$this->readerWithCrLf = new StringReader($this->thingsWithCrLf['original']);
}
/**
 * After reset(), getNextChar() must return the first non-blank character of
 * the input and the current position must match the expected value.
 *
 * NOTE(review): hard-coded assertions and fixture-driven assertions for the
 * same behaviour appear side by side, presumably old and new diff lines —
 * confirm which set belongs to the real file.
 */
public function testGetNextChar()
{
$this->reader->reset();
$this->assertEquals('A', $this->reader->getNextChar(), "不匹配");
$this->assertEquals(1, $this->reader->getCurrentPosition(), "CurPos与预计不符");
$this->readerWithCn->reset();
$this->assertEquals('中', $this->readerWithCn->getNextChar(), "不匹配");
$this->assertEquals(1, $this->readerWithCn->getCurrentPosition(), "CurPos与预计不符");
// Fixture-driven form: the expected character is the first one of the trimmed input.
$this->assertEquals(mb_substr(trim($this->things['original']), 0, 1), $this->reader->getNextChar(), "不匹配");
$this->assertEquals($this->things['positions'][0], $this->reader->getCurrentPosition(), "CurPos与预计不符");
$this->readerWithCrLf->reset();
$this->assertEquals(mb_substr(trim($this->thingsWithCrLf['original']), 0, 1), $this->readerWithCrLf->getNextChar(), "不匹配");
$this->assertEquals($this->thingsWithCrLf['positions'][0], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符");
}
/**
 * After reset(), the current token must be the first token of the input.
 *
 * NOTE(review): hard-coded assertions (including next-position checks) and
 * fixture-driven assertions appear side by side, presumably old and new diff
 * lines — confirm which set belongs to the real file.
 */
public function testGetCurrentToken()
{
$this->reader->reset();
$this->assertEquals('Ahhh', $this->reader->getCurrentToken(), "不匹配");
$this->assertEquals(6, $this->reader->getNextPosition(), "NextPos与预计不符");
$this->readerWithCn->reset();
$this->assertEquals('中文', $this->readerWithCn->getCurrentToken(), "不匹配");
$this->assertEquals(4, $this->readerWithCn->getNextPosition(), "NextPos与预计不符");
// Fixture-driven form: first expected position and first expected token.
$this->assertEquals($this->things['positions'][0], $this->reader->getCurrentPosition(), "CurPos与预计不符");
$this->assertEquals($this->things['tokens'][0], $this->reader->getCurrentToken(), "不匹配");
$this->readerWithCrLf->reset();
$this->assertEquals($this->thingsWithCrLf['positions'][0], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符");
$this->assertEquals($this->thingsWithCrLf['tokens'][0], $this->readerWithCrLf->getCurrentToken(), "不匹配");
}
/**
@ -53,25 +72,38 @@ class StringReaderTest extends TestCase
/**
 * Steps through the input with moveToNextToken() and checks the reader state
 * after each step.
 *
 * The fixture-driven loops compare, for every token, the upcoming token, the
 * absolute position, the line number and the in-line position against the
 * expected tables, then advance and check that the state actually changed.
 *
 * NOTE(review): single-step hard-coded checks and the fixture-driven loops
 * both appear here, presumably old and new diff lines — confirm.
 */
public function testMoveToNextToken()
{
$this->reader->reset();
$oldCurToken = $this->reader->getCurrentToken();
$oldNextPos = $this->reader->getNextPosition();
$oldNextToken = $this->reader->getNextToken();
$this->reader->moveToNextToken();
// NOTE(review): this compares the old token with the current *position*,
// not the current token; the loop below uses getCurrentToken() instead.
$this->assertNotEquals($oldCurToken, $this->reader->getCurrentPosition(), "CurToken与旧CurToken相同");
$this->assertNotEquals($oldNextPos, $this->reader->getNextPosition(), "NextPos与旧NextPos相同");
$this->assertEquals($oldNextToken, $this->reader->getCurrentToken(), "不匹配");
$this->assertEquals(7, $this->reader->getCurrentPosition(), "CurPos与预计不符");
$this->assertNotEquals($this->reader->getNextPosition(), $this->reader->getCurrentPosition(), "CurPos与NextPos相同");
// CJK Support
$this->readerWithCn->reset();
$oldCurTokenCn = $this->readerWithCn->getCurrentToken();
$oldNextPosCn = $this->readerWithCn->getNextPosition();
$oldNextTokenCn = $this->readerWithCn->getNextToken();
$this->readerWithCn->moveToNextToken();
// NOTE(review): same token-versus-position comparison as above.
$this->assertNotEquals($oldCurTokenCn, $this->readerWithCn->getCurrentPosition(), "CurToken与旧CurToken相同");
$this->assertNotEquals($oldNextPosCn, $this->readerWithCn->getNextPosition(), "NextPos与旧NextPos相同");
$this->assertEquals($oldNextTokenCn, $this->readerWithCn->getCurrentToken(), "不匹配");
$this->assertEquals(5, $this->readerWithCn->getCurrentPosition(), "CurPos与预计不符");
// Fixture-driven walk over the single-line input.
foreach ($this->things['nextTokens'] as $key=> $nextToken){
$oldCurToken = $this->reader->getCurrentToken();
$oldNextPos = $this->reader->getNextPosition();
$oldNextToken = $this->reader->getNextToken();
$this->assertEquals($nextToken, $oldNextToken, "不匹配");
$this->assertEquals($this->things['positions'][$key], $this->reader->getCurrentPosition(), "CurPos与预计不符");
$this->assertEquals($this->things['lines'][$key], $this->reader->getCurrentLine(), "CurLine与预计不符");
$this->assertEquals($this->things['linePositions'][$key], $this->reader->getCurrentLinePosition(), "CLPos与预计不符");
$hasNext = $this->reader->moveToNextToken();
// The "state changed" checks only run when moveToNextToken() returned true.
if ($hasNext) {
$this->assertNotEquals($oldCurToken, $this->reader->getCurrentToken(), "CurToken与旧CurToken相同");
$this->assertNotEquals($oldNextPos, $this->reader->getNextPosition(), "NextPos与旧NextPos相同");
$this->assertNotEquals($this->reader->getNextPosition(), $this->reader->getCurrentPosition(), "CurPos与NextPos相同");
}
}
// Cr/LF Support
$this->readerWithCrLf->reset();
foreach ($this->thingsWithCrLf['nextTokens'] as $key=> $nextToken){
$oldCurToken = $this->readerWithCrLf->getCurrentToken();
$oldNextPos = $this->readerWithCrLf->getNextPosition();
$oldNextToken = $this->readerWithCrLf->getNextToken();
$this->assertEquals($nextToken, $oldNextToken, "不匹配");
$this->assertEquals($this->thingsWithCrLf['positions'][$key], $this->readerWithCrLf->getCurrentPosition(), "CurPos与预计不符");
$this->assertEquals($this->thingsWithCrLf['lines'][$key], $this->readerWithCrLf->getCurrentLine(), "CurLine与预计不符");
$this->assertEquals($this->thingsWithCrLf['linePositions'][$key], $this->readerWithCrLf->getCurrentLinePosition(), "CLPos与预计不符");
$hasNext = $this->readerWithCrLf->moveToNextToken();
if ($hasNext) {
$this->assertNotEquals($oldCurToken, $this->readerWithCrLf->getCurrentToken(), "CurToken与旧CurToken相同");
$this->assertNotEquals($oldNextPos, $this->readerWithCrLf->getNextPosition(), "NextPos与旧NextPos相同");
$this->assertNotEquals($this->readerWithCrLf->getNextPosition(), $this->readerWithCrLf->getCurrentPosition(), "CurPos与NextPos相同");
}
}
}
/**
@ -82,45 +114,37 @@ class StringReaderTest extends TestCase
/**
 * getNextToken() must only peek: calling it repeatedly yields the same token
 * and leaves the current and next positions unchanged.
 */
public function testGetNextToken()
{
$this->reader->reset();
// NOTE(review): the baseline position is read from readerWithCn but later
// compared with reader's position — looks unintended; confirm.
$curPos = $this->readerWithCn->getCurrentPosition();
$nextPos = $this->reader->getNextPosition();
$string = $this->reader->getNextToken();
$this->assertEquals($string, $this->reader->getNextToken(), "不匹配");
$this->assertEquals($this->reader->getNextToken(), $this->reader->getNextToken(), "不匹配");
$this->assertEquals($curPos, $this->reader->getCurrentPosition(), "CurPos不可以发生变化");
$this->assertEquals($nextPos, $this->reader->getNextPosition(), "NextPos不可以发生变化");
}
/**
 * skipCurrentLine() must land on the first token of a following line and
 * report the matching line number, in-line position, token and position.
 *
 * NOTE(review): three hard-coded skip/assert groups are followed by a
 * fixture-driven loop that performs further skips on the same reader; these
 * are presumably old and new diff lines rather than one sequence — confirm.
 */
public function testSkipCurrentLine()
{
$this->readerWithCrLf->resetCursor();
$this->readerWithCrLf->skipCurrentLine();
// moveToNextToken has moved the cursor again
$this->assertEquals(3, $this->readerWithCrLf->getCurrentLine(), "行号不匹配");
$this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配");
$this->assertEquals("这是", $this->readerWithCrLf->getCurrentToken(), "Token不匹配");
$this->assertEquals(8, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配");
$this->readerWithCrLf->skipCurrentLine();
$this->assertEquals(4, $this->readerWithCrLf->getCurrentLine(), "行号不匹配");
$this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配");
$this->assertEquals("Is", $this->readerWithCrLf->getCurrentToken(), "Token不匹配");
$this->assertEquals(13, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配");
$this->readerWithCrLf->skipCurrentLine();
$this->assertEquals(5, $this->readerWithCrLf->getCurrentLine(), "行号不匹配");
$this->assertEquals(1, $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配");
$this->assertEquals("一个", $this->readerWithCrLf->getCurrentToken(), "Token不匹配");
$this->assertEquals(18, $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配");
// Fixture-driven form: each listed index names the expected state after one more skip.
foreach ($this->thingsWithCrLf['moveToNextLines'] as $key){
$this->readerWithCrLf->skipCurrentLine();
$this->assertEquals($this->thingsWithCrLf['lines'][$key], $this->readerWithCrLf->getCurrentLine(), "行号不匹配");
$this->assertEquals($this->thingsWithCrLf['linePositions'][$key], $this->readerWithCrLf->getCurrentLinePosition(), "CLPos不匹配");
$this->assertEquals($this->thingsWithCrLf['tokens'][$key], $this->readerWithCrLf->getCurrentToken(), "Token不匹配");
$this->assertEquals($this->thingsWithCrLf['positions'][$key], $this->readerWithCrLf->getCurrentPosition(), "CurPos不匹配");
$this->assertEquals($this->thingsWithCrLf['nextTokens'][$key], $this->readerWithCrLf->getNextToken(), "NextToken不匹配");
}
}
/**
 * resetCursor() must change the reader state captured after advancing one
 * token: the positions and the current token, plus the line number for the
 * multi-line reader.
 */
public function testResetCursor()
{
// Advance once, snapshot the state, then reset and expect every value to differ.
$this->reader->moveToNextToken();
$curPos = $this->reader->getCurrentPosition();
$nextPos = $this->reader->getNextPosition();
$string = $this->reader->getCurrentToken();
$this->reader->resetCursor();
$this->assertNotEquals($curPos, $this->reader->getCurrentPosition(), "CurPos未发生变化");
$this->assertNotEquals($nextPos, $this->reader->getNextPosition(), "NextPos未发生变化");
$this->assertNotEquals($string, $this->reader->getCurrentToken(), "CurToken未发生变化");
// Same check on the multi-line input, additionally covering the line number.
$this->readerWithCrLf->moveToNextToken();
$curPos = $this->readerWithCrLf->getCurrentPosition();
$nextPos = $this->readerWithCrLf->getNextPosition();
$curLine = $this->readerWithCrLf->getCurrentLine();
$string = $this->readerWithCrLf->getCurrentToken();
$this->readerWithCrLf->resetCursor();
$this->assertNotEquals($curPos, $this->readerWithCrLf->getCurrentPosition(), "CurPos未发生变化");
$this->assertNotEquals($nextPos, $this->readerWithCrLf->getNextPosition(), "NextPos未发生变化");
$this->assertNotEquals($curLine, $this->readerWithCrLf->getCurrentLine(), "CurLine未发生变化");
$this->assertNotEquals($string, $this->readerWithCrLf->getCurrentToken(), "CurToken未发生变化");
}
}