From 1aeef5016bb542a0b94049d388216d8d1de724e4 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Fri, 22 Jan 2021 17:32:15 +0800 Subject: [PATCH] =?UTF-8?q?=E6=AD=A3=E5=88=99=E5=8C=B9=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++-- src/Token/Factory/DefaultFactory.php | 2 ++ src/Token/Factory/FactoryInterface.php | 21 +++++++++++++++++---- src/Token/Token.php | 5 ++++- src/Token/TokenInterface.php | 4 +++- src/Token/TokenNumber.php | 17 +++++++++++++++++ tests/Tokenizer/TokenizerTest.php | 6 +++++- 7 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 src/Token/TokenNumber.php diff --git a/README.md b/README.md index ef826b0..07d5b7f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ - `Grammar` 语法解析器,验证语法及拆分语法 - `Output` 输出器,将解析的内容输出(并不执行) -- `Parser` 解析器,使用`Grammar`将内容解析成`Tokenizer`用的东西 +- `Lexer` 词法解析器,将语义转换成正常的语法供`Grammar`使用 - `Reader` 读取器,供`Tokenizer`读取使用 - `Token` 所有的Token -- `Tokenizer` Token生成器,生成Token用的 +- `Token/Factory` Token工厂,生成Token用的 +- `Tokenizer` 转换成Token用的 diff --git a/src/Token/Factory/DefaultFactory.php b/src/Token/Factory/DefaultFactory.php index 31ff88c..adca8c3 100644 --- a/src/Token/Factory/DefaultFactory.php +++ b/src/Token/Factory/DefaultFactory.php @@ -16,6 +16,7 @@ use JerryYan\DSL\Token\TokenLogicAnd; use JerryYan\DSL\Token\TokenLogicEqual; use JerryYan\DSL\Token\TokenLogicFake; use JerryYan\DSL\Token\TokenLogicOr; +use JerryYan\DSL\Token\TokenNumber; use JerryYan\DSL\Token\TokenUseVariable; use JerryYan\DSL\Token\TokenVariable; @@ -28,6 +29,7 @@ class DefaultFactory extends FactoryInterface Token::LOGIC_EQUAL => TokenLogicEqual::class, Token::LOGIC_FAKE => TokenLogicFake::class, Token::VARIABLE => TokenVariable::class, + Token::NUMBER => TokenNumber::class, Token::DEFINE => TokenDefine::class, Token::USE_VARIABLE => TokenUseVariable::class, ]; diff --git a/src/Token/Factory/FactoryInterface.php 
b/src/Token/Factory/FactoryInterface.php index 7672f52..dd36590 100644 --- a/src/Token/Factory/FactoryInterface.php +++ b/src/Token/Factory/FactoryInterface.php @@ -19,16 +19,23 @@ abstract class FactoryInterface protected $tokenMap = []; /** @var array> Token别名映射 */ protected $tokenNameMap = []; + /** @var array> Token别名映射 */ + protected $regexNameMap = []; /** @var class-string 默认Token类 */ protected $undefinedTokenClass = TokenUndefined::class; public function __construct() { + /** + * @var string $key + * @var TokenInterface $token + */ foreach ($this->tokenMap as $key=>$token) { - if (property_exists($token, "alias")){ - foreach ($token::$alias as $name) { - $this->tokenNameMap[$name] = $key; - } + foreach ($token::$alias as $name) { + $this->tokenNameMap[$name] = $key; + } + foreach ($token::$regexAlias as $name) { + $this->regexNameMap[$name] = $key; } } } @@ -38,6 +45,12 @@ abstract class FactoryInterface $originalName = $name; if (isset($this->tokenNameMap[$name])) { $name = $this->tokenNameMap[$name]; + } else { + foreach ($this->regexNameMap as $regex => $newName) { + if (preg_match($regex, $name) === 1) { + $name = $newName; break; + } + } } if (!isset($this->tokenMap[$name])) { return new $this->undefinedTokenClass($originalName); diff --git a/src/Token/Token.php b/src/Token/Token.php index 7d1b766..d79ac88 100644 --- a/src/Token/Token.php +++ b/src/Token/Token.php @@ -29,14 +29,17 @@ final class Token const LOGIC_GREATER_EQUAL = "LOGIC_GREATER_EQUAL"; const LOGIC_LESS = "LOGIC_LESS"; const LOGIC_LESS_EQUAL = "LOGIC_LESS_EQUAL"; - const LOGIC_FAKE = "LOGIC_FAKE"; // 变量相关 const DEFINE = "DEFINE"; const USE_VARIABLE = "USE_VARIABLE"; const VARIABLE = "VARIABLE"; const ASSIGN = "ASSIGN"; const CALL = "CALL"; + const NUMBER = "NUMBER"; + // 语义相关 const FAKE = "FAKE"; + const CURRY = "CURRY"; + const LOGIC_FAKE = "LOGIC_FAKE"; // 运算符相关 const OP_CONCAT = "OP_CONCAT"; const OP_CONCAT_EQUAL = "OP_CONCAT_EQUAL"; diff --git a/src/Token/TokenInterface.php 
b/src/Token/TokenInterface.php index c0e626d..d6667cf 100644 --- a/src/Token/TokenInterface.php +++ b/src/Token/TokenInterface.php @@ -18,7 +18,9 @@ abstract class TokenInterface /** @var string 原始数据 */ protected $_raw = ""; /** @var array 定义别名 */ - static $alias = []; + public static $alias = []; + /** @var array 定义正则别名 */ + public static $regexAlias = []; public function __construct(string $original) { diff --git a/src/Token/TokenNumber.php b/src/Token/TokenNumber.php new file mode 100644 index 0000000..1caf17d --- /dev/null +++ b/src/Token/TokenNumber.php @@ -0,0 +1,17 @@ + + * @date 2021/1/22 17:25 + */ + + +namespace JerryYan\DSL\Token; + + +class TokenNumber extends TokenInterface +{ + public static $regexAlias = [ + /* Anchored with ^...$ and the D modifier: the entire token must be a numeric literal (optional sign, decimal point, exponent); a token that merely contains a digit must not be classified as a number. */ '/^(-?(\d*\.?\d+|\d+\.?\d*)([Ee][+-]?\d+)?)$/D' + ]; +} \ No newline at end of file diff --git a/tests/Tokenizer/TokenizerTest.php b/tests/Tokenizer/TokenizerTest.php index 1edce68..876205a 100644 --- a/tests/Tokenizer/TokenizerTest.php +++ b/tests/Tokenizer/TokenizerTest.php @@ -11,8 +11,10 @@ use JerryYan\DSL\Reader\StringReader; use JerryYan\DSL\Token\Factory\DefaultFactory; use JerryYan\DSL\Token\TokenLogicAnd; use JerryYan\DSL\Token\TokenInterface; +use JerryYan\DSL\Token\TokenLogicEqual; use JerryYan\DSL\Token\TokenLogicFake; use JerryYan\DSL\Token\TokenLogicOr; +use JerryYan\DSL\Token\TokenNumber; use JerryYan\DSL\Token\TokenVariable; use JerryYan\DSL\Tokenizer\Tokenizer; use PHPUnit\Framework\TestCase; @@ -21,7 +23,7 @@ class TokenizerTest extends TestCase { protected $tokenizer; protected $reader; - private $text = "当 这个 和 那个 或者 那个 和 这个 的时候"; + private $text = "当 这个 和 那个 或者 那个 和 这个 等于 -0.5 的时候"; /** @var class-string[] 预期的类型 */ private $textTokenType = [ TokenLogicFake::class, @@ -32,6 +34,8 @@ class TokenizerTest extends TestCase TokenVariable::class, TokenLogicAnd::class, TokenVariable::class, + TokenLogicEqual::class, + TokenNumber::class, TokenLogicFake::class, ]; protected function setUp(): void