diff --git a/spec/lexer.spec.js b/spec/lexer.spec.js index de0f81370..e24235594 100644 --- a/spec/lexer.spec.js +++ b/spec/lexer.spec.js @@ -1,6 +1,6 @@ -const lexerSpec = require('../src/services/search/lexer.js'); +const lexerSpec = require('../src/services/search/lexer'); -describe("Lexer", function() { +describe("Lexer fulltext", () => { it("simple lexing", () => { expect(lexerSpec("hello world").fulltextTokens) .toEqual(["hello", "world"]); @@ -26,4 +26,36 @@ describe("Lexer", function() { expect(lexerSpec("'unfinished quote").fulltextTokens) .toEqual(["unfinished quote"]); }); + + it("parenthesis and symbols in fulltext section are just normal characters", () => { + expect(lexerSpec("what's u=p ").fulltextTokens) + .toEqual(["what's", "u=p", ""]); + }); + + it("escaping special characters", () => { + expect(lexerSpec("hello \\#\\@\\'").fulltextTokens) + .toEqual(["hello", "#@'"]); + }); +}); + +describe("Lexer expression", () => { + it("simple attribute existence", () => { + expect(lexerSpec("#label @relation").expressionTokens) + .toEqual(["#label", "@relation"]); + }); + + it("simple label operators", () => { + expect(lexerSpec("#label*=*text").expressionTokens) + .toEqual(["#label", "*=*", "text"]); + }); + + it("spaces in attribute names and values", () => { + expect(lexerSpec(`#'long label'="hello o' world" @'long relation'`).expressionTokens) + .toEqual(["#long label", "=", "hello o' world", "@long relation"]); + }); + + it("complex expressions with and, or and parenthesis", () => { + expect(lexerSpec(`# (#label=text OR #second=text) AND @relation`).expressionTokens) + .toEqual(["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"]); + }); }); diff --git a/src/services/search/lexer.js b/src/services/search/lexer.js index 02510079d..301355d30 100644 --- a/src/services/search/lexer.js +++ b/src/services/search/lexer.js @@ -5,12 +5,20 @@ function lexer(str) { let quotes = false; let fulltextEnded = false; let currentWord = ''; - let symbol = false; - function isSymbol(chr) { + function isOperatorSymbol(chr) { return ['=', '*', '>', '<', '!'].includes(chr); } + function previusOperatorSymbol() { + if (currentWord.length === 0) { + return false; + } + else { + return isOperatorSymbol(currentWord[currentWord.length - 1]); + } + } + function finishWord() { if (currentWord === '') { return; @@ -42,7 +50,11 @@ function lexer(str) { } else if (['"', "'", '`'].includes(chr)) { if (!quotes) { - if (currentWord.length === 0) { + if (currentWord.length === 0 || fulltextEnded) { + if (previusOperatorSymbol()) { + finishWord(); + } + quotes = chr; } else { @@ -63,19 +75,26 @@ function lexer(str) { continue; } else if (!quotes) { - if (chr === '#' || chr === '@') { + if (currentWord.length === 0 && (chr === '#' || chr === '@')) { fulltextEnded = true; + currentWord = chr; + continue; } else if (chr === ' ') { finishWord(); continue; } - else if (fulltextEnded && symbol !== isSymbol(chr)) { + else if (fulltextEnded && ['(', ')'].includes(chr)) { + finishWord(); + currentWord += chr; + finishWord(); + continue; + } + else if (fulltextEnded && previusOperatorSymbol() !== isOperatorSymbol(chr)) { finishWord(); currentWord += chr; - symbol = isSymbol(chr); continue; } }