| 
									
										
										
										
											2024-07-18 21:35:17 +03:00
										 |  |  | import lex from "../../src/services/search/services/lex.js"; | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | describe("Lexer fulltext", () => { | 
					
						
							|  |  |  |   it("simple lexing", () => { | 
					
						
							|  |  |  |     expect(lex("hello world").fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "hello", | 
					
						
							|  |  |  |     "world", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lex("hello, world").fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "hello", | 
					
						
							|  |  |  |     "world", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("use quotes to keep words together", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("'hello world' my friend").fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello world", "my", "friend"]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex('"hello world" my friend').fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello world", "my", "friend"]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("`hello world` my friend").fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello world", "my", "friend"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("you can use different quotes and other special characters inside quotes", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("'i can use \" or ` or #~=*' without problem").fulltextTokens.map( | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |         (t) => t.token | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(['i can use " or ` or #~=*', "without", "problem"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("I can use backslash to escape quotes", () => { | 
					
						
							|  |  |  |     expect(lex('hello \\"world\\"').fulltextTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["hello", '"world"'] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lex("hello \\'world\\'").fulltextTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["hello", "'world'"] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lex("hello \\`world\\`").fulltextTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["hello", "`world`"] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex('"hello \\"world\\"').fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(['hello "world"']); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("'hello \\'world\\''").fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello 'world'"]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("`hello \\`world\\``").fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello `world`"]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lex("\\#token").fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#token", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("quote inside a word does not have a special meaning", () => { | 
					
						
							|  |  |  |     const lexResult = lex("d'Artagnan is dead #hero = d'Artagnan"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lexResult.fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "d'artagnan", | 
					
						
							|  |  |  |     "is", | 
					
						
							|  |  |  |     "dead", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect(lexResult.expressionTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#hero", | 
					
						
							|  |  |  |     "=", | 
					
						
							|  |  |  |     "d'artagnan", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("if quote is not ended then it's just one long token", () => { | 
					
						
							|  |  |  |     expect(lex("'unfinished quote").fulltextTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["unfinished quote"] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("parenthesis and symbols in fulltext section are just normal characters", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("what's u=p <b(r*t)h>").fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["what's", "u=p", "<b(r*t)h>"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("operator characters in expressions are separate tokens", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("# abc+=-def**-+d").expressionTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["#", "abc", "+=-", "def", "**-+", "d"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("escaping special characters", () => { | 
					
						
							|  |  |  |     expect(lex("hello \\#\\~\\'").fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "hello", | 
					
						
							|  |  |  |     "#~'", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | describe("Lexer expression", () => { | 
					
						
							|  |  |  |   it("simple attribute existence", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex("#label ~relation").expressionTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["#label", "~relation"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("simple label operators", () => { | 
					
						
							|  |  |  |     expect(lex("#label*=*text").expressionTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#label", | 
					
						
							|  |  |  |     "*=*", | 
					
						
							|  |  |  |     "text", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("simple label operator with in quotes", () => { | 
					
						
							|  |  |  |     expect(lex("#label*=*'text'").expressionTokens).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     { token: "#label", inQuotes: false, startIndex: 0, endIndex: 5 }, | 
					
						
							|  |  |  |     { token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8 }, | 
					
						
							|  |  |  |     { token: "text", inQuotes: true, startIndex: 10, endIndex: 13 }, | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("simple label operator with param without quotes", () => { | 
					
						
							|  |  |  |     expect(lex("#label*=*text").expressionTokens).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     { token: "#label", inQuotes: false, startIndex: 0, endIndex: 5 }, | 
					
						
							|  |  |  |     { token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8 }, | 
					
						
							|  |  |  |     { token: "text", inQuotes: false, startIndex: 9, endIndex: 12 }, | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("simple label operator with empty string param", () => { | 
					
						
							|  |  |  |     expect(lex("#label = ''").expressionTokens).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     { token: "#label", inQuotes: false, startIndex: 0, endIndex: 5 }, | 
					
						
							|  |  |  |     { token: "=", inQuotes: false, startIndex: 7, endIndex: 7 }, | 
					
						
							|  |  |  |     // weird case for empty strings which ends up with endIndex < startIndex :-(
 | 
					
						
							|  |  |  |     { token: "", inQuotes: true, startIndex: 10, endIndex: 9 }, | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("note. prefix also separates fulltext from expression", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`hello fulltext note.labels.capital = Prague`).expressionTokens.map( | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |         (t) => t.token | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["note", ".", "labels", ".", "capital", "=", "prague"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("note. prefix in quotes will note start expression", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`hello fulltext "note.txt"`).expressionTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual([]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`hello fulltext "note.txt"`).fulltextTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["hello", "fulltext", "note.txt"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("complex expressions with and, or and parenthesis", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map( | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |         (t) => t.token | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#", | 
					
						
							|  |  |  |     "(", | 
					
						
							|  |  |  |     "#label", | 
					
						
							|  |  |  |     "=", | 
					
						
							|  |  |  |     "text", | 
					
						
							|  |  |  |     "or", | 
					
						
							|  |  |  |     "#second", | 
					
						
							|  |  |  |     "=", | 
					
						
							|  |  |  |     "text", | 
					
						
							|  |  |  |     ")", | 
					
						
							|  |  |  |     "and", | 
					
						
							|  |  |  |     "~relation", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("dot separated properties", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex( | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |         `# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'` | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ).expressionTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#", | 
					
						
							|  |  |  |     "~author", | 
					
						
							|  |  |  |     ".", | 
					
						
							|  |  |  |     "title", | 
					
						
							|  |  |  |     "=", | 
					
						
							|  |  |  |     "hugh howey", | 
					
						
							|  |  |  |     "and", | 
					
						
							|  |  |  |     "note", | 
					
						
							|  |  |  |     ".", | 
					
						
							|  |  |  |     "book title", | 
					
						
							|  |  |  |     "=", | 
					
						
							|  |  |  |     "silo", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("negation of label and relation", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`#!capital ~!neighbor`).expressionTokens.map((t) => t.token) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual(["#!capital", "~!neighbor"]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("negation of sub-expression", () => { | 
					
						
							|  |  |  |     expect( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     lex(`# not(#capital) and note.noteId != "root"`).expressionTokens.map( | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |         (t) => t.token | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "#", | 
					
						
							|  |  |  |     "not", | 
					
						
							|  |  |  |     "(", | 
					
						
							|  |  |  |     "#capital", | 
					
						
							|  |  |  |     ")", | 
					
						
							|  |  |  |     "and", | 
					
						
							|  |  |  |     "note", | 
					
						
							|  |  |  |     ".", | 
					
						
							|  |  |  |     "noteid", | 
					
						
							|  |  |  |     "!=", | 
					
						
							|  |  |  |     "root", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("order by multiple labels", () => { | 
					
						
							|  |  |  |     expect(lex(`# orderby #a,#b`).expressionTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["#", "orderby", "#a", ",", "#b"] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | describe("Lexer invalid queries and edge cases", () => { | 
					
						
							|  |  |  |   it("concatenated attributes", () => { | 
					
						
							|  |  |  |     expect(lex("#label~relation").expressionTokens.map((t) => t.token)).toEqual( | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     ["#label", "~relation"] | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   it("trailing escape \\", () => { | 
					
						
							|  |  |  |     expect(lex("abc \\").fulltextTokens.map((t) => t.token)).toEqual([ | 
					
						
							| 
									
										
										
										
											2024-12-22 15:45:54 +02:00
										 |  |  |     "abc", | 
					
						
							|  |  |  |     "\\", | 
					
						
							| 
									
										
										
										
											2024-05-08 23:59:11 +02:00
										 |  |  |     ]); | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | }); |