web: Fix Vendored Lex package. Add Unit Tests (#22083)

* Fix API reference paths.

* Clean up vendored code.

* Flesh out test.

* Fix edgecase.

* Clean up return value.

* Fix linter.
This commit is contained in:
Teffen Ellis
2026-05-06 16:31:17 +02:00
committed by GitHub
parent 16de9d1b44
commit 1db6c3af8b
9 changed files with 514 additions and 122 deletions
+152 -115
View File
@@ -7,152 +7,186 @@
*/
/**
* @typedef {(this: Lexer, chr: string) => any} DefunctFunction
* A token produced by a {@link LexerAction}. The lexer is agnostic to the
* concrete token shape; consumers pick whatever representation suits them.
*
* @typedef {unknown} Token
*/
/**
* @typedef {(this: Lexer, ...args: RegExpExecArray) => string | string[] | undefined} RuleAction
* A rule action. Invoked with the regex match (full match followed by capture
* groups) bound to the owning {@link Lexer} so it can read or set `state`,
* `index`, and `reject`.
*
* Return values:
* - `null` (or `undefined` from an implicit return) — discard the match and continue scanning.
* - a single token — yield it from {@link Lexer.lex}.
* - an array of tokens — yield the first; queue the rest for subsequent calls.
*
* @callback LexerAction
* @this {Lexer}
* @param {...string} match
* @returns {Token | Token[] | null | void}
*/
/**
* @typedef {Object} Rule
* @property {RegExp} pattern
* @property {boolean} global
* @property {RuleAction} action
* @property {number[]} start
* @typedef {object} LexerRule
* @property {RegExp} pattern Sticky-compiled pattern used to probe the input.
* @property {boolean} global Whether the user-supplied pattern was global.
* @property {LexerAction} action
* @property {number[]} start States in which the rule is active. `[0]` is the default state; an empty array means "any state".
*/
/**
* @typedef {Object} Match
* @typedef {object} LexerMatch
* @property {RegExpExecArray} result
* @property {RuleAction} action
* @property {LexerAction} action
* @property {number} length
* @property {boolean} global Whether the producing rule was declared with the `g` flag.
*/
/**
* Handler invoked when no rule matches at the current position.
*
* @callback DefunctHandler
* @this {Lexer}
* @param {string} chr The unexpected character.
* @returns {Token | Token[] | null | void}
*/
/**
 * Fallback handler installed by the {@link Lexer} constructor when the caller
 * does not pass a function of their own. It never returns a token: it always
 * throws, naming the offending character and where it was found.
 *
 * The reported position is `this.index - 1` because `lex()` post-increments
 * `index` while reading the character it hands to the handler.
 *
 * @type {DefunctHandler}
 */
function defaultDefunct(chr) {
    const position = this.index - 1;
    const message = `Unexpected character at index ${position}: ${chr}`;

    throw new Error(message);
}
/**
* Lexer class for tokenizing input strings.
*/
export class Lexer {
/**
* @type {string[]}
*/
tokens = [];
/**
* @type {Rule[]}
*/
rules = [];
/**
* @type {number}
*/
remove = 0;
/**
* Current lexer state. Rules whose `start` array contains this value (or
* is empty) are eligible to match. Odd-numbered states are also matched
* by rules declared with `start: [0]`, mirroring flex's inclusive states.
*
* @type {number}
*/
state = 0;
/**
* @type {number}
*/
/** @type {number} */
index = 0;
/**
* @type {string}
*/
/** @type {string} */
input = "";
/**
* @param {DefunctFunction} [defunct]
* When set to `true` from inside an action, the current match is rolled
* back and the next-best match is tried instead.
*
* @type {boolean}
*/
reject = false;
/** @type {LexerRule[]} */
#rules = [];
/** @type {Token[]} */
#tokens = [];
/** @type {number} */
#remove = 0;
/** @type {DefunctHandler} */
#defunct;
/**
* @param {DefunctHandler} [defunct] Optional handler for unexpected characters.
*/
constructor(defunct) {
defunct ||= function (chr) {
throw new Error("Unexpected character at index " + (this.index - 1) + ": " + chr);
};
this.defunct = defunct;
this.#defunct = typeof defunct === "function" ? defunct : defaultDefunct;
}
/**
* Add a lexing rule.
* Register a tokenization rule.
*
* @param {RegExp} pattern
* @param {RuleAction} action
* @param {number[]} [start]
* @returns {Lexer}
* @param {LexerAction} action
* @param {number[]} [start] States in which the rule is active. Defaults to `[0]`.
* @returns {this}
*/
addRule = (pattern, action, start) => {
addRule(pattern, action, start) {
const global = pattern.global;
if (!global || !pattern.sticky) {
let flags = "gy";
if (pattern.multiline) flags += "m";
if (pattern.ignoreCase) flags += "i";
if (pattern.unicode) flags += "u";
pattern = new RegExp(pattern.source, flags);
}
if (!Array.isArray(start)) start = [0];
this.rules.push({
pattern: pattern,
global: global,
action: action,
start: start,
this.#rules.push({
pattern,
global,
action,
start: Array.isArray(start) ? start : [0],
});
return this;
};
}
/**
* Set the input string for lexing.
* Reset the lexer and load a new input string.
*
* @param {string} input
* @returns {Lexer}
* @returns {this}
*/
setInput = (input) => {
this.remove = 0;
setInput(input) {
this.#remove = 0;
this.state = 0;
this.index = 0;
this.tokens.length = 0;
this.#tokens.length = 0;
this.input = input;
return this;
};
}
/**
* Lex the next token from the input.
* Produce the next token from the input, or `null` once exhausted.
*
* @returns {string | string[] | undefined}
* @returns {Token | null}
*/
lex = () => {
if (this.tokens.length) return this.tokens.shift();
lex() {
if (this.#tokens.length) return /** @type {Token} */ (this.#tokens.shift());
this.reject = true;
while (this.index <= this.input.length) {
const matches = this.scan().splice(this.remove);
const matches = this.#scan().splice(this.#remove);
const index = this.index;
while (matches.length) {
if (!this.reject) {
break;
}
const match = matches.shift();
if (!this.reject) break;
if (!match) break;
const result = match.result;
const length = match.length;
const match = /** @type {LexerMatch} */ (matches.shift());
const { result, length } = match;
this.index += length;
this.reject = false;
this.remove++;
this.#remove++;
let token = match.action.apply(this, result);
let token = match.action.apply(
this,
/** @type {string[]} */ (/** @type {unknown} */ (result)),
);
if (this.reject) {
this.index = result.index;
} else if (Array.isArray(token)) {
this.tokens = token.slice(1);
token = token[0];
} else {
if (length) this.remove = 0;
} else if (token !== null && token !== undefined) {
if (Array.isArray(token)) {
this.#tokens = token.slice(1);
token = token[0];
}
if (length) this.#remove = 0;
return token;
}
}
@@ -161,79 +195,82 @@ export class Lexer {
if (index < input.length) {
if (this.reject) {
this.remove = 0;
const token = this.defunct(input.charAt(this.index++));
if (typeof token !== "undefined") {
this.#remove = 0;
const token = this.#defunct(input.charAt(this.index++));
if (token !== null && token !== undefined) {
if (Array.isArray(token)) {
this.tokens = token.slice(1);
this.#tokens = token.slice(1);
return token[0];
}
return token;
}
} else {
if (this.index !== index) this.remove = 0;
if (this.index !== index) this.#remove = 0;
this.reject = true;
}
} else if (matches.length) this.reject = true;
else break;
} else if (matches.length) {
this.reject = true;
} else {
break;
}
}
};
return null;
}
/**
* Scan the input for matches.
* Probe every state-eligible rule at the current position, returning the
* matches sorted by length (longest first). Among matches of equal length,
* non-global rules are ordered ahead of global ones, which preserves
* flex's "longest non-global wins" tie-breaking.
*
* @returns {Match[]}
* @returns {LexerMatch[]}
*/
scan = () => {
/**
* @type {Match[]}
*/
#scan() {
/** @type {LexerMatch[]} */
const matches = [];
let index = 0;
const state = this.state;
const lastIndex = this.index;
const input = this.input;
for (let i = 0, length = this.rules.length; i < length; i++) {
const rule = this.rules[i];
for (const rule of this.#rules) {
const start = rule.start;
const states = start.length;
const eligible =
!states || start.indexOf(state) >= 0 || (state % 2 && states === 1 && !start[0]);
if (!states || start.indexOf(state) >= 0 || (state % 2 && states === 1 && !start[0])) {
const pattern = rule.pattern;
pattern.lastIndex = lastIndex;
const result = pattern.exec(input);
if (!eligible) continue;
if (!result || result.index !== lastIndex) {
continue;
}
const pattern = rule.pattern;
pattern.lastIndex = lastIndex;
const result = pattern.exec(input);
let j = matches.push({
result: result,
action: rule.action,
length: result[0].length,
});
if (!result || result.index !== lastIndex) continue;
if (rule.global) {
index = j;
}
let j = matches.push({
result,
action: rule.action,
length: result[0].length,
global: rule.global,
});
while (--j > index) {
const k = j - 1;
while (--j > 0) {
const k = j - 1;
const cur = matches[j];
const prev = matches[k];
const longer = cur.length > prev.length;
const tieFavorsCur = cur.length === prev.length && prev.global && !cur.global;
if (matches[j].length > matches[k].length) {
const temple = matches[j];
matches[j] = matches[k];
matches[k] = temple;
}
}
if (!longer && !tieFavorsCur) break;
matches[j] = prev;
matches[k] = cur;
}
}
return matches;
};
}
}
export default Lexer;
+1 -1
View File
@@ -23,6 +23,7 @@ import { certificateProvider, certificateSelector } from "#admin/brands/Certific
import {
Application,
AuthenticationEnum,
Brand,
CoreApi,
CoreApplicationsListRequest,
@@ -31,7 +32,6 @@ import {
FlowsApi,
UsageEnum,
} from "@goauthentik/api";
import { AuthenticationEnum } from "@goauthentik/api/dist/models/AuthenticationEnum.js";
import YAML from "yaml";
+1 -1
View File
@@ -17,6 +17,7 @@ import { DesignationToLabel, LayoutToLabel } from "#admin/flows/utils";
import { policyEngineModes } from "#admin/policies/PolicyEngineModes";
import {
AuthenticationEnum,
DeniedActionEnum,
Flow,
FlowDesignationEnum,
@@ -24,7 +25,6 @@ import {
FlowsApi,
UsageEnum,
} from "@goauthentik/api";
import { AuthenticationEnum } from "@goauthentik/api/dist/models/AuthenticationEnum.js";
import { msg } from "@lit/localize";
import { html, TemplateResult } from "lit";
+1 -1
View File
@@ -1,6 +1,6 @@
import { ModelForm } from "#elements/forms/ModelForm";
import type { Stage } from "@goauthentik/api/dist/models/Stage";
import type { Stage } from "@goauthentik/api";
import { msg } from "@lit/localize";
@@ -19,10 +19,10 @@ import {
CoreGroupsListRequest,
Group,
StagesApi,
UserCreationModeEnum,
UserTypeEnum,
UserWriteStage,
} from "@goauthentik/api";
import { UserCreationModeEnum } from "@goauthentik/api/dist/models/UserCreationModeEnum.js";
import { msg } from "@lit/localize";
import { html, TemplateResult } from "lit";
+317
View File
@@ -0,0 +1,317 @@
/* eslint-disable func-names */
import { Lexer } from "lex";
import { describe, expect, it, vi } from "vitest";
/**
 * Pull tokens from `lexer` until `lex()` reports exhaustion with `null`,
 * returning everything that was produced, in order.
 */
const drain = (lexer: Lexer): unknown[] => {
    const collected: unknown[] = [];

    for (let next: unknown = lexer.lex(); next !== null; next = lexer.lex()) {
        collected.push(next);
    }

    return collected;
};
describe("Lexer", () => {
// addRule() re-compiles any pattern that is not already both global and
// sticky, so the caller's `m`, `i`, and `u` flags have to be carried over
// onto the new RegExp. These tests pin that behavior.
describe("addRule", () => {
    it("returns the lexer for chaining", () => {
        const lexer = new Lexer();

        expect(lexer.addRule(/a/, () => "a")).toBe(lexer);
    });

    // Title fixed: the pattern below only carries `i` and `m`. The `u` flag is
    // exercised separately by "matches unicode patterns".
    it("preserves multiline and ignoreCase flags when re-compiling", () => {
        // The defunct handler swallows the "\n", which no rule matches.
        const lexer = new Lexer(() => null);
        const seen: string[] = [];

        // The action returns nothing, so each match is discarded after being
        // recorded.
        lexer.addRule(/^a/im, (m) => {
            seen.push(m);
        });

        lexer.setInput("A\nA");
        drain(lexer);

        // `i` lets /a/ match "A"; `m` lets `^` match again after the newline.
        expect(seen).toEqual(["A", "A"]);
    });

    it("matches unicode patterns", () => {
        const lexer = new Lexer();

        // `\p{Letter}` is only a Unicode property escape when `u` is set, so
        // this passes only if the flag survives re-compilation.
        lexer.addRule(/\p{Letter}+/u, (m) => m);
        lexer.setInput("café");

        expect(lexer.lex()).toBe("café");
    });
});
// setInput() resets the lexer's position and state so that a single instance
// can be reused for a new input string.
describe("setInput", () => {
it("resets state, index, and pending tokens", () => {
const lexer = new Lexer();
lexer.addRule(/./, (c) => c);
lexer.setInput("ab");
// Consume part of the first input so `index` is non-zero before the reset.
expect(lexer.lex()).toBe("a");
// Dirty the public state as well; the next setInput() should put it back to 0.
lexer.state = 7;
lexer.setInput("xy");
expect(lexer.state).toBe(0);
expect(lexer.index).toBe(0);
// Lexing restarts from the beginning of the new input, not from "b".
expect(lexer.lex()).toBe("x");
expect(lexer.lex()).toBe("y");
});
it("returns the lexer for chaining", () => {
const lexer = new Lexer();
expect(lexer.setInput("")).toBe(lexer);
});
});
// Core lex() behavior: which action return values become tokens, and what
// arguments and `this` value an action is invoked with.
describe("tokenization", () => {
it("tokenizes a simple expression", () => {
const lexer = new Lexer();
lexer
// Whitespace is matched but its action returns null, so it yields no token.
.addRule(/\s+/, () => null)
.addRule(/[a-zA-Z]+/, (m) => ({ type: "ident", value: m }))
.addRule(/\d+/, (m) => ({ type: "num", value: Number(m) }))
.addRule(/[+\-*/]/, (m) => ({ type: "op", value: m }));
lexer.setInput("foo + 12 * bar");
expect(drain(lexer)).toEqual([
{ type: "ident", value: "foo" },
{ type: "op", value: "+" },
{ type: "num", value: 12 },
{ type: "op", value: "*" },
{ type: "ident", value: "bar" },
]);
});
it("skips matches whose action returns null", () => {
const lexer = new Lexer();
lexer.addRule(/\s+/, () => null).addRule(/\S+/, (m) => m);
lexer.setInput(" foo bar ");
expect(drain(lexer)).toEqual(["foo", "bar"]);
});
it("returns null once the input is exhausted", () => {
const lexer = new Lexer();
lexer.addRule(/./, (c) => c);
lexer.setInput("a");
expect(lexer.lex()).toBe("a");
// Calling lex() again after exhaustion keeps returning null.
expect(lexer.lex()).toBeNull();
expect(lexer.lex()).toBeNull();
});
it("passes capture groups to the action", () => {
const lexer = new Lexer();
const calls: string[][] = [];
// The action is applied with the regex match array: full match first,
// then each capture group.
lexer.addRule(/(\w+)=(\w+)/, (...args) => {
calls.push(args);
return args[0];
});
lexer.setInput("foo=bar");
lexer.lex();
expect(calls).toEqual([["foo=bar", "foo", "bar"]]);
});
it("binds `this` to the lexer inside the action", () => {
const lexer = new Lexer();
let captured: Lexer | undefined;
// Must be a `function` expression: an arrow function would ignore the
// `this` the lexer supplies when it applies the action.
lexer.addRule(/a/, function () {
// eslint-disable-next-line consistent-this, @typescript-eslint/no-this-alias
captured = this;
return "a";
});
lexer.setInput("a");
lexer.lex();
expect(captured).toBe(lexer);
});
});
// Ordering of candidate matches when several rules match at the same position.
describe("longest-match tie-breaking", () => {
it("prefers the longer non-global match", () => {
const lexer = new Lexer();
// Both rules match at index 0; the 4-character match is tried first even
// though its rule was registered second.
lexer.addRule(/if/, () => "KW_IF").addRule(/iffy/, () => "IDENT_IFFY");
lexer.setInput("iffy");
expect(lexer.lex()).toBe("IDENT_IFFY");
});
it("treats global rules as fallbacks behind non-global rules of the same length", () => {
const lexer = new Lexer();
// Both rules match "foo" with equal length. The rule declared with `g` was
// registered first, yet the non-global rule is the one that produces the token.
lexer.addRule(/[a-z]+/g, (m) => `g:${m}`).addRule(/foo/, (m) => `s:${m}`);
lexer.setInput("foo");
expect(lexer.lex()).toBe("s:foo");
});
});
// An action may return an array: lex() yields the first element and queues the
// remaining ones for later lex() calls.
describe("multi-token return", () => {
it("yields the first token immediately and queues the rest", () => {
const lexer = new Lexer();
lexer.addRule(/a/, () => ["A1", "A2", "A3"]);
lexer.setInput("a");
expect(lexer.lex()).toBe("A1");
// These two come from the queue; the single "a" was already consumed.
expect(lexer.lex()).toBe("A2");
expect(lexer.lex()).toBe("A3");
expect(lexer.lex()).toBeNull();
});
it("drains the queue before scanning further input", () => {
const lexer = new Lexer();
lexer.addRule(/a/, () => ["A1", "A2"]).addRule(/b/, () => "B");
lexer.setInput("ab");
// "A2" must be delivered before the lexer moves on to match "b".
expect(drain(lexer)).toEqual(["A1", "A2", "B"]);
});
});
// Setting `this.reject = true` inside an action rolls the current match back
// and lets the next candidate match at the same position run instead.
describe("reject", () => {
it("falls through to the next-best match when an action sets reject", () => {
const lexer = new Lexer();
const order: string[] = [];
// Two identical non-global patterns: candidates of equal length are tried in
// registration order, so the rejecting rule runs first.
lexer
.addRule(/foo/, function () {
order.push("first");
this.reject = true;
})
.addRule(/foo/, () => {
order.push("second");
return "FOO";
});
lexer.setInput("foo");
expect(lexer.lex()).toBe("FOO");
expect(order).toEqual(["first", "second"]);
});
it("rolls back the lexer index when an action rejects", () => {
const lexer = new Lexer();
lexer
.addRule(/abc/, function () {
this.reject = true;
})
.addRule(/a/, (m) => m);
lexer.setInput("abc");
expect(lexer.lex()).toBe("a");
// Had the rejected 3-character match not been rolled back, index would be
// past "abc" rather than just past "a".
expect(lexer.index).toBe(1);
});
});
// The "defunct" handler is what the lexer calls when no rule matches at the
// current position. These tests cover the default handler and custom ones.
describe("defunct handling", () => {
it("throws by default on unexpected characters", () => {
const lexer = new Lexer();
lexer.addRule(/a/, (m) => m);
lexer.setInput("a@");
expect(lexer.lex()).toBe("a");
// "@" sits at index 1 of the input, which is the index the message reports.
expect(() => lexer.lex()).toThrow(/Unexpected character at index 1: @/);
});
it("invokes a custom defunct handler with the offending character", () => {
const defunct = vi.fn((chr: string) => `?${chr}`);
const lexer = new Lexer(defunct);
lexer.addRule(/a/, (m) => m);
lexer.setInput("a@b");
// Only /a/ is registered, so both "@" and "b" are routed to the handler and
// its return value is yielded as the token.
expect(drain(lexer)).toEqual(["a", "?@", "?b"]);
expect(defunct).toHaveBeenCalledTimes(2);
expect(defunct.mock.calls[0]?.[0]).toBe("@");
});
it("ignores defunct return values that are null", () => {
const lexer = new Lexer((_chr) => null);
lexer.addRule(/a/, (m) => m);
lexer.setInput("@@a");
// Both "@" characters are skipped silently; the first token is "a".
expect(lexer.lex()).toBe("a");
expect(lexer.lex()).toBeNull();
});
it("supports array returns from the defunct handler", () => {
const lexer = new Lexer((chr) => [`bad:${chr}`, "extra"]);
lexer.addRule(/a/, (m) => m);
lexer.setInput("@");
// Same contract as rule actions: first element now, the rest queued.
expect(lexer.lex()).toBe("bad:@");
expect(lexer.lex()).toBe("extra");
});
it("falls back to the default handler when given a non-function", () => {
// @ts-expect-error — exercising the runtime guard
const lexer = new Lexer("not a function");
// No rules are registered, so the very first character is unexpected.
lexer.setInput("@");
expect(() => lexer.lex()).toThrow(/Unexpected character/);
});
});
// Start states: the optional third addRule() argument lists the states in which
// a rule is active. Omitting it registers the rule with `[0]`.
describe("states", () => {
it("only fires rules whose start array includes the current state", () => {
const lexer = new Lexer();
lexer
// Opening quote (default state): switch to state 2. No return value, so no
// token is produced for the quote itself.
.addRule(/"/, function () {
this.state = 2;
})
// Closing quote (state 2 only): switch back to the default state.
.addRule(
/"/,
function () {
this.state = 0;
},
[2],
)
.addRule(/[^"]+/, (m) => `STR:${m}`, [2])
.addRule(/[a-z]+/, (m) => `ID:${m}`);
lexer.setInput('foo"hello"bar');
// "hello" is lexed by the state-2 rule, not by the identifier rule.
expect(drain(lexer)).toEqual(["ID:foo", "STR:hello", "ID:bar"]);
});
it("treats an empty start array as 'active in any state'", () => {
const lexer = new Lexer();
lexer
.addRule(/!/, function () {
this.state = 5;
return "BANG";
})
.addRule(/./, (m) => m, []);
lexer.setInput("a!b");
// "b" is lexed while the lexer is in state 5, by the rule registered with `[]`.
expect(drain(lexer)).toEqual(["a", "BANG", "b"]);
});
it("matches inclusive `[0]` rules from odd-numbered states", () => {
const lexer = new Lexer();
lexer
.addRule(/#/, function () {
this.state = 1;
})
.addRule(/[a-z]+/, (m) => m);
lexer.setInput("ab#cd");
// State 1 is odd, so the default-state identifier rule still matches "cd".
expect(drain(lexer)).toEqual(["ab", "cd"]);
});
it("does not match `[0]` rules from even non-zero states", () => {
const lexer = new Lexer();
lexer
.addRule(/#/, function () {
this.state = 2;
})
.addRule(/[a-z]+/, (m) => m);
lexer.setInput("ab#cd");
expect(lexer.lex()).toBe("ab");
// In state 2 no rule is eligible for "c", so the default defunct handler throws.
expect(() => lexer.lex()).toThrow(/Unexpected character/);
});
});
});
+35
View File
@@ -0,0 +1,35 @@
// @file TSConfig used by the web package during development.
{
"extends": "@goauthentik/tsconfig",
"compilerOptions": {
"types": ["node"],
"checkJs": true,
"allowJs": true,
"composite": true,
"resolveJsonModule": true,
"allowSyntheticDefaultImports": true,
"emitDeclarationOnly": true,
"target": "esnext",
"module": "preserve",
"moduleResolution": "bundler",
"lib": ["DOM", "DOM.Iterable", "ESNext"],
"noUncheckedIndexedAccess": true
},
"include": ["./**/*", "../**/*"],
"exclude": [
// ---
"**/out/**/*",
"**/dist/**/*",
"storybook-static",
// TODO: @lit/localize-tools v0.8.0 has a nullish coalescing typing error.
// Remove when we upgrade past that.
"scripts/pseudolocalize.mjs",
"scripts/build-locales.mjs"
],
"references": [
{
"path": "../.."
}
]
}
+6 -3
View File
@@ -41,9 +41,12 @@ export default defineConfig({
projects: [
{
test: {
include: ["./unit/**/*.{test,spec}.ts", "**/*.unit.{test,spec}.ts"],
name: "unit",
include: ["./test/unit/**/*.{test,spec}.ts", "**/*.unit.{test,spec}.ts"],
name: "Unit Tests",
environment: "node",
typecheck: {
tsconfig: "./tsconfig.unit.json",
},
},
},
{
@@ -51,7 +54,7 @@ export default defineConfig({
setupFiles: ["./test/lit/setup.js"],
include: ["./browser/**/*.{test,spec}.ts", "**/*.browser.{test,spec}.ts"],
name: "browser",
name: "Browser Tests",
browser: {
enabled: true,
provider: playwright(),