Let's say we want to implement a simple list filtering language so
we can enter a.b = 12
and return only results in a
list where the a
column is an object that contains a
field b
that is set to the value 12. What would a
filter(jsonPath, equals, listOfObjects)
function look
like?
If we only needed to support object lookup, we might
implement filter
by splitting the jsonPath
on periods and look at each object in the listOfObjects
for matching values. It might look something like this:
function filter(jsonPath, equals, listOfObjects) {
const parts = jsonPath.split('.');
function filterSingle(object) {
let objectAtPath = object;
let i = 0;
for (let part = parts[i]; part && objectAtPath; part = parts[++i]) {
objectAtPath = objectAtPath[part];
}
return i === parts.length && objectAtPath === equals;
}
return listOfObjects.filter(filterSingle);
}
require('assert').deepEqual(
filter('foo.bar', 12, [{ foo: { bar: 12 } }, { foo: null }]),
[{ foo: { bar: 12 } }],
);
That doesn't work too badly. We haven't handled edge cases like a
jsonPath
of foo..bar
or
bar.
. But those would not be difficult to handle:
function filter(jsonPath, equals, listOfObjects) {
if (jsonPath.charAt(0) === '.') {
throw new Error('JSON path cannot begin with a dot, in: ' + jsonPath);
} else if (jsonPath.charAt(jsonPath.length - 1) === '.') {
throw new Error('JSON path cannot end with a dot, in: ' + jsonPath);
}
const parts = jsonPath.split('.');
if (parts.reduce((hasEmptyPart, part) => hasEmptyPart || part.length === 0, false)) {
throw new Error('JSON path cannot contain an empty section, in: ' + jsonPath);
}
function filterSingle(object) {
let objectAtPath = object;
let i = 0;
for (let part = parts[i]; part && objectAtPath; part = parts[++i]) {
objectAtPath = objectAtPath[part];
}
return i === parts.length && objectAtPath === equals;
}
return listOfObjects.filter(filterSingle);
}
And we now handle the most obvious invalid path cases.
Arrays?
If we want to support array path syntax, things get harder. For example:
require('assert').deepEqual(
filter('foo.bar[0].biz', 14, [{ foo: { bar: [ { biz: 14 }, { biz: 19 } ] } }, { foo: { bar: null } }]),
[{ foo: { bar: [ { biz: 14 }, { biz: 19 } ] } }],
);
We could try to stick with the hammer that is
String.prototype.split
and write some really messy
code. :) Or we could switch to an approach that gives us more
control. Let's do that.
We'll build a very simple lexer that will iterate over each character
accumulating characters into individual tokens that represent the
pieces of the path. Let's start by supporting the original
jsonPath
syntax and error-handling.
function getJsonPathParts(path) {
const parts = [];
let currentToken = '';
for (let i = 0; i < path.length; i++) {
const c = path[i];
switch (c) {
case '.': {
if (!currentToken) {
throw new Error('JSON path cannot contain empty section, in: ' + path);
}
parts.push(currentToken);
currentToken = '';
break;
}
default: {
currentToken += c;
break;
}
}
}
if (!currentToken) {
throw new Error('JSON path cannot end with dot, in: ' + path);
}
parts.push(currentToken);
return parts;
}
function filter(jsonPath, equals, listOfObjects) {
const parts = getJsonPathParts(jsonPath);
function filterSingle(object) {
let objectAtPath = object;
let i = 0;
for (let part = parts[i]; part && objectAtPath; part = parts[++i]) {
objectAtPath = objectAtPath[part];
}
return i === parts.length && objectAtPath === equals;
}
return listOfObjects.filter(filterSingle);
}
Not too bad!
Arrays?
Right. Let's build on getJsonPathParts
to support array
syntax. Along with that we're going to impose some restrictions. The
object path parts must be only alphanumeric characters plus dashes and
underscores. The array index must only be numeric characters. Anything
else should throw an error.
function getJsonPathParts(path) {
const parts = [];
let currentToken = '';
let inArray = false;
for (let i = 0; i < path.length; i++) {
const c = path[i];
switch (c) {
case '.': {
if (currentToken === '') {
throw new Error('JSON path cannot contain empty section, in: ' + path);
}
parts.push(currentToken);
currentToken = '';
break;
}
case '[': {
if (inArray) {
throw new Error('JSON path contains unexpected left bracket, in: ' + path);
}
if (currentToken === '') {
throw new Error('JSON path cannot contain empty section, in: ' + path);
}
parts.push(currentToken);
currentToken = '';
inArray = true;
break;
}
case ']': {
if (!inArray) {
throw new Error('JSON path contains unexpected right bracket, in: ' + path);
}
if (currentToken === '') {
throw new Error('JSON path array index must not be empty, in: ' + path);
}
// Array indices are recorded as numbers, not strings.
currentToken = parseInt(currentToken, 10);
inArray = false;
break;
}
default: {
const code = c.charCodeAt(0);
if (inArray) {
if (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) {
currentToken += c;
continue;
}
throw new Error('JSON path array index must be numeric, in: ' + path);
}
if ((code >= 'A'.charCodeAt(0) && code <= 'z'.charCodeAt(0)) ||
(code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) ||
['-', '_'].includes(c)) {
currentToken += c;
continue;
}
throw new Error('JSON path part must contain only alphanumeric characters, in: ' + path);
}
}
}
if (currentToken === '') {
throw new Error('JSON path cannot end with dot, in: ' + path);
}
parts.push(currentToken);
return parts;
}
require('assert').deepEqual(getJsonPathParts('foo.bar[0].biz'), ['foo', 'bar', 0, 'biz']);
Now we've got a simple JSON path parser with decent error handling! Of course we wouldn't want to use this little library in production until we had some serious test coverage. But writing tests and calling out my mistakes will be left here as an exercise for the reader. :)
New (short) post on parsing JSON paths in JavaScript https://t.co/mIjOMugA7C
— Phil Eaton (@phil_eaton) March 28, 2019