Compressing a list of JSON patches

72 Views Asked by At

What type of data structure can help compress JSON patches?

We can compress JSON patches by removing redundant patches. Redundant patches are patches which are fully replaced later, either by a patch with the same path, or with a patch with a shorter prefix.

This is like Last Writer Wins with Highest Ancestors.

As an example, take the following list

[
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 792
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 264
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 784
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 296
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 760
    },
    {
        "op": "remove",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d"
    },
]

This can be compressed to the following since "/layouts/13da6470-dbec-57e7-b412-0d75110b145d" is a prefix key for patches before it.

[
    {
        "op": "remove",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d"
    },
]

On the other hand, if we had a different list,

[
    {
        "op": "add",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d",
        "value": { x: 0, y: 0 } 
    },
        {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 792
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 264
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 784
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 296
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 760
    },
]

it could be compressed to

[
    {
        "op": "add",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d",
        "value": { x: 760, y: 296 } 
    },
]

I've thought of using a trie where each path segment is treated as a character, but in the same thought, could I use some type of radix sort by path segment + patch index too?

1

There are 1 best solutions below

0
Sentient On

Here's an implementation of a Radix Trie; not sure if it's the fastest way.

/* eslint-disable max-classes-per-file */
import { IJsonPatch, splitJsonPath } from "mobx-state-tree";
import { applyPatches } from "immer";

class TrieNode {
  children: Map<string, TrieNode>;

  label: string[];

  patch: IJsonPatch | undefined;

  // eslint-disable-next-line default-param-last
  constructor(label = [""], patch?: IJsonPatch) {
    this.children = new Map();
    this.label = label;
    this.patch = patch;
  }
}

/**
 * Finds index at which arrays differ. Returns -1 if they are equal or one contains the other.
 */
function splitIndex(partsA: string[], partsB: string[]) {
  const maxLength = Math.min(partsA.length, partsB.length);
  for (let i = 0; i < maxLength; i++) {
    if (partsA[i] !== partsB[i]) {
      return i;
    }
  }
  return -1;
}

export class PatchRadixTrie {
  root: TrieNode;

  constructor(patches: IJsonPatch[] = []) {
    this.root = new TrieNode([""]);
    patches.forEach(patch => this.insert(patch));
  }

  insert(patch: IJsonPatch) {
    let node = this.root;
    const parts = splitJsonPath(patch.path);
    let depth = 0;
    while (depth < parts.length) {
      // Get path that is currently being processed, and unprocessed path parts.
      const path = parts[depth];
      const pathSuffix = parts.slice(depth);
      if (!node.children.has(path)) {
        node.children.set(path, new TrieNode(pathSuffix, patch));
        return;
      }
      const parent = node;
      node = node.children.get(path)!;
      // Check if existing path contains incoming path or vice versa.
      let index = splitIndex(pathSuffix, node.label);
      if (index === -1) {
        // Incoming patch should replace existing node.
        if (pathSuffix.length === node.label.length) {
          node.patch = patch;
          node.children = new Map();
          return;
        }
        // Incoming patch should be parent of existing node.
        if (pathSuffix.length < node.label.length) {
          const prefix = pathSuffix;
          const prefixNode = new TrieNode(prefix, patch);
          const suffix = node.label.slice(prefix.length);
          node.label = suffix;
          prefixNode.children.set(suffix[0], node);
          parent.children.set(path, prefixNode);
          return;
        }
        // Incoming patch should be child of existing node.
        if (pathSuffix.length > node.label.length) {
          index = node.label.length;
        }
      } else {
        // Incoming patch splits existing node into two.
        const prefix = node.label.slice(0, index);
        const suffix = node.label.slice(index);
        node.label = suffix;
        const prefixNode = new TrieNode(prefix);
        prefixNode.children.set(suffix[0], node);
        parent.children.set(path, prefixNode);
        // Insert incoming patch as child of prefix node in next iteration.
        node = prefixNode;
      }
      depth += index;
    }
  }

  getPatches() {
    return this.traverseTrie(this.root);
  }

  traverseTrie(node: TrieNode) {
    const { patch, children } = node;
    const childPatches: IJsonPatch[] = Array.from(children.values()).flatMap(this.traverseTrie.bind(this));
    if (patch) {
      if (children.size) {
        if (patch.op === "remove") throw RangeError("Patch with op remove cannot have children patches.");
        return [
          {
            ...patch,
            value: applyPatches(
              patch.value,
              childPatches.map(patch => ({ ...patch, path: splitJsonPath(patch.path) }))
            ),
          },
        ];
      }
      return [patch];
    }
    return childPatches;
  }
}