Keep only N records when inserting new records in Couchbase


I have a huge bucket that contains all users' notification data, like this:

┌────┬─────────┬─────────────────────────┐
│ id │ user_id │          data           │
├────┼─────────┼─────────────────────────┤
│  1 │       1 │ {"somekey":"someValue"} │
│  2 │       2 │ {"somekey":"someValue"} │
│  3 │       1 │ {"somekey":"someValue"} │
│  4 │       1 │ {"somekey":"someValue"} │
│  5 │       1 │ {"somekey":"someValue"} │
│  6 │       2 │ {"somekey":"someValue"} │
│  7 │       2 │ {"somekey":"someValue"} │
│  8 │       1 │ {"somekey":"someValue"} │
│  9 │       2 │ {"somekey":"someValue"} │
│ 10 │       2 │ {"somekey":"someValue"} │
└────┴─────────┴─────────────────────────┘

So, anytime I insert a new record, for example for user_id=2, I want to remove the earliest record for user_id=2 so that each user keeps at most N records (of course, if the total number of records for that user is less than N, nothing is removed).


There are 2 answers below.


@ehsan, another alternative is to use the Eventing Service and feed your documents to an Eventing function. You could use a compound key combining the notification id and the user_id.

For example, I use keys of the form "nu:#:#". Your notifications would then be processed by Eventing to build up a user document like @MatthewGroves proposed.

In fact you could optionally delete your input documents when they are successfully added.

Consider your input keys and documents as follows:

┌──────────┬─────────────────────────┐
│ key      │  data                   │
├──────────┼─────────────────────────┤
│ nu:1:1   │ {"somekey":"someValue"} │
│ nu:2:2   │ {"somekey":"someValue"} │
│ nu:3:1   │ {"somekey":"someValue"} │
│ nu:4:1   │ {"somekey":"someValue"} │
│ nu:5:1   │ {"somekey":"someValue"} │
│ nu:6:2   │ {"somekey":"someValue"} │
│ nu:7:2   │ {"somekey":"someValue"} │
│ nu:8:1   │ {"somekey":"someValue"} │
│ nu:9:2   │ {"somekey":"someValue"} │
│ nu:10:2  │ {"somekey":"someValue"} │
└──────────┴─────────────────────────┘

Now we can use an Eventing function with a parameter, say MAX_ARRAY = 3 (adjust to what you want), to control the maximum number of notifications to keep on a per-user basis.

Note I also added a parameter MAX_RETRY = 16 to retry the operation if there is contention (sort of a poor man's CAS, done by checking a field holding Math.random()).

I assume the notification ids always increase; since JavaScript safely handles integers up to 2^53 - 1 (or 9,007,199,254,740,991), this shouldn't be an issue.

A working Eventing Function is shown below:

/*
KEY nu:10:2 // Example input document where 10 is the notify_id and 2 is the user_id
{
    "somekey": "someValue"
}

KEY user_plus_ntfys:2 // Example output doc where "id": 2 is the user_id built from above
{
   "type": "user_plus_ntfys",
   "id": 2,
   "notifications" : [
     {"nid": 7, "doc": { "somekey": "someValue"}},
     {"nid": 9, "doc": { "somekey": "someValue"}},
     {"nid": 10, "doc": { "somekey": "someValue"}}
   ]
}
*/

function OnUpdate(doc, meta) {
    const MAX_RETRY = 16;
    const MAX_ARRAY = 3;
    
    // will process ALL data like nu:#:#
    var parts = meta.id.split(':');
    if (!parts || parts.length != 3 || parts[0] != "nu") return;
    var ntfy_id = parseInt(parts[1]);
    var user_id = parseInt(parts[2]);
    //log("Doc created/updated " +  meta.id + " ntfy_id " + ntfy_id  + " user_id " + user_id);

    var insert_json = {"nid": ntfy_id, "doc": doc};
    for (var tries = 0; tries < MAX_RETRY; tries++) {
        var user_doc = addToNtfyArray(src_bkt, user_id, insert_json, MAX_ARRAY);
        if (user_doc == null) {
            // do nothing
            return;
        }
        var random_csum = user_doc.random;
        // src_bkt is a read-write alias (bucket binding) to the function's source bucket
        src_bkt["user_plus_ntfys:" + user_id] = user_doc;
        user_doc = src_bkt["user_plus_ntfys:" + user_id];
        if (random_csum !== user_doc.random) {
            // another writer changed the doc in the meantime; loop and retry
        } else {
            // success, could even delete the input notification doc here
            return;
        }
    }
    log("FAILED to insert id: " + meta.id, doc);
}

function addToNtfyArray(src_bkt, user_id, insert_json, max_ary) {
    var ntfy_id = insert_json.nid;
    var random_csum;
    var user_doc = src_bkt["user_plus_ntfys:" + user_id];
    if (!user_doc) {
        // generate unique random #
        random_csum = Math.random();
        user_doc = { "type": "user_plus_ntfys", "id": user_id, "notifications" : [], "random": random_csum };
        user_doc.notifications.push(insert_json);
    } else {
        if (user_doc.notifications[0].nid >= ntfy_id && user_doc.notifications.length === max_ary) {
            // do nothing this is older data, we assume that nid always increases
            return null;
        } else {
            // find insert position
            for (var i = 0; i <= user_doc.notifications.length; i++) {
                if (i < user_doc.notifications.length && user_doc.notifications[i].nid === ntfy_id) {
                    // do nothing this is duplicate data we already have it, assume no updates to notifys
                    return null;
                }  
                if (i == user_doc.notifications.length || user_doc.notifications[i].nid > ntfy_id) {
                    // add to array middle or end
                    user_doc.notifications.splice(i, 0, insert_json);
                    random_csum = Math.random();
                    // update unique random #
                    user_doc.random = random_csum;
                    break;
                }
            }
        }
        while (user_doc.notifications.length > max_ary) {
            // ensure proper size
            user_doc.notifications.shift();
        }
    }
    return user_doc;
}
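
As noted above, on success you could also clean up the processed input document so the nu:#:# items don't pile up. In Eventing that is a one-line delete through the bucket binding; a minimal sketch, assuming src_bkt is permitted to mutate the source bucket:

// inside the success branch of OnUpdate(), after the user doc write is verified
delete src_bkt[meta.id];  // removes the already-processed nu:#:# input document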

There might be a better data modeling approach. Does all of this data need to be in separate documents? If "N" is a relatively small number, you could fit all of these into an array within a single document. Like:

{
   "type": "user",
   "name": "ehsan",
   "notifications" : [
     {"somekey":"someValue"},
     {"somekey":"someValue"},
     {"somekey":"someValue"} 
   ]
}

Then the process would be (a rough sketch follows the list):

  1. Get the document
  2. Add a record to the notifications array
  3. Determine if you need to remove an old record (and then remove it)
  4. Save the updated document.
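
Here is a minimal sketch of those four steps, assuming the Node SDK 3.x collection API; the key scheme ("user:" + userId), the maxN parameter, and the addNotification helper are illustrative assumptions, not part of the original question:

const couchbase = require('couchbase');

async function addNotification(collection, userId, notification, maxN) {
  const key = 'user:' + userId;  // hypothetical key scheme
  try {
    // 1. Get the document (content plus CAS for optimistic locking)
    const res = await collection.get(key);
    const userDoc = res.content;

    // 2. Add the new record to the notifications array
    userDoc.notifications.push(notification);

    // 3. Remove old records until at most maxN remain
    while (userDoc.notifications.length > maxN) {
      userDoc.notifications.shift();
    }

    // 4. Save the updated document; the CAS makes the replace fail
    //    if someone else modified the document in the meantime
    await collection.replace(key, userDoc, { cas: res.cas });
  } catch (err) {
    if (err instanceof couchbase.DocumentNotFoundError) {
      // First notification for this user: create the document
      await collection.insert(key, { type: 'user', notifications: [notification] });
    } else {
      throw err;  // e.g. a CAS mismatch; the caller can simply retry
    }
  }
}

Passing the CAS from the get into the replace gives you optimistic locking, so concurrent writers can't silently drop each other's notifications; on a CAS mismatch you just retry the whole read-modify-write.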

This approach has the benefits of simplicity and of not needing to update multiple pieces of data. The way you've modeled it could work, but you would need ACID transactions (which aren't available in Couchbase's Node SDK yet) or maybe an Eventing function to check to make sure there aren't too many notification documents for each user whenever a new notification is created.