Communication client-server with OCaml marshalled data

495 Views Asked by At

I want to do a client-side js_of_ocaml application with a server in OCaml, with constraints described below, and I would like to know if the approach below is right or if there is a more efficient one. The server can sometimes send large quantities of data (> 30MB).

In order to make the communication between client and server safer and more efficient, I am sharing a type t in a .mli file like this :

(** Requests sent from the client to the server. *)
type client_to_server =
| Say_Hello
| Do_something_with of int

(** Replies sent from the server to the client. *)
type server_to_client =
| Ack
| Print of string * int

Then, this type is marshalled into a string and sent on the network. I am aware that on the client side, some types are missing (Int64.t).

Also, in an XMLHttpRequest sent by the client, we want to receive more than one marshalled object from the server, and sometimes in a streaming mode (i.e. process the marshalled objects received so far (if possible) during the loading state of the request, and not only during the done state).

These constraints force us to use the field responseText of the XMLHTTPRequest with the content-type application/octet-stream.

Moreover, when we get back the response from responseText, an encoding conversion is made because JavaScript strings are UTF-16. But since the marshalled object is binary data, we do what is necessary to retrieve our binary data (by overriding the charset with x-user-defined and by applying a mask to each character of the responseText string).

The server (HTTP server in OCaml) is doing something simple like this:

(* Computes the reply for [req] with [process_response], marshals it with
   no flags, and hands the resulting string to [send]. *)
let process_request req =
  send (Marshal.to_string (process_response req) [])

However, on the client side, the actual JavaScript primitive of js_of_ocaml for caml_marshal_data_size needs an MlString. But in streaming mode, we don't want to convert the JavaScript string into an MlString (which can iterate over the full string); we prefer to do the size verification and unmarshalling (and the application of the mask for the encoding problem) only on the bytes read. Therefore, I have written my own marshal primitives in JavaScript.

The client code for processing requests and responses is:

(* JavaScript-implemented marshalling primitives. Both take the raw response
   string and a starting offset into it. *)

(* Bound to the JS primitive "my_marshal_total_size": header size plus data
   size of the marshalled value starting at the given offset. The JS side
   fails (via caml_failwith) when the offset leaves no room for a header or
   when the magic number does not match. *)
external marshal_total_size : Js.js_string Js.t -> int -> int = "my_marshal_total_size"
(* Bound to the JS primitive "my_marshal_from_string". The result type ['a] is
   unconstrained, so the call site decides which type the value is read as.
   NOTE(review): the JS snippet shown with this code provides
   "my_marshal_input_value_from_string"; confirm that a primitive named
   "my_marshal_from_string" is actually provided. *)
external marshal_from_string : Js.js_string Js.t -> int -> 'a = "my_marshal_from_string"

(* [apply f str ofs] unmarshals every complete value found in [str] starting
   at offset [ofs], calling [f] on each one in order, and returns the offset
   just past the last value consumed. It stops (without consuming) as soon as
   the next header cannot be read or the next value is not fully contained in
   [str]. Exceptions raised by unmarshalling itself or by [f] are not caught. *)
let apply (f:server_to_client -> unit) (str:Js.js_string Js.t) (ofs:int) : int =
  let available = str##length in
  (* End offset of the value starting at [start], or [None] when the size
     primitive fails. *)
  let message_end start =
    try Some (start + My_primitives.marshal_total_size str start)
    with Failure _ -> None
  in
  let rec consume start =
    match message_end start with
    | None -> start
    | Some stop ->
      if stop > available then start
      else begin
        f (My_primitives.marshal_from_string str start);
        consume stop
      end
  in
  consume ofs

(* [reqcallback f req ofs] is the readystatechange handler for [req].

   With status 200, both while the response is still loading and once it is
   done, every complete marshalled value found in [req##responseText] from
   offset [!ofs] onwards is passed to [f], and [ofs] is then advanced past
   the values consumed. Other ready states with status 200 are ignored.
   Any other status is passed to [process_error]. *)
let reqcallback f req ofs =
  match req##readyState, req##status with
  | XmlHttpRequest.DONE, 200
  | XmlHttpRequest.LOADING, 200 ->
      (* The offset must be recorded in both states: [responseText] holds
         everything received so far, so discarding the new offset while
         LOADING would replay values already handed to [f] on the next
         LOADING or DONE event. *)
      ofs := apply f req##responseText !ofs

  | _, 200 -> ()

  | _, i -> process_error i

(* [send f order] marshals [order], POSTs it to "/kernel", and installs
   [reqcallback] so that [f] is called on the replies found in the response. *)
let send (f:server_to_client -> unit) (order:client_to_server) =
  let order = Marshal.to_string order [] in
  let msg = Js.string (my_encode order) in (* Do some stuff *)
  let req = XmlHttpRequest.create () in
  (* Third argument [Js._true]: the request is opened asynchronously. *)
  req##_open(Js.string "POST", Js.string "/kernel", Js._true);
  req##setRequestHeader(Js.string "Content-Type",
            Js.string "application/octet-stream");
  (* Each request gets its own read offset, starting at 0. *)
  req##onreadystatechange <- Js.wrap_callback (reqcallback f req (ref 0));
  (* Force the x-user-defined charset on the response; the JS primitives then
     mask each character with 0xFF to get the original bytes back. *)
  req##overrideMimeType(Js.string "application/octet-stream; charset=x-user-defined");
  req##send(Js.some msg)

And the primitives are:

// Size of the header that precedes each marshalled value; it is added to the
// data size by my_marshal_total_size.
// NOTE(review): presumably a byte count matching the OCaml marshal header
// layout used by the server - confirm against the server's OCaml version.
//Provides: my_marshal_header_size
var my_marshal_header_size = 20;

//Provides: my_int_of_char
function my_int_of_char(s, i) {
    // Take the UTF-16 code unit at index i and keep only its low 8 bits,
    // which carry the original binary byte.
    var code_unit = s.charCodeAt(i);
    return code_unit & 0xFF;
}

//Provides: my_marshal_input_value_from_string 
//Requires: my_int_of_char, caml_int64_float_of_bits, MlStringFromArray
//Requires: caml_int64_of_bytes, caml_marshal_constants, caml_failwith
var my_marshal_input_value_from_string = function () {
    /* Implementation elided in this excerpt. According to the author it is
       nearly the same as the existing unmarshalling primitive (presumably
       the js_of_ocaml runtime's own - confirm), except that it uses a custom
       Reader which calls my_int_of_char for each byte read. */
}


//Provides: my_marshal_data_size
//Requires: caml_failwith, my_int_of_char
function my_marshal_data_size(s, ofs) {
    // Reads four consecutive bytes of s starting at pos as one big-endian
    // 32-bit integer (signed, because of the bitwise operators).
    function read_be32(pos) {
        var b0 = my_int_of_char(s, pos);
        var b1 = my_int_of_char(s, pos + 1);
        var b2 = my_int_of_char(s, pos + 2);
        var b3 = my_int_of_char(s, pos + 3);
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }
    // The first word must be the expected magic number; "|0" makes the
    // constant signed so that it compares equal to the value read.
    var magic = read_be32(ofs);
    if (magic !== (0x8495A6BE|0)) {
        caml_failwith("MyMarshal.data_size");
    }
    // The second word is the data size.
    return read_be32(ofs + 4);
}

//Provides: my_marshal_total_size
//Requires: my_marshal_data_size, my_marshal_header_size, caml_failwith
function my_marshal_total_size(s, ofs) {
    // A whole header must fit between ofs and the end of s.
    var out_of_range = ofs < 0 || ofs > s.length - my_marshal_header_size;
    if (out_of_range) {
        caml_failwith("Invalid argument");
    } else {
        // Total size = header size + data size announced in the header.
        var data_size = my_marshal_data_size(s, ofs);
        return my_marshal_header_size + data_size;
    }
}

Is this the most efficient way to transfer large OCaml values from server to client, or what would time- and space-efficient alternatives be?

1

There are 1 best solutions below

0
On

Have you tried using EventSource? https://developer.mozilla.org/en-US/docs/Web/API/EventSource

You could stream json data instead of marshaled data. Json.unsafe_input should be faster than unmarshal.

(* Minimal binding for an EventSource object: only the write-only [onmessage]
   handler is exposed. *)
class type eventSource =
 object
  method onmessage :
    (eventSource Js.t, event Js.t -> unit) Js.meth_callback
    Js.writeonly_prop
 end
(* Event object received by [onmessage]; both fields are read-only
   JavaScript strings. *)
and event =
 object
  method data : Js.js_string Js.t Js.readonly_prop
  method event : Js.js_string Js.t Js.readonly_prop
 end

(* Constructor bound to the global JavaScript [EventSource]; it takes a
   JavaScript string (used as the URL by [send]) and is meant to be called
   with [jsnew]. *)
let eventSource : (Js.js_string Js.t -> eventSource Js.t) Js.constr = 
   Js.Unsafe.global##_EventSource

(* [send f order url_of_order] opens an EventSource on the URL computed by
   [url_of_order order] and calls [f] on every message received, after
   decoding the message's [data] field with [Json.unsafe_input]. *)
let send (f:server_to_client -> unit) (order:client_to_server) url_of_order =
 let source = jsnew eventSource(Js.string (url_of_order order)) in
 source##onmessage <- Js.wrap_callback (fun msg ->
  f (Json.unsafe_input (msg##data)))

On the server side, you then need to rely on deriving_json http://ocsigen.org/js_of_ocaml/2.3/api/Deriving_Json to serialize your data

(* Replies sent from the server to the client. The [deriving (Json)] clause
   generates the [Json_server_to_client] module used by [process_request]. *)
type server_to_client =
 | Ack
 | Print of string * int 
deriving (Json)

(* Computes the reply for [req] with [process_response], serializes it with
   the derived [Json_server_to_client.to_string], and hands the resulting
   string to [send]. *)
let process_request req =
  send (Json_server_to_client.to_string (process_response req))

note1: Deriving_json serializes OCaml values to JSON using the internal representation of values in js_of_ocaml. Json.unsafe_input is a fast deserializer for Deriving_json that relies on browser-native JSON support.

note2: Deriving_json and Json.unsafe_input take care of OCaml string encoding.