Siddhi QL : In-Memory table outer join with Input Stream

385 Views Asked by At

I want to calculate % of protocols appearing in a network traffic in continuous way such that these % keeps on being updated with new events. A pie chart is generated and updated with the percentages. Since I need both new and previous data for the calculation, I decided to use in-memory table to keep events for a longer time (say a day).

As event tables are usable only when joined with event streams, I chose outer join to get old values as well. Being interested in just protocols and their percentages, I need just two columns but I am unable to apply aggregate function in outer join. The query I have so far generated is:

@Import('MAINInStream:1.0.0')
define stream MAINInStream (ts string, uid string, id_orig_h string, id_orig_p int, id_resp_h string, id_resp_p int, proto string, service string, duration double, orig_bytes long, resp_bytes long, conn_state string, local_orig bool, local_resp bool, missed_bytes long, history string, orig_pkts long, orig_ip_bytes long, resp_pkts long, resp_ip_bytes long, tunnel_parents string, sensorname string);

@Export('ProtocolStream:1.0.0')
define stream ProtocolStream (protocol string, count int);

define table mem_conn_table (timestamp long, id_orig_h string, id_orig_p int, id_resp_h string, id_resp_p int, proto string);

from MAINInStream
select time:timestampInMilliseconds(time:dateAdd(str:replaceAll(ts,'T',' '), 5, 'hour',"yyyy-MM-dd HH:mm:ss"),'yyyy-MM-dd HH:mm') as timestamp, id_orig_h, id_orig_p, id_resp_h, id_resp_p, proto
insert into intermediateStream;

from MAINInStream
select time:timestampInMilliseconds(time:dateAdd(str:replaceAll(ts,'T',' '), 5, 'hour',"yyyy-MM-dd HH:mm:ss"),'yyyy-MM-dd HH:mm') as timestamp, id_orig_h, id_orig_p, id_resp_h, id_resp_p, proto
group by id_resp_p
insert into mem_conn_table;

from intermediateStream#window.externalTimeBatch(timestamp,1min, timestamp, 1min) as i right outer join mem_conn_table[time:dateDiff(time:currentTimestamp(),cast(timestamp,"string"), "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss") == 0] as mc
on  i.timestamp == mc.timestamp 
SELECT (ifThenElse(mc.id_resp_p == 21,'FTP', ifThenElse(mc.id_resp_p == 22,'SSH', ifThenElse(mc.id_resp_p == 25,'SMTP', ifThenElse(mc.id_resp_p == 445,'SMB','MYSQL')))))  as protocol , cast(count(mc.id_resp_p),'int') as count
insert into ProtocolStream;

I am batching window with one external minute and then getting protocols and their counts, but it isn't giving me any output.

Any suggestions?

1

There are 1 best solutions below

1
On

You cannot use outer joins with in-memory tables. If you need, you can emit events resides in in-memory table to a intermediate stream and use it for joining (guide). However, for you scenario you can use externalTime window, instead of going with event tables. Try something similar to below;

@Import('MAINInStream:1.0.0')
define stream MAINInStream (ts string, uid string, id_orig_h string, id_orig_p int, id_resp_h string, id_resp_p int, proto string, service string, duration double, orig_bytes long, resp_bytes long, conn_state string, local_orig bool, local_resp bool, missed_bytes long, history string, orig_pkts long, orig_ip_bytes long, resp_pkts long, resp_ip_bytes long, tunnel_parents string, sensorname string);

@Export('ProtocolStream:1.0.0')
define stream ProtocolStream (protocol string, count long);

@Export('PercentageStream:1.0.0')
define stream PercentageStream (protocol string, count long, percentage double);


from MAINInStream
select 
    time:timestampInMilliseconds(time:dateAdd(str:replaceAll(ts,'T',' '), 5, 'hour',"yyyy-MM-dd HH:mm:ss"),'yyyy-MM-dd HH:mm') as timestamp, 
    (ifThenElse(mc.id_resp_p == 21,'FTP', ifThenElse(mc.id_resp_p == 22,'SSH', ifThenElse(mc.id_resp_p == 25,'SMTP', ifThenElse(mc.id_resp_p == 445,'SMB','MYSQL')))))  as protocol
    id_orig_h, id_orig_p, id_resp_h, id_resp_p, proto
insert into intermediateStream;

from intermediateStream#window.externalTime(timestamp, 1 day)
select timestamp, count() as totalCount
insert into totalCountStream;

from intermediateStream#window.externalTime(timestamp, 1 day)
select timestamp, protocol, count() as count
group by protocol
insert into perProtocolCountStream;

from perProtocolCountStream
select protocol, count
insert into ProtocolStream;

from totalCountStream#window.time(1 min) as tcs join perProtocolCountStream#window.time(1 min) as pcs
select pcs.protocol, pcs.count as count, ((pcs.count/tcs.totalCount)) * 100 as percentage
    on tcs.timestamp == pcs.timestamp
insert into PercentageStream;