How to dynamically format nested list of dict with less latency

123 Views Asked by At

I need your expertise to ease the nested dictionary formatting. I have a list of input signals which need to be grouped by u_id and by the timestamp field at minute precision, and then converted to the respective output format. I have posted the formatting I have tried. I need to format and process it as fast as possible, because time complexity matters. Help is highly appreciated.

Code snippet

final_output = []


def minute_group_key(x):
    """Return the grouping key of one signal: (u_id, start of its UTC minute).

    The minute start is the signal's 'start_ts' epoch value rounded down to a
    multiple of 60 seconds, which avoids formatting and re-parsing a date
    string for every signal.
    """
    return x['u_id'], int(x['start_ts'] // 60 * 60)


# itertools.groupby only merges *adjacent* items, so the list has to be sorted
# with exactly the same key that is used for grouping; otherwise signals of
# one minute can end up in several separate groups. sorted() is stable, so
# signals inside a group keep their input order.
sorted_signals = sorted(signals, key=minute_group_key)

data = itertools.groupby(sorted_signals, key=minute_group_key)

def format_signals(v):
    """Flatten every signal of a group into one dict for the output 'data' list.

    Args:
        v: iterable of signal dicts, each with a 'start_ts' value and a
            'sign' list of {'name': ..., 'val': ...} dicts.

    Returns:
        A list with one dict per signal. 'timestamp_utc' holds the signal's
        'start_ts'; every measurement is added under the part of its name
        that precedes the first '.'.
    """
    return [
        {
            'timestamp_utc': signal['start_ts'],
            **{
                measurement['name'].split('.')[0]: measurement['val']
                for measurement in signal['sign']
            },
        }
        for signal in v
    ]


for k, v in data:
    # k is the group key (u_id, epoch seconds of the minute start);
    # v iterates over the signals that belong to that group.
    output_format = {
        'ui_id': k[0],
        # The key is a UTC epoch value, so convert it with utcfromtimestamp();
        # fromtimestamp() would shift it into the machine's local time zone.
        'minute_utc': datetime.utcfromtimestamp(int(k[1])),
        'data': format_signals(v),
        # Current UTC time at whole-second precision; replace() drops the
        # microseconds without a strftime()/strptime() round trip.
        'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0),
    }
    final_output.append(output_format)

print(final_output)

Input

# Sample input: four signal records. The records are not ordered by u_id or
# start_ts, and the last record's first measurement is named 'speed.' (with a
# trailing dot).
signals = [
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 9},
                    {'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 287, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed.', 'val': 8.2},
                    {'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'}
       ]

Current output

   [{
    'ui_id': 287,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]

Required Output

    [{
    'ui_id': 287,
    'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
2

There are 2 best solutions below

0
Olvin Roght On BEST ANSWER

So, let's define a simple function which will extract from each object the keys which are required for grouping:

def extract(obj):
    """Build the sort/group key of a signal.

    Returns a tuple (u_id, f_id, c_id, minute_start), where minute_start is
    the signal's 'start_ts' rounded down to a multiple of 60 seconds.
    """
    u_id, f_id, c_id = obj['u_id'], obj['f_id'], obj['c_id']
    # Floor-divide to drop the seconds, then scale back to an epoch value.
    minute_start = (obj['start_ts'] // 60) * 60
    return u_id, f_id, c_id, minute_start

Note: to implement "minute precision" I've floor-divided the timestamp by 60 to cut off the seconds and multiplied by 60 to get a valid timestamp back.

Then let's group objects and form final list:

from itertools import groupby
from datetime import datetime
...
final_output = []
# groupby() only merges adjacent items, so the signals are sorted with the
# same key function that is used for grouping.
for (uid, fid, cid, ts), ss in groupby(sorted(signals, key=extract), extract):
    final_output.append({
        'ui_id': uid,
        'f_id': fid,
        'c_id': int(cid),
        'minute_utc': datetime.utcfromtimestamp(ts),
        'data': [
            {
                'timestamp_utc': s['start_ts'],
                # Keep only the part of the name before the first '.', so a
                # name such as 'speed.' is emitted as 'speed'.
                **{i['name'].split('.')[0]: i['val'] for i in s['sign']},
            }
            for s in ss
        ],
        # Whole-second precision, as shown in the required output.
        'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0),
    })

To print final_output in readable form we could use pprint:

from pprint import pprint
...
# sort_dicts=False (available since Python 3.8) prints dict keys in insertion
# order instead of sorting them.
pprint(final_output, sort_dicts=False)
2
antont On

Maybe this helps you to write the code in a more straightforward way. If you can just go through the signals and organize them in one loop, maybe you don't need the sort and groupby which may be heavier.

As you want to gather the signals based on the u_id, a dictionary is handy to get a single entry per u_id. The code below does that much; you just need to add the step that creates the output from this organized dict of signals:

# Bucket every signal under its u_id: {u_id: [signal, ...]}.
organized = {}

for s in signals:
    # setdefault() creates the empty bucket the first time a u_id is seen
    # and returns the existing one afterwards.
    organized.setdefault(s['u_id'], []).append(s)

pprint.pprint(organized)

It is executable here, with the output pasted below: https://repl.it/repls/ShallowQuintessentialInteger

{287: [{'c_id': '1234',
        'c_n': 'demo',
        'ca_id': 'AT123',
        'crt_ts': 1598440349,
        'f_id': 331,
        'map_crt_ts': 1598440351,
        'msg_cnt': 2,
        'sign': [{'name': 'speed', 'val': 10}, {'name': 'pwr', 'val': 1417}],
        'start_ts': 1598440344,
        'type': 'na',
        'u_id': 287,
        'window': 'na'}],
 288: [{'c_id': '1234',
        'c_n': 'demo',
        'ca_id': 'AT123',
        'crt_ts': 1598440349,
        'f_id': 331,
        'map_crt_ts': 1598440351,
        'msg_cnt': 2,
        'sign': [{'name': 'speed', 'val': 9}, {'name': 'pwr', 'val': 1415}],
        'start_ts': 1598440244,
        'type': 'na',
        'u_id': 288,
        'window': 'na'},
       {'c_id': '1234',
        'c_n': 'demo',
        'ca_id': 'AT123',
        'crt_ts': 1598440349,
        'f_id': 331,
        'map_crt_ts': 1598440351,
        'msg_cnt': 2,
        'sign': [{'name': 'speed', 'val': 10}, {'name': 'pwr', 'val': 1416}],
        'start_ts': 1598440243,
        'type': 'na',
        'u_id': 288,
        'window': 'na'},
       {'c_id': '1234',
        'c_n': 'demo',
        'ca_id': 'AT172',
        'crt_ts': 1598440349,
        'f_id': 331,
        'map_crt_ts': 1598440351,
        'msg_cnt': 2,
        'sign': [{'name': 'speed.', 'val': 8.2}, {'name': 'pwr', 'val': 925}],
        'start_ts': 1598440345,
        'type': 'na',
        'u_id': 288,
        'window': 'na'}]}