Convert two input files into formatted string rules with Python

119 Views Asked by At

Let's say there are two input files as below:
input1.txt (only composed of hstep3_*)

hstep3_num00 = a5;
hstep3_num01 = 3b;
hstep3_num02 = 4f;
hstep3_num03 = 27;

input2.txt (the items inside the braces are arbitrary strings separated by commas)

some random strings that are not 'hstep' form
... 
match hstep1_num00 = {eau,t,nb,v,d}; // MATCH
match hstep1_num01 = {c,bul,kv,e}; // MATCH
... 
match hstep3_num00 = {u_ku,b,ntv,q}; // MATCH
match hstep3_num01 = {qq,rask,cb_p}; // MATCH
match hstep3_num02 = {c,a,ha,w,ykl}; // MATCH
match hstep3_num03 = {p,gu,enb_q_b,z,d}; // MATCH
...
some random strings that are not 'hstep' form

What I want to do is take every left-hand side from input1.txt, find the matching entry in input2.txt, and pair that entry's braced list with the value from input1.txt.

So, the final output.txt looks as follows: output.txt

{u_ku,b,ntv,q}     = a5;
{qq,rask,cb_p}     = 3b;
{c,a,ha,w,ykl}     = 4f;
{p,gu,enb_q_b,z,d} = 27;

To do this in Python, I've thought about using readlines() and split(). Also, since the number of characters inside the braces is not always the same from line to line, I thought I would have to use regular expressions to capture everything inside {}, but it doesn't work as I expected...
Can anybody give me a solution or some guidelines for this?

Any help would be much appreciated.. thanks!

3

There are 3 best solutions below

0
Edo Akse On BEST ANSWER

The code below is not optimized; it is written so that the OP can understand the steps involved a bit better.

# Read input1.txt into a dict mapping each name to its value,
# e.g. "hstep3_num00" -> "a5;".
input1 = {}
with open("input1.txt") as infile:
    for line in infile:
        # partition() never raises: blank or malformed lines simply have no
        # " = " separator and are skipped instead of crashing the unpacking.
        key, sep, value = line.partition(" = ")
        if not sep:
            continue
        # Strip the trailing newline here so the output step controls line
        # endings itself (the last input line may not end with one).
        input1[key.strip()] = value.strip()

# Read input2.txt, keep the "match hstep3..." entries that also exist in
# input1, and track the longest braced list for column alignment.
input2 = []
maxlen = 0
with open("input2.txt") as infile:
    for line in infile:
        # only process lines that start with "match hstep3"
        if not line.startswith("match hstep3"):
            continue
        key = line.split(" ")[1]
        _, sep, rest = line.partition("= ")
        if not sep:
            # no "= " on this line, so there is no braced list to extract
            continue
        if key not in input1:
            # nothing to pair this entry with; skipping avoids a KeyError
            # below and keeps unmatched entries out of the padding width
            continue
        value = rest.split(";")[0]
        input2.append([key, value])
        # remember the widest braced list for the padding below
        maxlen = max(maxlen, len(value))

# Finally, produce the aligned output and write it to output.txt.
with open("output.txt", "w") as outfile:
    for key, value in input2:
        # left-justify the braced list to maxlen so the "=" signs line up
        outfile.write(f"{value:<{maxlen}} = {input1[key]}\n")

output.txt file contents:

{u_ku,b,ntv,q}     = a5;
{qq,rask,cb_p}     = 3b;
{c,a,ha,w,ykl}     = 4f;
{p,gu,enb_q_b,z,d} = 27;
4
mozway On

You can do this in two steps with regular expressions. First, use re.findall on the contents of input2.txt to build a dictionary from the matching lines; then loop over the lines of input1.txt and perform the substitutions with re.sub:

import re
# Build a {name: braced list} lookup from every
# "match <name> = <braced list>; // MATCH" line of input2.txt.
with open('input2.txt') as f2:
    text = f2.read()
dic = dict(re.findall(r'match ([^\s=]+) = ([^;]+); // MATCH', text))
# {'hstep1_num00': '{eau,t,nb,v,d}', 'hstep1_num01': '{c,bul,kv,e}',
#  'hstep3_num00': '{u_ku,b,ntv,q}', 'hstep3_num01': '{qq,rask,cb_p}',
#  'hstep3_num02': '{c,a,ha,w,ykl}', 'hstep3_num03': '{p,gu,enb_q_b,z,d}'}


def _lookup(m):
    """Return the braced list for the matched name, or '' if it is unknown."""
    return dic.get(m.group(), '')


# Replace the leading name on each line of input1.txt with its braced list.
with open('input1.txt') as f1, open('output1.txt', 'w') as f_out:
    f_out.writelines(re.sub(r'^\S+', _lookup, line) for line in f1)

Output file:

{u_ku,b,ntv,q} = a5;
{qq,rask,cb_p} = 3b;
{c,a,ha,w,ykl} = 4f;
{p,gu,enb_q_b,z,d} = 27;

regex demo 1, regex demo 2

alignment

If alignment of the strings is needed, then you can modify the above approach.

fixed width (or based on the maximum possible width):

import re

# Build a {name: braced list} lookup from the MATCH lines of input2.txt.
with open('input2.txt') as f2:
    text = f2.read()
dic = dict(re.findall(r'match ([^\s=]+) = ([^;]+); // MATCH', text))

# Pad every replacement to the longest braced list among the hstep3_ entries.
WIDTH = max(len(braces) for name, braces in dic.items() if name.startswith('hstep3_'))


def _padded(m):
    """Return the braced list for the matched name, left-justified to WIDTH."""
    return dic.get(m.group(), '').ljust(WIDTH)


# Replace the leading name on each line of input1.txt with the padded list.
with open('input1.txt') as f1, open('output1.txt', 'w') as f_out:
    f_out.writelines(re.sub(r'^\S+', _padded, line) for line in f1)

Dynamic width, based on the longest string:

import re

# Build a {name: braced list} lookup from the MATCH lines of input2.txt.
with open('input2.txt') as f2:
    dic = dict(re.findall(r'match ([^\s=]+) = ([^;]+); // MATCH', f2.read()))


def _first_token(line):
    """Return the first whitespace-delimited token of *line*, or '' if blank."""
    tokens = line.split(maxsplit=1)
    # A blank line splits into an empty list; indexing it would raise.
    return tokens[0] if tokens else ''


# First pass over input1.txt: find the widest braced list that will actually
# be written. default=0 keeps an empty input1.txt from raising ValueError.
with open('input1.txt') as f1:
    WIDTH = max((len(dic.get(_first_token(line), '')) for line in f1), default=0)

# Second pass: replace the leading name on each line with its braced list,
# left-justified to WIDTH so the "=" signs line up.
with open('input1.txt') as f1, open('output1.txt', 'w') as f_out:
    for line in f1:
        f_out.write(re.sub(r'^\S+', lambda m: dic.get(m.group(), '').ljust(WIDTH), line))

Output:

{u_ku,b,ntv,q}     = a5;
{qq,rask,cb_p}     = 3b;
{c,a,ha,w,ykl}     = 4f;
{p,gu,enb_q_b,z,d} = 27;
0
Muhammad Shamshad Aslam On

If your data is in the format you described, or close to it, then this should work.

# name -> braced list, parsed from file2.txt
result_2_dict = {}
# name -> value, parsed from file1.txt
result_1_dict = {}
# raw lines of each file, split on '='
file_2_list = []
file_1_list = []

with open('file2.txt', 'r') as file:
    for line in file:
        parts = line.split('=')
        file_2_list.append(parts)

for item in file_2_list:
    # A line can contain "h" and still have no '=' (ordinary prose does);
    # without a right-hand side there is nothing to parse, so skip it rather
    # than fail on item[1].
    if len(item) < 2 or "h" not in item[0]:
        continue
    name = item[0].strip()
    # Remove the literal "match" prefix. str.strip("match") would instead
    # strip any of the characters m/a/t/c/h from both ends of the string.
    if name.startswith("match"):
        name = name[len("match"):].strip()
    # Keep the first space-delimited token of the right-hand side, without ';'.
    result_2_dict[name] = item[1].strip().split(" ")[0].strip(";")


with open('file1.txt', 'r') as file:
    for line in file:
        parts = line.split('=')
        file_1_list.append(parts)

for item in file_1_list:
    # Same guard as above: ignore lines that have no '=' to split on.
    if len(item) < 2 or "h" not in item[0]:
        continue
    result_1_dict[item[0].strip()] = item[1].strip().strip(";")


# braced list -> value, for every name present in both files
# (iteration follows the order of file2.txt)
matches_values = {}

for key, value in result_2_dict.items():
    if key in result_1_dict:
        matches_values[value] = result_1_dict[key]


for key, value in matches_values.items():
    print(f"{key} = {value}")