How can I brute-force 5 characters in Python to crack SHA-256 with multiprocessing or multithreading?


Everything is fine with 4 characters and Pool.map; it runs in about 46s:

import multiprocessing as mp
from tqdm import tqdm
from hashlib import sha256
from string import printable
from itertools import product, count

charset = printable[:-6]
chars = [c.encode() for c in charset]

def e(i):
    # sha256(W1n^r)  = 705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe
    if sha256(i).hexdigest() == "705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe":
        print(f"FOUND msg: {i}")
        return f"FOUND msg: {i}"

def g():
    for length in count(start=1):
        if length == 5:
            break
        for pwd in product(chars, repeat=length):
            yield b''.join(pwd)
    print("generator done")

if __name__ == '__main__':
    p = mp.Pool(4)
    for res in tqdm(p.map(e, g())):
        if res is not None:
            break

However, when the number of characters increased to 5, the generator overwhelmed memory and my computer crashed. I looked up solutions that suggested using imap, but that took up to 2 hours, while a regular single-process brute force only takes about 1m33s:

https://i.stack.imgur.com/3cz1K.png

import multiprocessing as mp
from tqdm import tqdm
from hashlib import sha256
from string import printable
from itertools import product, count

charset = printable[:-6]
chars = [c.encode() for c in charset]

arr = []
def e(i):
    if sha256(i).hexdigest() == "705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe": # W1n^r
        print(f"FOUND msg: {i}")
        return True

def g():
    for length in count(start=1):
        if length == 5:
            break
        for pwd in product(chars, repeat=length):
            yield b''.join(pwd)
    print("generator done")

if __name__ == '__main__':
    
    for i in tqdm(g(), total=78914410):
        res = e(i)
        if res is not None:
            break

How should I handle the generator so that I can combine it with multiprocessing to brute-force 5 characters?

3 Answers

Accepted answer (SIGHUP):

Multiprocessing would be superior to multithreading as the core functionality is CPU-bound.

You can use queues for communication between the main (parent) process and the subprocesses.

The following code runs for just over 12 minutes on my system. Results will vary with the platform and with the number of subprocesses, the batch size and the password length.

from multiprocessing import Process, Queue
from string import printable
from itertools import product, cycle
from hashlib import sha256
from queue import Empty
from time import perf_counter


PROCS = 7 # one less than the CPU count
MAXLEN = 5
DIGEST = "705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe"
BATCH = 10_000 # empirically determined to be a fairly good batch size

# check a batch of passwords
def process(qsend, qres):
    while batch := qsend.get():
        for v in map(str.encode, batch):
            if sha256(v).hexdigest() == DIGEST:
                qres.put(v)
                break

# generate passwords
def genpwd():
    for length in range(1, MAXLEN + 1):
        for pwd in product(printable[:-6], repeat=length):
            yield "".join(pwd)


def main():
    qres = Queue() # response queue
    # start PROCS processes each with a discrete input queue
    # each proc uses the same response queue
    procs = []
    for queue in (queues := [Queue() for _ in range(PROCS)]):
        (proc := Process(target=process, args=(queue, qres))).start()
        procs.append(proc)

    batch = []
    qc = cycle(queues)
    solution = None

    for pwd in genpwd():
        batch.append(pwd)
        if len(batch) == BATCH:
            # send batch to the next queue in the cycle
            next(qc).put(batch)
            batch = []
            # occasional check for a response
            try:
                solution = qres.get(block=False)
                break
            except Empty:
                pass

    # if there's no solution (yet) make sure anything left over in the batch list is submitted
    if not solution:
        next(qc).put(batch)
    
    # tells each process to stop
    for queue in queues:
        queue.put(None)

    # wait for all subprocesses to end
    for p in procs:
        p.join()

    # if there was no solution, check the response queue once more
    # ...because there could be subprocesses still running when the main loop ended (generator exhausted)
    if not solution:
        try:
            solution = qres.get(block=False)
        except Empty:
            pass

    if solution:
        print(f"Solution = {solution}")
    else:
        print("No solution found")

if __name__ == "__main__":
    start = perf_counter()
    main()
    end = perf_counter()
    duration = int(end - start)
    print(f"Duration={duration}s")

Output:

Solution = W1n^r
Duration=730s
Answer (Kelly Bundy):

Don't micro-manage the processes.

You give them microscopic tasks like checking the single password "b3.cw", which incurs a large communication overhead for transmitting each password and its result. Instead, give them tasks like checking all passwords that start with "b3" and have 3 more characters.

import multiprocessing as mp
from tqdm import tqdm
from hashlib import sha256
from string import printable
from itertools import product, count

charset = printable[:-6]
chars = [c.encode() for c in charset]

def e(args):
    prefix, suffix_length = args
    for suffix in product(chars, repeat=suffix_length):
        pwd = prefix + suffix
        i = b''.join(pwd)
        if sha256(i).hexdigest() == "705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe":
            print(f"FOUND msg: {i}")
            return f"FOUND msg: {i}"

def g():
    for length in count(start=1):
        if length == 6:
            break
        prefix_length = length // 2
        suffix_length = length - prefix_length
        for prefix in product(chars, repeat=prefix_length):
            yield prefix, suffix_length
    print("generator done")

if __name__ == '__main__':
    p = mp.Pool(4)
    for res in tqdm(p.imap(e, g())):
        if res is not None:
            break
Answer (no comment):

Replace p.map(e, g()) with p.imap(e, g(), 10000). imap avoids memory exhaustion because it consumes the generator lazily, and the chunksize of 10000 (the default is 1) avoids a lot of communication overhead, similar to Codist's batching. That made it ~125 times faster in my testing.
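
For reference, a minimal sketch of that change applied to the question's first script. The hash check, charset and pool size of 4 are taken unchanged from the question; raising the length cutoff to 6 so that 5-character candidates are actually generated is an assumption.

import multiprocessing as mp
from tqdm import tqdm
from hashlib import sha256
from string import printable
from itertools import product, count

charset = printable[:-6]
chars = [c.encode() for c in charset]

def e(i):
    if sha256(i).hexdigest() == "705c098135f1d15e28157c6424b17a7803ccbb6a218baaa312845de7b43303fe":
        print(f"FOUND msg: {i}")
        return f"FOUND msg: {i}"

def g():
    # assumption: break at 6 so lengths 1..5 are covered
    for length in count(start=1):
        if length == 6:
            break
        for pwd in product(chars, repeat=length):
            yield b''.join(pwd)

if __name__ == '__main__':
    p = mp.Pool(4)
    # imap pulls from the generator lazily instead of materialising a list,
    # and chunksize=10000 hands each worker a batch of candidates per task
    for res in tqdm(p.imap(e, g(), 10000)):
        if res is not None:
            break

The chunksize argument is what keeps the inter-process overhead low: each task carries 10000 candidate passwords instead of one, so far fewer pickled messages cross the process boundary.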