How to convert a .pth file into .prototxt.txt and .caffemodel files?

116 Views Asked by At

After much research, I finally got the following script (which works):

from detecto import core, utils, visualize
from detecto.visualize import show_labeled_image, plot_prediction_grid
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
import os

# Augmentation / preprocessing steps, applied in the order listed:
# conversion to a PIL image first, tensor conversion and normalization last.
augmentation_steps = [
    transforms.ToPILImage(),
    transforms.Resize(900),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(saturation=0.2),
    transforms.ToTensor(),
    utils.normalize_transform(),
]
custom_transforms = transforms.Compose(augmentation_steps)

# Dataset folders on the mounted Google Drive (this part runs on Google Colab)
TRAIN_DIR = '/content/drive/MyDrive/Dataset/train/'
TEST_DIR = '/content/drive/MyDrive/Dataset/test/'
# Names of the object classes the model is built for
LABELS = ['obj1', 'obj2', 'obj3', 'obj4']

# Only the training set goes through the augmentation pipeline
Train_dataset = core.Dataset(TRAIN_DIR, transform=custom_transforms)
Test_dataset = core.Dataset(TEST_DIR)
loader = core.DataLoader(Train_dataset, shuffle=True, batch_size=2)
model = core.Model(LABELS)
losses = model.fit(
    loader,
    Test_dataset,
    epochs=50,
    learning_rate=0.001,
    lr_step_size=5,
    verbose=True,
)

# Plot the losses returned by fit()
plt.plot(losses)
plt.show()

# Saving Model
model.save('model.pth')


## PART 2 : TESTING THE MODEL
# Reload the weights saved in 'model.pth'; the class names are passed to load() again
class_names = ['obj1', 'obj2', 'obj3', 'obj4']
model = core.Model.load('model.pth', class_names)

# Run the model on a single test image
test_image_path = '/content/drive/MyDrive/Dataset/test/test.png'
image = utils.read_image(test_image_path)
predictions = model.predict(image)
labels, boxes, scores = predictions

# Show every predicted box, whatever its score
show_labeled_image(image, boxes, labels)

# Threshold to avoid wrong results: keep only predictions scoring above it
thresh = 0.5
filtered_indices = np.where(scores > thresh)

# labels is indexed one position at a time, so use a plain list of positions
num_list = filtered_indices[0].tolist()
filtered_labels = [labels[position] for position in num_list]

# boxes and scores are indexed directly with the np.where result
filtered_boxes = boxes[filtered_indices]
filtered_scores = scores[filtered_indices]

show_labeled_image(image, filtered_boxes, filtered_labels)

and this script, which also works correctly:

# This script is equivalent to PART 2 of the previous one, but uses a Caffe model
import sys
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
import os
# Arguments construction
# Every option has a default, so the script runs unchanged with no arguments
# (using the model files located next to this script) and any subset of the
# options can be overridden from the terminal, e.g.:
#   python3 ObjectRecognition.py --prototxt MobileNetSSD_deploy.prototxt.txt --model MobileNetSSD_deploy.caffemodel
script_dir = os.path.dirname(__file__)
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt",
    default=os.path.join(script_dir, "MobileNetSSD_deploy.prototxt.txt"),
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model",
    default=os.path.join(script_dir, "MobileNetSSD_deploy.caffemodel"),
    help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
# args is a plain dict with the keys "prototxt", "model" and "confidence"
args = vars(ap.parse_args())
# MobileNet SSD object list init.
# Labels are in French ("arriere-plan" = background); the list is indexed
# with the class index read from each detection, so the order must be kept.
CLASSES = [
    "arriere-plan",
    "avion",
    "velo",
    "oiseau",
    "bateau",
    "bouteille",
    "autobus",
    "voiture",
    "chat",
    "chaise",
    "vache",
    "table",
    "chien",
    "cheval",
    "moto",
    "personne",
    "plante en pot",
    "mouton",
    "sofa",
    "train",
    "moniteur",
]
# One random color per class: a row of 3 values drawn uniformly from [0, 255)
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
# Load model file: the network is built from the prototxt description
# and the caffemodel weights given in args
print("Load Neural Network...")
prototxt_path, weights_path = args["prototxt"], args["model"]
net = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path)
if __name__ == '__main__':
    # Camera initialisation (alternative frame sources, currently disabled
    # because the frame is read from an image file instead)
    # print("Start Camera...")
    # vs = VideoStream(src=0, resolution=(1600, 1200)).start()
    # vs = VideoStream(usePiCamera=True, resolution=(1600, 1200)).start()
    # vs = cv2.VideoCapture('needles/hellas1.png') #from video
    time.sleep(2.0)
    fps = FPS().start()

    # Path of the test image, built once instead of on every loop iteration
    image_path = os.path.join(os.path.dirname(__file__), 'Dataset', 'test', 'testimg.png')

    # Main loop: runs until the letter q is pressed in the display window
    while True:
        # Get the next frame
        # frame = vs.read()
        frame = cv2.imread(image_path)  # from image file
        # ret, frame = vs.read() # from video or ip cam
        if frame is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or cannot be decoded; fail here with a clear message
            # rather than with an obscure error inside imutils.resize
            raise FileNotFoundError("Unable to read image: {}".format(image_path))
        # Limit the frame to a width of 800 pixels
        frame = imutils.resize(frame, width=800)
        # Create blob from image (300x300 input, scale factor 0.007843, mean 127.5)
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)
        # Feed input to neural network
        net.setInput(blob)
        detections = net.forward()
        # Detection loop: one entry per candidate along axis 2
        detection_drawn = False
        for i in range(detections.shape[2]):
            # Object detection probability
            confidence = detections[0, 0, i, 2]

            # Suppress low probability
            if confidence > args["confidence"]:
                # Get class index and position of detected object; the box
                # values are scaled by the frame width/height to get pixels
                idx = int(detections[0, 0, i, 1])
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                # Create box and label
                label = "{}: {:.2f}%".format(CLASSES[idx],
                    confidence * 100)
                cv2.rectangle(frame, (startX, startY), (endX, endY),
                    COLORS[idx], 2)
                # Put the label above the box, or inside it when the box
                # is too close to the top of the frame
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
                detection_drawn = True

        # Save the annotated image once per frame (only when something was
        # detected) instead of rewriting the file after every single box
        if detection_drawn:
            cv2.imwrite("detection.png", frame)

        # Show video frame
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        # Exit script with letter q
        if key == ord("q"):
            break
        # FPS update
        fps.update()
    # Stops fps and display info
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    cv2.destroyAllWindows()
    # vs.stop()
    # vs.release()

My question is how to convert the .pth file generated by the first program into the two files (.prototxt.txt and .caffemodel) that the second script expects, so that the two scripts can be linked. Alternatively, the second script could be adapted to work with the .pth file produced by the first. I don't know which approach is better, so I haven't committed to either one, and so far neither works.

I must admit that I'm at a loss when it comes to the many models on offer. I've seen the names YOLO, TensorFlow, ONNX, etc. come up, but my research has come to nothing.

I found https://github.com/WoodsGao/pytorch2caffe but README.md is not very explicit.

I hope I'm not being too messy :).

Note: [This question](https://stackoverflow.com/questions/30902056/generate-caffemodel-file) seems to be the same, but it hasn't been answered.

0

There are 0 best solutions below