Libtorch shared library segfaults

41 Views Asked by At

I'm trying to run an example of object detection using YOLO and libtorch. Here is the source code:

//main.cpp
#include <ATen/core/stack.h>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <torch/csrc/autograd/generated/variable_factories.h>
#include <torch/csrc/jit/api/module.h>
#include <torch/script.h>
#include <opencv2/opencv.hpp>
#include <unistd.h>
#include "detect.h"

int main() {
    std::cout << "hey\n";
    torch::jit::script::Module
        model = torch::jit::load("uav_model.torchscript");
    std::cout << "size is " << sizeof(model) << '\n';
    std::string source = "0001.jpg";
    cv::Mat img = cv::imread(source);
    cv::Mat imgNorm = img;
    std::cout << "Image read\n";
    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
    cv::normalize(img, imgNorm, 0.0, 1.0, cv::NORM_MINMAX, CV_32F);
    std::cout << "Image normalized\n";
    //imgNorm?
    std::vector<torch::jit::IValue> inputs = {
        torch::from_blob(
            imgNorm.data,
            {640, 640, 3},
            torch::kFloat32
        ).permute({2, 0, 1}).unsqueeze(0)
    };
    std::cout << "Inputs created:\n";
    std::cout << inputs.size() << ' ' << sizeof(torch::jit::IValue) <<  '\n';

    //at::Tensor outputs;
    std::cout << "Outputs created\n";
    auto outputs = model(inputs).toTensor();
    //outputs = model(inputs).toTensor();
    std::cout << "Outputs initialized\n";

    std::vector<Box> boxes = getBoxes(outputs); 
    std::cout << "Boxes created:\n";
    std::cout << boxes.size() << '\n';
    highlightBoxes(img, boxes);
    cv::imshow("Result", img);
    return 0;
}
//detect.h
#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

/// Rectangle described by two corner points plus a confidence value.
class Box {
    public:
        // Corner coordinates: (x1, y1) and (x2, y2).
        int x1, y1, x2, y2;
        // Confidence value attached to the rectangle.
        float conf;

        /// Stores the four coordinates and the confidence exactly as given.
        Box(int left, int top, int right, int bottom, float score)
            : x1(left), y1(top), x2(right), y2(bottom), conf(score) {}
};

float iou(Box &fb, Box &sb) {
    float inter = std::max(std::min(fb.x2, sb.x2) - std::min(fb.x1, sb.x1), 0) * std::max(std::min(fb.y2, sb.y2) - std::min(fb.y1, sb.y1), 0);
    float union_ = (fb.x2-fb.x1)*(fb.y2-fb.y1) + (sb.x2-sb.x1)*(sb.y2-sb.y1) - inter;
    return inter / union_;
}

std::vector<Box> nms(std::vector<Box> &boxes, float iouThres) {
    std::vector<Box> supBoxes;
    for (Box box: boxes) {
        bool valid = true;
        for (Box supBox: supBoxes) {
            if (iou(box, supBox) > iouThres) {
                valid = false;
                break;
            }
        }
        if (valid == true) {
            supBoxes.push_back(box);
        }
    }
    return supBoxes;
}

/// Turns a raw model output tensor into a filtered list of boxes.
///
/// The tensor is indexed as outputs[batch][channel][box]; channels 0..3 are
/// read as centre x, centre y, width and height, and channel 4 as confidence.
/// NOTE(review): only channel 4 is treated as the score - confirm this matches
/// the head layout of the exported model.
///
/// @param outputs   model output, indexed as described above
/// @param confThres candidates with confidence below this are dropped
/// @param iouThres  overlap threshold forwarded to nms
/// @return boxes that passed the confidence filter and nms, highest
///         confidence first
std::vector<Box> getBoxes (
    at::Tensor &outputs,
    float confThres = 0.25,
    float iouThres = 0.15
) {
    std::vector<Box> candidates;
    const int64_t numBatches = outputs.size(0);
    const int64_t numBoxes = outputs.size(2);

    // int64_t counters: a 16-bit counter would wrap (and never terminate) once
    // a dimension exceeds 65535.
    for (int64_t ibatch = 0; ibatch < numBatches; ++ibatch) {
        // Slice each channel once per batch instead of re-indexing per element.
        const at::Tensor batch = outputs[ibatch];
        const at::Tensor cxRow = batch[0];
        const at::Tensor cyRow = batch[1];
        const at::Tensor wRow = batch[2];
        const at::Tensor hRow = batch[3];
        const at::Tensor confRow = batch[4];

        for (int64_t ibox = 0; ibox < numBoxes; ++ibox) {
            const float conf = confRow[ibox].item<float>();
            if (conf >= confThres) {
                const int cx = cxRow[ibox].item<int>();
                const int cy = cyRow[ibox].item<int>();
                const int w = wRow[ibox].item<int>();
                const int h = hRow[ibox].item<int>();
                // Signed arithmetic: with unsigned 16-bit storage a box touching
                // the left/top border (cx < w / 2) wrapped around to ~65535.
                candidates.emplace_back(
                    cx - w / 2,
                    cy - h / 2,
                    cx + w / 2,
                    cy + h / 2,
                    conf);
            }
        }
    }

    // Highest confidence first, so nms prefers the most confident box of each
    // overlapping group.
    std::sort(candidates.begin(), candidates.end(),
              [](const Box &b1, const Box &b2) { return b1.conf > b2.conf; });
    return nms(candidates, iouThres);
}

/// Draws every box onto `img` together with a filled label showing its
/// confidence.
///
/// @param img   image that is drawn on in place
/// @param boxes boxes to draw; coordinates are used as pixel positions in `img`
void highlightBoxes(cv::Mat &img, std::vector<Box> &boxes) {
    const cv::Scalar rectColor(0, 192, 0);
    const cv::Scalar textColor(255, 255, 255);
    const int fontScale = 2;
    // Number of decimals shown in the label.
    const std::string::size_type confPrecis = 2;

    for (const Box &box : boxes) {
        // std::to_string always prints six decimals; cut the text down to
        // confPrecis decimals (truncating, not rounding).
        std::string text = std::to_string(box.conf);
        const std::string::size_type dot = text.find('.');
        if (dot != std::string::npos && dot + 1 + confPrecis < text.size()) {
            text.erase(dot + 1 + confPrecis);
        }

        // Label background size is estimated from the character count.
        const int labelWidth = static_cast<int>(text.length()) * fontScale * 9;
        const int labelHeight = fontScale * 12;

        cv::rectangle(img, cv::Point(box.x1, box.y1), cv::Point(box.x2, box.y2), rectColor, 2);
        // Thickness -1 fills the rectangle; it sits directly above the box.
        cv::rectangle(
            img,
            cv::Point(box.x1, box.y1 - labelHeight),
            cv::Point(box.x1 + labelWidth, box.y1),
            rectColor,
            -1
        );
        cv::putText(img, text, cv::Point(box.x1, box.y1), cv::FONT_HERSHEY_PLAIN, fontScale, textColor, 2);
    }
}

Calling model.forward(inputs) causes a segfault, and I cannot understand why. Here is the core dump:

Stack trace of thread 26329:
                #0  0x00007fbc3fa53800 n/a (libtorch_cpu.so + 0x6a53800)
                #1  0x00007fbc3a38fa2d _ZNK2at18TensorIteratorBase15serial_for_eachEN3c1012function_refIFvPPcPKlllE>
                #2  0x00007fbc3a38fdee n/a (libtorch_cpu.so + 0x138fdee)
                #3  0x00007fbc360c3c96 gomp_thread_start (libgomp.so.1 + 0x20c96)
                #4  0x00007fbc38aaa9eb n/a (libc.so.6 + 0x8c9eb)
                #5  0x00007fbc38b2e7cc n/a (libc.so.6 + 0x1107cc)

                Stack trace of thread 26310:
                #0  0x00007fbc38b2c73d syscall (libc.so.6 + 0x10e73d)
                #1  0x00007fbc458b3fbc n/a (libtbb.so.12 + 0xefbc)
                #2  0x00007fbc458c6dc3 n/a (libtbb.so.12 + 0x21dc3)
                #3  0x00007fbc38aaa9eb n/a (libc.so.6 + 0x8c9eb)
                #4  0x00007fbc38b2e7cc n/a (libc.so.6 + 0x1107cc)

objdump of libtorch_cpu.so starting from address 0x6a53800


/usr/lib/libtorch_cpu.so:     file format elf64-x86-64

Disassembly of section .text:

0000000006a53800 <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a2100>:
 6a53800:   c5 fa 10 07             vmovss (%rdi),%xmm0
 6a53804:   48 83 c2 01             add    $0x1,%rdx
 6a53808:   4c 01 d7                add    %r10,%rdi
 6a5380b:   c4 c1 7a 11 01          vmovss %xmm0,(%r9)
 6a53810:   4d 01 d9                add    %r11,%r9
 6a53813:   48 39 d0                cmp    %rdx,%rax
 6a53816:   75 e8                   jne    6a53800 <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a2100>
 6a53818:   48 83 c3 01             add    $0x1,%rbx
 6a5381c:   4c 01 e1                add    %r12,%rcx
 6a5381f:   4c 01 ee                add    %r13,%rsi
 6a53822:   4c 39 c3                cmp    %r8,%rbx
 6a53825:   75 cb                   jne    6a537f2 <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a20f2>
 6a53827:   48 8d 65 d8             lea    -0x28(%rbp),%rsp
 6a5382b:   5b                      pop    %rbx
 6a5382c:   41 5c                   pop    %r12
 6a5382e:   41 5d                   pop    %r13
 6a53830:   41 5e                   pop    %r14
 6a53832:   41 5f                   pop    %r15
 6a53834:   5d                      pop    %rbp
 6a53835:   c3                      ret
 6a53836:   66 2e 0f 1f 84 00 00    cs nopw 0x0(%rax,%rax,1)
 6a5383d:   00 00 00 
 6a53840:   48 83 3a 04             cmpq   $0x4,(%rdx)
 6a53844:   0f 85 82 ff ff ff       jne    6a537cc <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a20cc>
 6a5384a:   4d 85 c0                test   %r8,%r8
 6a5384d:   0f 8e 7e ff ff ff       jle    6a537d1 <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a20d1>
 6a53853:   4c 8d 48 f0             lea    -0x10(%rax),%r9
 6a53857:   48 8d 78 f1             lea    -0xf(%rax),%rdi
 6a5385b:   45 31 db                xor    %r11d,%r11d
 6a5385e:   49 83 e1 f0             and    $0xfffffffffffffff0,%r9
 6a53862:   4d 8d 61 10             lea    0x10(%r9),%r12
 6a53866:   48 83 f8 0f             cmp    $0xf,%rax
 6a5386a:   0f 8e e8 05 00 00       jle    6a53e58 <_ZN5torch9serialize13OutputArchiveC1ESt10shared_ptrINS_3jit15CompilationUnitEE@@Base+0x8a2758>

I used different versions of libtorch and OpenCV, and tried to run the code with 3 different models; all of them segfaulted.

If you need any additional info, please let me know. Thanks in advance.

0

There are 0 best solutions below