How to convert XML Files into Text files (YOLOV3 Format) for object detection

3k Views Asked by At

How we convert XML annotation folder into text or YOLOv3 Format for detection??? I used this code for conversion but it only take one xml image and convert into .txt ..but i want to convert my full folder at once time. You have Any easy solution to convert xml files into text files. i Have 15000+ images.

from xml.dom import minidom
import os
import glob

lut={}
lut["14111"] =0
lut["14131"] =1
lut["14141"] =2

def convert_coordinates(size, box):
  dw = 1.0/size[0]
  dh = 1.0/size[1]
  x = (box[0]+box[1])/2.0
  y = (box[2]+box[3])/2.0
  w = box[1]-box[0]
  h = box[3]-box[2]
  x = x*dw
  w = w*dw
  y = y*dh
  h = h*dh
  return (x,y,w,h)


def convert_xml2yolo( lut ):

  for fname in glob.glob("/content/gdrive/MyDrive/Dataset /Annotation/14111_00000002.xml"):
      
      xmldoc = minidom.parse(fname)
      
      fname_out = (fname[:-4]+'.txt')

      with open(fname_out, "w") as f:

          itemlist = xmldoc.getElementsByTagName('object')
          size = xmldoc.getElementsByTagName('size')[0]
          width = int((size.getElementsByTagName('width')[0]).firstChild.data)
          height = int((size.getElementsByTagName('height')[0]).firstChild.data)

          for item in itemlist:
              # get class label
              classid =  (item.getElementsByTagName('name')[0]).firstChild.data
              if classid in lut:
                  label_str = str(lut[classid])
              else:
                  label_str = "-1"
                  print ("warning: label '%s' not in look-up table" % classid)

              # get bbox coordinates
              xmin = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('xmin')[0]).firstChild.data
              ymin = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('ymin')[0]).firstChild.data
              xmax = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('xmax')[0]).firstChild.data
              ymax = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('ymax')[0]).firstChild.data
              b = (float(xmin), float(xmax), float(ymin), float(ymax))
              bb = convert_coordinates((width,height), b)
              #print(bb)

              f.write(label_str + " " + " ".join([("%.6f" % a) for a in bb]) + '\n')

      print ("wrote %s" % fname_out)



def main():
  convert_xml2yolo( lut )


if __name__ == '__main__':
  main()
2

There are 2 best solutions below

1
On

Follow this github repository.

0
On

You just need to edit this line:

for fname in glob.glob("/content/gdrive/MyDrive/Dataset/Annotation/*.xml"):

This means you will read all the .xml files in the Annotation folder and convert them to .txt files.