Python 3 Parsing XML from URL - Script doesn't works correctly, only wrote the root element

107 Views Asked by At

I have problem with parsing document from XML, I want to parse below specified elements and build own XML feed (and based new XML feed with products which has element values min_units_per_order on 1. But script only prints the name element and in file I have only element <SHOP /> and nothing..Do you have some tips?

Code here:

#!/usr/bin/python3
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET 
  
url = 'https://dmo.gesio.be/dyndata/exportaciones/csvzip/catalog_1_50_3_2_3834c027fd8c0423eb157b5aa38e5d0f_xml_plain.xml'
uh = urllib.request.urlopen(url)
  
tree = ET.parse(uh)

root = tree.getroot() 


data = ET.Element('SHOP')   

for r in root.findall('product'):
    if r.find('min_units_per_order').text == '1':
        name = r.find('title').text
        print(name)
        brand = "Ocio"
        try:
            description = r.find('description').text
        except AttributeError:
            description = ""
            continue
        try:
            price = r.find('PRICE_VAT').text
        except AttributeError:
            price = ""
            continue
        ean = r.find('barcodes/barcode').text
        print(ean)
        sku = ean[-8:-1]
        sku = "TOM" + sku
        print(sku)
        img = r.find('images/image').text

    
 
        element2 = ET.SubElement(data, 'SHOPITEM')
        
        s_elem2_1 = ET.SubElement(element2, 'PRODUCTNAME')
        s_elem2_2 = ET.SubElement(element2, 'PRODUCT')
        s_elem2_3 = ET.SubElement(element2, 'DESCRIPTION')
        s_elem2_4 = ET.SubElement(element2, 'PRICE_VAT')
        s_elem2_5 = ET.SubElement(element2, 'EAN')
        s_elem2_6 = ET.SubElement(element2, 'ITEM_ID')
        s_elem2_7 = ET.SubElement(element2, 'IMGURL')
        s_elem2_8 = ET.SubElement(element2, 'DELIVERY_DATE')
        s_elem2_9 = ET.SubElement(element2, 'CATEGORYTEXT')
        s_elem2_10 = ET.SubElement(element2, 'MANUFACTURER')
    
    
        s_elem2_1.text = name
        s_elem2_2.text = name
        s_elem2_3.text = description
        s_elem2_4.text = price
        s_elem2_5.text = ean
        s_elem2_6.text = sku 
        s_elem2_7.text = img
        s_elem2_8.text = "7"
        s_elem2_9.text = "Heureka.cz | D\u011btské zbo\u017eí | Hra\u010dky"
        s_elem2_10.text = brand
    
        xml_content = ET.tostring(data)

        with open('ociofeed.xml', 'wb') as f:
            f.write(xml_content)
            f.close()
0

There are 0 best solutions below