I have problem with parsing document from XML, I want to parse below specified elements and build own XML feed (and based new XML feed with products which has element values min_units_per_order on 1.
But script only prints the name element and in file I have only element <SHOP />
and nothing..Do you have some tips?
Code here:
#!/usr/bin/python3
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
url = 'https://dmo.gesio.be/dyndata/exportaciones/csvzip/catalog_1_50_3_2_3834c027fd8c0423eb157b5aa38e5d0f_xml_plain.xml'
uh = urllib.request.urlopen(url)
tree = ET.parse(uh)
root = tree.getroot()
data = ET.Element('SHOP')
for r in root.findall('product'):
if r.find('min_units_per_order').text == '1':
name = r.find('title').text
print(name)
brand = "Ocio"
try:
description = r.find('description').text
except AttributeError:
description = ""
continue
try:
price = r.find('PRICE_VAT').text
except AttributeError:
price = ""
continue
ean = r.find('barcodes/barcode').text
print(ean)
sku = ean[-8:-1]
sku = "TOM" + sku
print(sku)
img = r.find('images/image').text
element2 = ET.SubElement(data, 'SHOPITEM')
s_elem2_1 = ET.SubElement(element2, 'PRODUCTNAME')
s_elem2_2 = ET.SubElement(element2, 'PRODUCT')
s_elem2_3 = ET.SubElement(element2, 'DESCRIPTION')
s_elem2_4 = ET.SubElement(element2, 'PRICE_VAT')
s_elem2_5 = ET.SubElement(element2, 'EAN')
s_elem2_6 = ET.SubElement(element2, 'ITEM_ID')
s_elem2_7 = ET.SubElement(element2, 'IMGURL')
s_elem2_8 = ET.SubElement(element2, 'DELIVERY_DATE')
s_elem2_9 = ET.SubElement(element2, 'CATEGORYTEXT')
s_elem2_10 = ET.SubElement(element2, 'MANUFACTURER')
s_elem2_1.text = name
s_elem2_2.text = name
s_elem2_3.text = description
s_elem2_4.text = price
s_elem2_5.text = ean
s_elem2_6.text = sku
s_elem2_7.text = img
s_elem2_8.text = "7"
s_elem2_9.text = "Heureka.cz | D\u011btské zbo\u017eí | Hra\u010dky"
s_elem2_10.text = brand
xml_content = ET.tostring(data)
with open('ociofeed.xml', 'wb') as f:
f.write(xml_content)
f.close()