Find and convert values in nested dict and update db collection

237 Views Asked by At

I have converted many xml files to json using xmltodict and inserted those in arangodb. Now I well loop over the collection and change some values in the database. LIke day, mount and year from string to int. The documents can be very nested and the values that I well change can be in different places.

This is what I have of the code.

    # Get the API wrapper for "FORM16" collection.
FORM16 = db.collection('FORM16')
  
def recursive_items(dictionary):
    for key, value in dictionary.items():
        if type(value) is dict:
            yield from recursive_items(value)
        else:
            yield (key, value)
 
 
search_key = 'LOW_VALUE'
 
for item in FORM16:
    for key, value in recursive_items(item):
        if search_key in list(key):
            item[search_key] = int(item[search_key])
            
        else:
            pass 
       
FORM16.update(item) 


{'_id': 'FORM16/2098312',
 '_key': '2098312',
 '_rev': '_blGxlRi---',
 '_old_rev': '_blGvpVO---'}

The code runs but It won’t update the database and the document that I receive that has changed is only the last document in the collection. What do I have to change in the code to convert values in keys like day, mount and year to int?

EDIT: This is one of the nested json's doc. that I well update

{
  "DOFFIN_ESENDERS": {
    "DOFFIN_APPENDIX": {
      "AUTHORITY_ORGANISATION_NR": "986 105 174",
      "DOFFIN_FORM_TYPE": {
        "NATIONAL": {
          "EXPRESSION_OF_INTEREST_URL": "https://kgv.doffin.no/ctm/Supplier/Notice/260549",
          "EXTERNAL_DOCUMENT_URL": "https://kgv.doffin.no/ctm/Supplier/Documents/Folder/124452",
          "LOCATION": {
            "NATIONWIDE": null
          },
          "PUBLISH_TO_TED": null
        }
      }
    },
    "FORM_SECTION": {
      "PRIOR_INFORMATION_DEFENCE": {
        "CATEGORY": "ORIGINAL",
        "FD_PRIOR_INFORMATION_DEFENCE": {
          "AUTHORITY_PRIOR_INFORMATION_DEFENCE": {
            "NAME_ADDRESSES_CONTACT_PRIOR_INFORMATION": {
              "CA_CE_CONCESSIONAIRE_PROFILE": {
                "ADDRESS": "Postboks 800, Postmottak",
                "ATTENTION": "Ole Jan Skoglund",
                "CONTACT_POINT": "Forsvarets logistikkorganisasjon",
                "COUNTRY": {
                  "VALUE": "NO"
                },
                "E_MAILS": {
                  "E_MAIL": "[email protected]"
                },
                "FAX": "+47 67863799",
                "ORGANISATION": {
                  "NATIONALID": "986105174",
                  "OFFICIALNAME": "Forsvarets logistikkorganisasjon"
                },
                "PHONE": "+47 67863787",
                "POSTAL_CODE": "LILLEHAMMER",
                "TOWN": "N-2617"
              },
              "FURTHER_INFORMATION": {
                "IDEM": null
              },
              "INTERNET_ADDRESSES_PRIOR_INFORMATION": {
                "URL_BUYER": "https://kgv.doffin.no/ctm/Supplier/CompanyInformation/Index/1127",
                "URL_GENERAL": "http://www.forsvaret.no"
              }
            },
            "TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF": {
              "PURCHASING_ON_BEHALF": {
                "PURCHASING_ON_BEHALF_NO": null
              },
              "TYPE_AND_ACTIVITIES": {
                "TYPE_OF_ACTIVITY": {
                  "VALUE": "DEFENCE"
                },
                "TYPE_OF_CONTRACTING_AUTHORITY": {
                  "VALUE": "MINISTRY"
                }
              }
            }
          },
          "CTYPE": "SUPPLIES",
          "LEFTI_PRIOR_INFORMATION": null,
          "OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION": {
            "ADDITIONAL_INFORMATION": {
              "P": "Konkurransen vil bli utført som en forhandlet prosedyre etter en planlagt kunngjøring ultimo 2015 i henhold til “Forskrift 4. oktober 2013 nr. 1185 om forsvars og sikkerhetsanskaffelser“ basert på Eu direktiv 2009/81/EC fra Europa Parlamentet."
            },
            "CPV": {
              "CPV_ADDITIONAL": [
                {
                  "CPV_CODE": {
                    "CODE": "18900000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "18930000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "18937000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33000000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33120000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33124000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33140000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33141000"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33141100"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33141200"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "33141300"
                  }
                },
                {
                  "CPV_CODE": {
                    "CODE": "50400000"
                  }
                }
              ],
              "CPV_MAIN": {
                "CPV_CODE": {
                  "CODE": "33100000"
                }
              }
            },
            "FRAMEWORK_AGREEMENT": {
              "VALUE": "YES"
            },
            "QUANTITY_SCOPE_WORKS_DEFENCE": {
              "COSTS_RANGE_AND_CURRENCY": {
                "CURRENCY": "NOK",
                "RANGE_VALUE_COST": {
                  "HIGH_VALUE": "200000000",
                  "LOW_VALUE": "150000000"
                }
              },
              "F16_DIVISION_INTO_LOTS": {
                "DIV_INTO_LOT_NO": null
              },
              "TOTAL_QUANTITY_OR_SCOPE": {
                "P": "Forsvarets logistikkorganisasjon planlegger å skifte ut Forsvarets prehospitale sanitetssystem. Vi ser derfor etter en systemleverandør som kan levere test moduler, store initielle systemleveranser og ta ansvar for effektiv etterforsyning til Forsvaret på rammeavtaler med inntil syv års varighet."
              }
            },
            "SCHEDULED_DATE_PERIOD": {
              "PERIOD_WORK_DATE_STARTING": {
                "MONTHS": "84"
              }
            },
            "TITLE_CONTRACT": {
              "P": "RFI P9346 -Nytt Prehospital Sanitetssystem  til Forsvaret"
            },
            "TYPE_CONTRACT_PLACE_DELIVERY_DEFENCE": {
              "SITE_OR_LOCATION": {
                "LABEL": "N-2055 Nordkisa",
                "NUTS": {
                  "CODE": "NO"
                }
              },
              "TYPE_CONTRACT_PI_DEFENCE": {
                "TYPE_CONTRACT": {
                  "VALUE": "SUPPLIES"
                }
              }
            }
          },
          "OTH_INFO_PRIOR_INFORMATION": {
            "ADDITIONAL_INFORMATION": {
              "P": "Vi ønsker svar både fra Systemleverandører og Underleverandører på denne RFI."
            },
            "INFORMATION_REGULATORY_FRAMEWORK": {
              "TAX_LEGISLATION": {
                "TAX_LEGISLATION_VALUE": "www.lovdata.no"
              }
            },
            "NOTICE_DISPATCH_DATE": {
              "DAY": "28",
              "MONTH": '11',
              "YEAR": "2014"
            },
            "RELATES_TO_EU_PROJECT_NO": null
          }
        },
        "FORM": "16",
        "LG": "NB",
        "VERSION": "R2.0.8.S02"
      }
    },
    "VERSION": "V2.0.0",
    "http://www.w3.org/2001/XMLSchema-instance:noNamespaceSchemaLocation": "DOFFIN_ESENDERS.xd",
    "xmlns": {
      "xsi": "http://www.w3.org/2001/XMLSchema-instance"
    }
  }
}
2

There are 2 best solutions below

1
On

It looks like your code is correct, assuming the JSON blob at the bottom is a representation of item. Just make sure the data you're passing to .update() includes a valid _key and/or _id attribute.

However, it looks like your update statement is not indented properly and/or out of order. I would put the update inline, when you make the change:

FORM16 = db.collection('FORM16')
for item in FORM16:
    for key, value in recursive_items(item):
        if search_key in list(key):
            item[search_key] = int(item[search_key])
            FORM16.update(item)
        else:
            pass

or in the top-level for loop:

FORM16 = db.collection('FORM16')
for item in FORM16:
    for key, value in recursive_items(item):
        if search_key in list(key):
            item[search_key] = int(item[search_key])
        else:
            pass
    FORM16.update(item)
0
On

I did find a functin for convert sting to int and float in JSON files.

def _decode(o):
    # Note the "unicode" part is only for python2
    if isinstance(o, str):
        try:
            return int(o)
        except ValueError:
            try:
                return float(o)
            except ValueError:
                return o

    elif isinstance(o, dict):
        return {k: _decode(v) for k, v in o.items()}
    elif isinstance(o, list):
        return [_decode(v) for v in o]
    else:
        return o


path = 'C:/doffin/test/'
for filename in os.listdir(path):
    if not filename.endswith('.json'):
        continue
    #26  
    #fullname = os.path.join(path, filename)
    fullname = os.path.join(path, filename)
    with open(fullname, 'rb') as f:
        jsonstr = f.read()

    json_sting = json.loads(jsonstr, object_hook=_decode)
    json_str2 = json.dumps(json_sting)
    
    with open(fullname[:-4] + ".json", 'w') as f:
        f.write(json_str2)

and afther that I use arango import form the shell. It works better then the API.