Python API for USDA nutritional facts


I am using the USDA API to retrieve nutritional facts about fruits and write them to an Excel sheet, but I only get 50 results. I want to export every fruit on their website. How do I get the whole list?

import requests
import pandas as pd

def call_API(foodName, apiKey):
    # Query FoodData Central's search endpoint and return the parsed JSON
    url = f'https://api.nal.usda.gov/fdc/v1/foods/search?api_key={apiKey}&query={foodName}'
    r = requests.get(url)
    return r.json()

api_response = call_API('raw', '#API KEY')

# Build one row per food item, with a column for each nutrient
table_data = []
for food_item in api_response['foods']:
    row = {
        "Description": food_item["description"]
        }

    for nutrient in food_item["foodNutrients"]:
        nutrient_name = nutrient["nutrientName"]
        row[nutrient_name] = nutrient["value"]
    table_data.append(row)

# creating a DataFrame
df = pd.DataFrame(table_data)
print(df.iloc[-1])

df.to_excel('output.xlsx', index=False)

3 Answers

Jason Baker:

Use the pageNumber parameter, as shown below:

https://api.nal.usda.gov/fdc/v1/foods/search?api_key=your_key&query=raw&pageNumber=2

If you want to pull the page count before looping, try:

pages = pd.json_normalize(data=api_response).totalPages.squeeze()
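
For example, looping over those pages could look like this (a minimal sketch that reuses the pages value above and assumes requests is imported as in the question):

all_foods = []
for page in range(1, int(pages) + 1):
    # pageNumber selects one page of the paged results per request
    url = f'https://api.nal.usda.gov/fdc/v1/foods/search?api_key=your_key&query=raw&pageNumber={page}'
    all_foods.extend(requests.get(url).json()['foods'])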

0x00:

You should always check the documentation for the API you are using. If you open the URL in your browser or print the api_response from your request, you'll see the response contains currentPage and totalPages keys, which means results are paged and you'll have to iterate over those pages to get all the foods matching your search criteria.
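
For example (a quick way to see the paging metadata, assuming api_response is the dict returned by the question's call_API):

print(api_response['currentPage'], api_response['totalPages'])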

In the documentation, one of the parameters is pageSize, which defaults to 50 but can be raised to a maximum of 200; to select a page you use the pageNumber parameter.

So you'll have to do something like this:

import requests

def call_API(foodName, apiKey):
    foods = []
    # pageSize raises the per-request limit from the default 50 to the maximum 200
    url = f"https://api.nal.usda.gov/fdc/v1/foods/search?api_key={apiKey}&query={foodName}&pageSize=200"
    with requests.Session() as req:
        try:
            r = req.get(url, timeout=10)
            r.raise_for_status()
        except requests.exceptions.RequestException as err:
            print(f"Error: {err}")
            return None
        api_response = r.json()
        foods.extend(api_response["foods"])
        # The first response tells us how many pages there are in total
        for page in range(2, api_response["totalPages"] + 1):
            try:
                r = req.get(url + f"&pageNumber={page}", timeout=10)
                r.raise_for_status()
            except requests.exceptions.RequestException as err:
                print(f"Error: {err}")
                break
            api_response = r.json()
            foods.extend(api_response["foods"])
    return foods

food_items = call_API("raw", "API_KEY")

if food_items:  # call_API returns None when a request fails
    for food_item in food_items:
        pass  # Do stuff with each food_item
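
For instance, the "do stuff" part could rebuild the question's DataFrame and Excel export (a sketch; the row-building logic is lifted straight from the question):

import pandas as pd

table_data = []
for food_item in food_items:
    row = {"Description": food_item["description"]}
    for nutrient in food_item["foodNutrients"]:
        row[nutrient["nutrientName"]] = nutrient["value"]
    table_data.append(row)

pd.DataFrame(table_data).to_excel('output.xlsx', index=False)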

Maria D:

Using the pandas module, you could try the following changes:

import requests
import pandas as pd

def call_API(foodName, apiKey):
    url = f'https://api.nal.usda.gov/fdc/v1/foods/search?api_key={apiKey}&query={foodName}'
    try:
        r = requests.get(url)
        r.raise_for_status()  # Raise an HTTPError for bad requests
        return r.json()
    except requests.exceptions.RequestException as e:
        print(f"Error during API call: {e}")
        return None

def main():
    apiKey = 'YOUR_API_KEY'  # replace with your own FDC API key
    foodName = 'raw'  # Replace with the desired food item

    api_response = call_API(foodName, apiKey)

    if api_response:
        table_data = []

        for food_item in api_response['foods']:
            row = {"Description": food_item["description"]}

            for nutrient in food_item["foodNutrients"]:
                nutrient_name = nutrient["nutrientName"]
                row[nutrient_name] = nutrient["value"]

            table_data.append(row)

        df = pd.DataFrame(table_data)
        print(df.iloc[-1])

        # Save DataFrame to Excel file
        df.to_excel('output.xlsx', index=False)
        print("Data saved to 'output.xlsx'.")

if __name__ == "__main__":
    main()