How can I get the rendered page source in Odoo so that I can scrape it with Selenium?

619 Views Asked by At

I'm trying to scrape my Odoo inventory pages to get all the pictures already in the database (more than 9,000). I wrote a Python script using Selenium; it works like a charm elsewhere, but not on my Odoo pages. I think the HTML of Odoo's inventory pages is generated client-side by AJAX/JavaScript or something similar, so when I try to scrape a page I don't get the rendered source and cannot find the elements. I really don't want to download the 9,000+ images and names by hand. If someone knows how to capture the rendered source code, could you please tell me?

Python code :

from selenium import webdriver

from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from bs4 import BeautifulSoup as bs

import time

import urllib.request

import requests

import ssl


#Vars

# Root URL of the Odoo instance; Login(), GetInventory() and CaptureImages()
# build their request URLs by appending a path to it.
base_url = "https://yourodoourlhere.com"

# Module-level accumulator: CaptureImages() appends every <img> src it finds.
img_list=[]

# Characters removed from an image URL to turn it into a local file name
# (see the replace loop in CaptureImages()).
symbols = [":","/","\"","?","<",">","\\","*","|"]

# Chrome WebDriver shared by all functions. It is created when this line runs,
# i.e. as soon as the script is loaded; ChromeDriverManager().install()
# supplies the chromedriver binary path.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# NOTE(review): gcontext is not referenced anywhere else in the visible script.
gcontext = ssl.SSLContext()


def Login():
    """Sign in through the Odoo web login form, then start the inventory scrape.

    Opens ``<base_url>/web/login``, types the e-mail and the password into the
    inputs whose ids are ``login`` and ``password``, clicks the submit button
    and finally calls GetInventory().
    """
    login_url = base_url + "/web/login"
    driver.get(login_url)

    # (XPath of the input, text to type) pairs, filled in this order.
    form_fields = (
        ("//*[@id='login']", '[email protected]'),
        ("//*[@id='password']", 'yourodoopasswordhere'),
    )
    for field_xpath, typed_text in form_fields:
        driver.find_element(By.XPATH, field_xpath).send_keys(typed_text)

    # Absolute XPath of the submit button inside the login form.
    submit_xpath = "/html/body/div[1]/main/div/form/div[3]/button"
    driver.find_element(By.XPATH, submit_xpath).click()

    GetInventory()


def GetInventory():
    """Navigate the browser to the product kanban view and scrape its images.

    The target is the ``product.template`` kanban action addressed by the URL
    fragment below; once the page has been requested, CaptureImages() is called.
    """
    kanban_fragment = "/web#action=375&model=product.template&view_type=kanban&cids=1&menu_id=198"
    driver.get(base_url + kanban_fragment)
    CaptureImages()

   

def CaptureImages():
    """Download every image found on the page currently open in the browser.

    Steps:
      1. Reload the current URL and wait (up to 30 s) until at least one image
         inside a kanban card is present, so the DOM is read *after* Odoo's
         JavaScript has rendered it instead of right after navigation.
      2. Read the live DOM, collect the ``src`` of every ``<img>`` into the
         module-level ``img_list`` and print both (debug output).
      3. Download each image into the current working directory. The file
         name is the ``src`` value with every character listed in ``symbols``
         removed.
      4. Sleep 5 seconds, then close the browser window. The window is closed
         even when a step above raises.

    Returns:
        None. Results are the downloaded files and the entries appended to
        ``img_list``.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or the WebDriver raises; a
        wait timeout is reported on stdout and is not fatal.
    """
    # Function-scope imports keep this block self-contained; selenium is
    # already a dependency of this script.
    from urllib.parse import urljoin

    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    try:
        url = driver.current_url
        driver.get(url)

        # driver.get() returns once the initial document has loaded, but the
        # kanban cards are injected afterwards by JavaScript.
        # NOTE(review): assumes kanban cards carry the "o_kanban_record" CSS
        # class -- confirm against the Odoo version in use.
        try:
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, ".o_kanban_record img")
                )
            )
        except TimeoutException:
            # Best effort: fall back to parsing whatever is in the DOM now.
            print("Timed out waiting for kanban images, parsing the page as-is")

        r = driver.execute_script("return document.documentElement.innerHTML;")
        print(r)

        soup = bs(r, 'html.parser')

        # src=True skips <img> tags without a src attribute, which would
        # otherwise raise KeyError on image['src'].
        for image in soup.find_all('img', src=True):
            img_list.append(image['src'])
        print(img_list)

        # Disable certificate verification for urllib downloads. Set once,
        # it is a process-wide switch and does not need repeating per image.
        ssl._create_default_https_context = ssl._create_unverified_context

        # NOTE(review): urlretrieve does not reuse the browser's session
        # cookies; if the image routes require authentication the downloads
        # may fail or return a placeholder -- verify.
        # NOTE(review): only images of the cards currently rendered are seen;
        # the search_read response quoted in the question carries 80 records
        # out of 8120, so the remaining pages are presumably not covered.
        for img_url in img_list:
            if img_url.startswith("data:"):
                # Inline image: there is nothing to fetch over HTTP.
                continue

            img_name = img_url
            for symbol in symbols:
                img_name = img_name.replace(symbol, "")
            if not img_name:
                # Nothing left to use as a file name.
                continue

            # urljoin keeps absolute URLs intact and resolves relative ones
            # against base_url (plain concatenation mangled both cases).
            urllib.request.urlretrieve(urljoin(base_url, img_url), img_name)

        time.sleep(5) #specify the seconds TEST
    finally:
        driver.close()





# Debug shortcut (disabled): call the scraper directly, without logging in
# or navigating first.
#CaptureImages()

# Entry point: Login() signs in and calls GetInventory(), which in turn
# calls CaptureImages().
Login()

From what I've found in the Chrome inspector, the request to https://myodoourl.com/web/dataset/search_read shows the following data in its "Preview" section:

{jsonrpc: "2.0", id: 815011021, result: {length: 8120,…}}
id: 815011021
jsonrpc: "2.0"
result: {length: 8120,…}
length: 8120
records: [{id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…},…]
0: {id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
1: {id: 13965, product_variant_count: 2, currency_id: [1, "EUR"], activity_state: false,…}
2: {id: 13961, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
3: {id: 13442, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
4: {id: 13966, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
5: {id: 10820, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
6: {id: 10635, product_variant_count: 16, currency_id: [1, "EUR"], activity_state: false,…}
7: {id: 4348, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
8: {id: 12785, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
9: {id: 12797, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
10: {id: 12782, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
11: {id: 12781, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
12: {id: 10721, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
13: {id: 588, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "001 Misc",…}
14: {id: 13551, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
15: {id: 589, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "002 Misc",…}
16: {id: 590, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "003 Misc",…}
17: {id: 591, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "004 Misc",…}
18: {id: 950, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "005 Misc",…}
19: {id: 951, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "006 Misc",…}
20: {id: 10814, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
21: {id: 952, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "007 Misc",…}
22: {id: 953, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "008 Misc",…}
23: {id: 954, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "009 Misc",…}
24: {id: 11755, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
25: {id: 11756, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
26: {id: 11757, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
27: {id: 11744, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
28: {id: 4470, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
29: {id: 13623, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
30: {id: 5004, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
31: {id: 5134, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
32: {id: 5005, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
33: {id: 10158, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
34: {id: 10337, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
35: {id: 4712, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
36: {id: 11702, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
37: {id: 12504, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
38: {id: 10212, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
39: {id: 1134, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
40: {id: 1133, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
41: {id: 1132, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
42: {id: 1131, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
43: {id: 1130, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
44: {id: 1129, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
45: {id: 1128, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
46: {id: 1126, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
47: {id: 13331, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
48: {id: 1127, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
49: {id: 1124, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
50: {id: 1125, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
51: {id: 1123, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
52: {id: 979, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
53: {id: 978, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
54: {id: 1120, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
55: {id: 216, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
56: {id: 4045, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
57: {id: 1119, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
58: {id: 4044, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
59: {id: 10947, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
60: {id: 2369, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
61: {id: 2404, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
62: {id: 2405, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
63: {id: 1990, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
64: {id: 1559, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
65: {id: 2406, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
66: {id: 1558, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
67: {id: 1171, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
68: {id: 2930, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
69: {id: 5082, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
70: {id: 259, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
71: {id: 2551, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
72: {id: 5208, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
73: {id: 1170, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
74: {id: 11217, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
75: {id: 1138, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
76: {id: 969, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
77: {id: 1140, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
78: {id: 1999, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
79: {id: 403, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}

and in each record there is :

0: {id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
activity_state: false
currency_id: [1, "EUR"]
default_code: "12121212121212"
id: 13968
lst_price: 10
name: "\"Monkey's fist\" key ring Red"
product_variant_count: 1
qty_available: 0
type: "product"
uom_id: [1, "Units"]

Is there a way I could get the "id" and "name" fields from Python?

1

There is 1 best solution below

0
Garbez François On

Okay, so I completely changed my approach to this problem and came up with the following solution, which I'm sharing in case someone needs it:

Requirements: a folder called "images" in the same folder as the Python script, and the "odoorpc" module installed. I wrote it on Python 3.9.4, so I don't know whether it works on 3.10.

import odoorpc
import ssl
import base64
from os import getcwd

#Vars
# Connection settings passed to odoorpc.ODOO() and odoo.login().
url = 'foobar.my.odoo.com' #without "https://"
db = 'mydbname'
username = '[email protected]'
password = 'myodoopassword'
# Characters removed from a product name before it is used as a file name
# (see the replace loop in GetImages()).
symbols = [":","/","\"","?","<",">","\\","*","|","\t","\n","\r"]
# Output folder for the exported images: "images" under the current working
# directory. GetImages() prepends it to each file name.
img_dir=getcwd()+"//images//"

#Odoo connect
# Replaces the default HTTPS context factory with the unverified one, so
# certificates are not checked for connections that use the default context.
# This is a process-wide setting.
ssl._create_default_https_context = ssl._create_unverified_context #For SSL connect
# Client object shared by Login() and GetImages(); JSON-RPC over SSL, port 443.
odoo = odoorpc.ODOO(url, protocol='jsonrpc+ssl', port=443)

def Login():
    """Authenticate against the Odoo server, then export the product images.

    Prints the server version, logs in with the module-level ``db``,
    ``username`` and ``password``, prints the connected user's name and
    company name as a sanity check, and finally calls GetImages().
    """
    server_version = odoo.version
    print(server_version)

    odoo.login(db, username, password)

    # Sanity check only: show who is connected and for which company.
    current_user = odoo.env.user
    print(current_user.name)
    company = current_user.company_id
    print(company.name)

    GetImages()

def GetImages():
    """Save the ``image_1920`` of every ``product.template`` record as a file.

    For each product id returned by an unfiltered search, the record is
    browsed, its base64 image is decoded and written to
    ``<img_dir>/<sanitised product name>.jpg``. Products without an image are
    reported on stdout and skipped.

    Compared with a naive export this function:
      * creates ``img_dir`` when it does not exist yet;
      * falls back to ``product_<id>`` when sanitising leaves an empty name;
      * appends ``_<id>`` when a file name was already used during this run,
        so products with the same name no longer overwrite each other.

    Returns:
        None. Results are the written files and progress lines on stdout.
    """
    # Function-scope import: the script only imports getcwd from os.
    import os

    os.makedirs(img_dir, exist_ok=True)

    Prod = odoo.env['product.template']
    product_ids = Prod.search([])
    print(len(product_ids))

    # File names (case-folded) already written in this run; case-folding
    # also protects case-insensitive file systems.
    used_names = set()

    for id_prod in product_ids:
        prod = Prod.browse(id_prod)
        img_b64 = prod.image_1920
        nom_prod = prod.name or ""

        if not img_b64:
            # The field is falsy when the product has no picture.
            print(nom_prod+" has no image !")
            continue

        imgdata = base64.b64decode(img_b64)

        # Drop characters that are not allowed in file names.
        for symbol in symbols:
            nom_prod = nom_prod.replace(symbol, "")
        if not nom_prod:
            nom_prod = "product_" + str(id_prod)

        # Disambiguate products that end up with the same file name.
        if nom_prod.casefold() in used_names:
            nom_prod = nom_prod + "_" + str(id_prod)
        used_names.add(nom_prod.casefold())

        # NOTE(review): the ".jpg" extension is fixed; the stored bytes are
        # written as-is, whatever their actual image format is.
        filename = os.path.join(img_dir, nom_prod + ".jpg")
        with open(filename, 'wb') as f:
            f.write(imgdata)
        print(nom_prod+" img saved !")
# Entry point: Login() authenticates and then calls GetImages().
Login()

Enjoy ;)