I'm trying to store pubdate tag of xml into database using python. I'm using beautifulsoup for web crawler

43 Views Asked by At
<pubDate> <![CDATA[ Wed, 17 Aug 2022 14:32:47 +0530 ]]></pubDate>

Above is the xml tag now how can I store this date tag into dbms?

    from bs4 import BeautifulSoup import requests import pymysql

headers = {
            'User-Agent': ' agent'
        }

class ReadRss:

    def __init__(self, rss_url, headers):

        self.url = rss_url
        self.headers = headers

        try:
            self.conn=pymysql.connect(host="localhost", user="root", passwd= "", db="my_python")
            self.r = requests.get(rss_url, headers=self.headers)
            self.status_code = self.r.status_code
        except Exception as e:
            print('Error fetching the URL: ', rss_url)
            print(e)
        try:
            self.soup = BeautifulSoup(self.r.text, 'lxml')
        except Exception as e:
            print('Could not parse the xml: ', self.url)
            print(e)
        self.articles = self.soup.findAll('item')
        for a in self.articles:
            self.insertData(a.find('title').text,a.find('pubdate').text,a.link.next_sibling.replace('\n','').replace('\t',''),a.find('description').text,'thehindu')
        self.articles_dicts if 'pubdate' in d]
        self.conn.close()

    def insertData(self,title,date,url,description, source):
        myCursor= self.conn.cursor()
        query = "INSERT INTO `delhi`(`title`, `url`, `description`, `source`) VALUES(%s,%s,%s,%s)"
        args=(title,url,description,source)
        myCursor.execute(query,args)
        self.conn.commit()   

if __name__ == '__main__':

    feed = ReadRss('https://www.thehindu.com/news/cities/Delhi/feeder/default.rss', headers)

In this code I've imported beautifulsoup to exract the news from RSS feed and then I've to store it into database but everytime it gives the error for date [CDATA[ Wed, 17 Aug 2022 14:32:47 +0530 ]

How do I store it in database?

0

There are 0 best solutions below