This module extracts the links from web pages using BeautifulSoup.
import requests
import argparse
from bs4 import BeautifulSoup
class WebScraper:
    """Fetch a web page and collect the href targets of a chosen tag."""

    def __init__(self, page_url: str, intended_tag: str):
        self.page_url = page_url          # URL of the page to fetch
        self.intended_tag = intended_tag  # tag name passed to find_all (e.g. 'a')

    def get_links(self) -> list:
        """Return the href value of every matching tag on the page.

        Tags without an href attribute are skipped; previously they
        produced None entries in the returned list.
        """
        # A timeout prevents the request from hanging indefinitely on a
        # slow or unresponsive server.
        response = requests.get(self.page_url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        return [
            tag.get('href')
            for tag in soup.find_all(self.intended_tag)
            if tag.has_attr('href')
        ]
def main():
    """Parse command-line arguments, scrape the page, and print each link."""
    arg_parser = argparse.ArgumentParser(description='Get all the links from the page')
    arg_parser.add_argument('--page-url', '-p', type=str, required=True, help='Enter URL of the page')
    arg_parser.add_argument('--tag', '-t', type=str, required=True, help='Enter the tag to look for')
    options = arg_parser.parse_args()

    page_scraper = WebScraper(options.page_url, options.tag)
    # Print one link per line, exactly as collected.
    for found_link in page_scraper.get_links():
        print(found_link)


if __name__ == '__main__':
    main()
When I use the code above with argparse and try to pass parameters on the command line, I get "ModuleNotFoundError: No module named 'requests'".
import requests
import argparse
from bs4 import BeautifulSoup
class WebScraper:
def __init__(self, page_url: str, intended_tag: str):
self.page_url = page_url
self.intended_tag = intended_tag
def get_links(self):
response = requests.get(self.page_url)
soup = BeautifulSoup(response.text, 'html.parser')
links = [page_link.get('href') for page_link in soup.find_all(self.intended_tag)]
return links
# Example usage: collect and print every anchor href from the page.
# NOTE(review): 'page_url' is a placeholder, not a real URL — replace it
# before running.
scraper = WebScraper('page_url', 'a')
page_links = scraper.get_links()
for link in page_links:
    # Bug fix: the original printed the undefined name 'links', which
    # raises NameError; the loop variable is 'link'.
    print(link)
When I'm not using argparse, everything works fine.