A web scraping exercise for anyone (especially beginners) who wants to get more hands-on experience with web scraping using Python and Beautiful Soup (BeautifulSoup). In this video, I will walk through each step of scraping Udemy course information.

🔔 Subscribe: https://www.youtube.com/channel/UCvVZ19DRSLIC2-RUOeWx8ug

Source Code:


import requests
from bs4 import BeautifulSoup

def extract_text(soup_obj, tag, attribute_name, attribute_value):
    """Return the stripped text of the first matching element, or ''.

    Args:
        soup_obj: Object exposing a BeautifulSoup-style ``find(name, attrs)``
            method (a parsed document or one of its elements).
        tag: Name of the tag to look for, e.g. ``'h1'``.
        attribute_name: Attribute the element must carry.
        attribute_value: Value that attribute must have.

    Returns:
        The element's text with surrounding whitespace removed, or an empty
        string when ``find`` returns nothing.
    """
    # Search once and reuse the result; the lookup walks the tree, so doing
    # it a second time just to read the text is wasted work.
    element = soup_obj.find(tag, {attribute_name: attribute_value})
    if not element:
        return ''
    return element.text.strip()

def main(url):
    """Fetch a course page and print the details scraped from it.

    The URL is echoed first. The page is then downloaded and parsed, and the
    title, headline, rating, last-update line, description, objectives and
    lesson names are printed. When the page cannot be fetched or carries no
    course title, ``'Information not available'`` is printed instead.

    Args:
        url: Address of the course page to scrape.

    Returns:
        None. All results are written to standard output.
    """
    print(url)

    # Only network/HTTP problems are expected here, so catch exactly those
    # instead of hiding programming errors behind a blanket handler. The
    # timeout keeps an unresponsive server from hanging the script forever.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        print('Information not available')
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    course_title = extract_text(soup, 'h1', 'data-purpose', 'lead-title')
    if not course_title:
        # Without a title the page is not a recognisable course page; say so
        # rather than exiting silently.
        print('Information not available')
        return

    headline = extract_text(soup, 'div', 'data-purpose', 'lead-headline')
    rating = extract_text(soup, 'span', 'data-purpose', 'rating-number')
    last_updated = extract_text(soup, 'div', 'data-purpose', 'last-update-date')

    # The objectives list is optional: a missing section yields an empty
    # listing instead of discarding everything scraped so far.
    course_objectives = soup.find('ul', {'class': 'unstyled-list udlite-block-list what-you-will-learn--objectives-list--2cWZN'})
    object_items = course_objectives.find_all('li') if course_objectives else []
    summary_bullets = {indx: val.text for indx, val in enumerate(object_items)}

    course_description = extract_text(soup, 'div', 'data-purpose', 'safely-set-inner-html:description:description')

    # Same tolerance for the curriculum; list items without a <span> carry no
    # lesson name and are skipped.
    course_content = soup.find('div', {'data-purpose': 'course-curriculum'})
    lesson_names = course_content.find_all('li') if course_content else []
    lessons = [
        '- ' + lesson_name.span.text.strip()
        for lesson_name in lesson_names
        if lesson_name.span is not None
    ]

    print('Course Title: {0}'.format(course_title))
    print('Headline: {0}'.format(headline))
    print('Rating: {0}'.format(rating))
    print(last_updated)
    print('Course Description:')
    print(course_description)
    print('')
    print('Course Objectives')
    print('*' * 50)
    # Each objective is printed as an (index, text) pair, one per line.
    print(*summary_bullets.items(), sep='\n')
    print('')
    print('Lessons:')
    print('-' * 50)
    print(*lessons, sep='\n')

# Prompt only when executed as a script, so importing this module does not
# block waiting for input.
if __name__ == '__main__':
    # Strip stray whitespace so a blank or padded entry is not treated as a URL.
    url = input('Enter course URL: ').strip()
    if url:
        main(url)

#python

Web Scraping Exercise For Beginners (Python + BeautifulSoup)
2.45 GEEK