Scraping Web pages.
Web scraping is the task of collecting web data in an automated fashion; it is also called web data acquisition or extraction. The results of such activities are often used in market research, news monitoring and many other fields. Scraping publicly available data from websites for your own analysis is generally legal, but you should still check the target site's terms of service and robots.txt before you start.
Python Knowledge Base: Make coding great again.
- Updated:
2024-12-20 by Andrey BRATUS, Senior Data Analyst.
Text scraping with Selenium:
Page login and click with Selenium:
A web scraping tool usually makes ordinary HTTP requests to a target website and extracts the data from a page. In the following examples we will use the power of Python and its Selenium library.
from selenium import webdriver
def get_driver(url="https://python-code.pro/"):
    """Start a Chrome session with the example's options and open *url*.

    Args:
        url: Page to load once the browser is running. Defaults to the
            home page used by this example.

    Returns:
        The ``webdriver.Chrome`` instance, already navigated to *url*.
        The caller owns it and should call ``driver.quit()`` when done.

    Raises:
        Any exception raised by ``driver.get``; the browser is shut down
        before the exception propagates.
    """
    options = webdriver.ChromeOptions()
    # Command-line switches passed to Chrome, in the original order.
    for switch in (
        "disable-infobars",
        "start-maximized",
        "disable-dev-shm-usage",
        "no-sandbox",
        "disable-blink-features=AutomationControlled",
    ):
        options.add_argument(switch)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
    except Exception:
        # Do not leave an orphaned browser behind if navigation fails.
        driver.quit()
        raise
    return driver
def main():
    """Open the page served by ``get_driver`` and return one heading's text.

    Returns:
        The ``.text`` of the ``<h2>`` element located by the absolute
        XPath below.

    Raises:
        Whatever ``find_element`` raises when the element is missing; the
        browser is closed before the exception propagates.
    """
    driver = get_driver()
    try:
        heading = driver.find_element(
            by="xpath",
            value="/html/body/section[1]/article/div/div/h2",
        )
        return heading.text
    finally:
        # Always release the browser, even if the lookup above fails.
        driver.quit()


print(main())
OUT: Weird Jokes Search
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
def get_driver(url="https://python-code.pro/accounts/login"):
    """Start a Chrome session with the example's options and open *url*.

    Args:
        url: Page to load once the browser is running. Defaults to the
            login page used by this example.

    Returns:
        The ``webdriver.Chrome`` instance, already navigated to *url*.
        The caller owns it and should call ``driver.quit()`` when done.

    Raises:
        Any exception raised by ``driver.get``; the browser is shut down
        before the exception propagates.
    """
    options = webdriver.ChromeOptions()
    # Command-line switches passed to Chrome, in the original order.
    for switch in (
        "disable-infobars",
        "start-maximized",
        "disable-dev-shm-usage",
        "no-sandbox",
        "disable-blink-features=AutomationControlled",
    ):
        options.add_argument(switch)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
    except Exception:
        # Do not leave an orphaned browser behind if navigation fails.
        driver.quit()
        raise
    return driver
def main(username="dick", password="1234567"):
    """Log in through the form, click a navigation link, return the URL.

    The function types *username* and *password* into the two form inputs,
    submits with the Return key, clicks the first link of the navigation
    list on the following page, and reports where the browser ended up.

    Args:
        username: Text typed into the first form input.
        password: Text typed into the second form input.

    Returns:
        ``driver.current_url`` after the click. The value is returned
        rather than printed so that ``print(main())`` shows the URL
        instead of an extra ``None``.

    Raises:
        Whatever Selenium raises when an element cannot be found; the
        browser is closed before the exception propagates.
    """
    form_xpath = "/html/body/section/div/div/div/div/div[2]/form"
    driver = get_driver()
    try:
        user_input = driver.find_element(
            by="xpath", value=form_xpath + "/div[1]/input"
        )
        user_input.send_keys(username)
        # Short pause between the two fields, kept from the original flow.
        time.sleep(2)

        password_input = driver.find_element(
            by="xpath", value=form_xpath + "/div[2]/input"
        )
        # Appending RETURN submits the form right after the password.
        password_input.send_keys(password + Keys.RETURN)
        # Fixed wait for the page that follows the form submission.
        time.sleep(5)

        driver.find_element(
            by="xpath", value="/html/body/section[2]/div/nav/ol/li[1]/a"
        ).click()
        return driver.current_url
    finally:
        # Always release the browser, even if a lookup above fails.
        driver.quit()


print(main())
OUT: https://python-code.pro/