xxxxxxxxxx
#pip install beautifulsoup4 requests lxml
#python :
import os
import requests
from bs4 import BeautifulSoup
# Fetch a web page and print the text of its <title> element.
url = "https://www.google.com/"
# A timeout keeps the script from hanging forever on an unresponsive server.
reponse = requests.get(url, timeout=10)
if reponse.ok:
    # NOTE: the "lxml" parser requires the third-party lxml package.
    soup = BeautifulSoup(reponse.text, "lxml")
    title_tag = soup.find("title")
    # get_text() yields the tag's text without markup, so there is no need to
    # strip "<title>"/"</title>" by hand; a page without a <title> gives "".
    title = title_tag.get_text() if title_tag is not None else ""
    print("The title is : " + title)
else:
    # Report the failure instead of exiting silently.
    print("Request failed with status code " + str(reponse.status_code))
# Runs the shell's `pause` command so the console window stays open
# (a Windows command; on other systems the shell reports it as unknown).
os.system("pause")
#Run with: python <script name>.py
xxxxxxxxxx
Process of extracting data from unstructured sources.
Also known as screen scraping, web harvesting, and web data extraction.
Allows you to download specific data from web pages based on defined parameters.
Common tools used: BeautifulSoup, Scrapy, Selenium
xxxxxxxxxx
// Build an HTTP stream context that sends a custom User-Agent header, so the
// remote server receives an identifying client string with the GET request.
// The option key must be 'header' (singular); an unknown key such as
// 'headers' is ignored and the User-Agent would never be sent.
$options = array('http' => array('method' => "GET", 'header' => "User-Agent: Nir003"));
$context = stream_context_create($options);
// DOMDocument provides the API used to parse HTML pages.
$doc = new DOMDocument();
// load full page in $doc variable