NameError                                 Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_6708/1010376174.py in <module>
     21     print(e.reason)
     22 
---> 23 html = html.decode('utf-8')
     24 
     25 def trata_html(input):

NameError: name 'html' is not defined
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
from bs4 import BeautifulSoup
# Page to download and a browser-like User-Agent header sent with the request.
url = 'https://www.alura.com.br'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}

# Bind `html` up front so the statements that follow still work when the
# request fails. Previously `html` was never assigned (the body was only
# printed), which raised "NameError: name 'html' is not defined" on decode.
html = ''

try:
    req = Request(url, headers=headers)
    # A single request, made inside the try block so that network/HTTP errors
    # are reported by the handlers below instead of aborting the script.
    # The context manager closes the connection once the body has been read.
    with urlopen(req) as response:
        # read() consumes the body, so keep the bytes rather than only
        # printing them.
        raw_html = response.read()
except HTTPError as e:
    # HTTPError must be handled before URLError because it is a subclass of it.
    # `code` is the documented attribute carrying the HTTP status code.
    print(e.code, e.reason)
except URLError as e:
    print(e.reason)
else:
    # Success path only: show the raw bytes, then decode them to text.
    print(raw_html)
    html = raw_html.decode('utf-8')
def trata_html(input: str) -> str:
    """Return *input* with its whitespace normalized.

    Every run of whitespace (spaces, tabs, newlines) is collapsed into a
    single space and leading/trailing whitespace is dropped; afterwards the
    single space left between a closing ``>`` and an opening ``<`` is removed
    so that adjacent tags touch.

    Args:
        input: The HTML text to clean. The name shadows the ``input`` builtin
            but is kept so existing keyword callers keep working.

    Returns:
        The cleaned text; an empty or whitespace-only string yields ``""``.
    """
    # str.split() with no argument splits on any whitespace run and discards
    # empty pieces, so re-joining with " " leaves exactly one space per gap.
    single_spaced = " ".join(input.split())
    return single_spaced.replace('> <', '><')
# Clean up the page text with trata_html() before parsing it; `html` is
# expected to already hold the downloaded page as text at this point.
html = trata_html(html)
# Bare expression: has no effect when run as a script. Presumably left over
# from a notebook cell, where a trailing expression is displayed.
html
# Parse the cleaned markup using the 'html.parser' parser name.
soup = BeautifulSoup(html, 'html.parser')
# Bare expression, same remark as for `html` above.
soup
# Print the result of soup.prettify() (presumably an indented rendering of
# the parsed document - see the Beautiful Soup documentation).
print(soup.prettify())