import datetime
import io

import aspose.words as aw
import requests
def _load_html_document(html_bytes):
    """Build an Aspose.Words document from raw HTML bytes."""
    # Aspose.Words loads in-memory content through a stream-based
    # constructor; HtmlLoadOptions tells it to parse the stream as HTML.
    return aw.Document(io.BytesIO(html_bytes), aw.loading.HtmlLoadOptions())


def compare_html_urls(
    url1,
    url2,
    *,
    output_path="Output.html",
    author="Author Name",
    timeout=30,
    headers=None,
):
    """Fetch two HTML pages, compare them, and save the marked-up result.

    Both URLs are downloaded with ``requests``. If either response is not
    HTTP 200, a message is printed and nothing is written. Otherwise the
    pages are loaded as Aspose.Words documents, the first is compared
    against the second, and the first document (now carrying the
    differences as revisions) is saved to ``output_path``.

    Args:
        url1: URL of the base HTML document.
        url2: URL of the HTML document to compare against the base.
        output_path: File the comparison result is saved to.
        author: Author name recorded on the generated revisions.
        timeout: Seconds to wait for each HTTP request before giving up.
        headers: Optional mapping of extra HTTP request headers
            (for example a ``User-Agent``) sent with both requests.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or exceeds ``timeout``.
    """
    # A timeout keeps a stalled server from hanging the call forever.
    response1 = requests.get(url1, headers=headers, timeout=timeout)
    response2 = requests.get(url2, headers=headers, timeout=timeout)

    # Guard clause: bail out unless both downloads succeeded.
    if response1.status_code != 200 or response2.status_code != 200:
        print('Failed to fetch HTML from one or both URLs.')
        return

    # Pass the raw bytes so the HTML parser can apply the page's own
    # declared character encoding.
    doc1 = _load_html_document(response1.content)
    doc2 = _load_html_document(response2.content)

    # Document.compare requires both documents to be free of revisions.
    doc1.accept_all_revisions()
    doc2.accept_all_revisions()

    # Differences are recorded in doc1 as tracked revisions by `author`.
    doc1.compare(doc2, author, datetime.datetime.now())
    doc1.save(output_path)
# Usage example: compare two annual 10-K filings hosted on sec.gov.
# Guarded so that importing this module does not trigger network requests
# or write the output file as a side effect.
# NOTE(review): sec.gov may reject requests that lack a descriptive
# User-Agent header -- confirm against the SEC's access guidelines.
if __name__ == "__main__":
    compare_html_urls(
        'https://www.sec.gov/Archives/edgar/data/1067983/000156459022007322/brka-10k_20211231.htm',
        'https://www.sec.gov/Archives/edgar/data/1067983/000156459021009611/brka-10k_20201231.htm'
    )