"""Check that a file parses as well-formed XML using lxml.

Usage: pass the path of the file to check as the first command-line
argument.  If parsing fails, the parser's error is printed; if no
argument is given, a usage hint is printed and the script exits with
status 1.
"""
from lxml import etree
import sys

if len(sys.argv) < 2:
    print("Please provide the HTML file name")
    sys.exit(1)

# Read raw bytes rather than text: lxml refuses already-decoded (unicode)
# strings that carry an XML encoding declaration, and the parser is able to
# honour the document's declared encoding when it is given the bytes.
# The ``with`` block guarantees the handle is closed even if read() raises.
with open(sys.argv[1], "rb") as f:
    html = f.read()

# ns_clean=True asks the parser to drop redundant namespace declarations.
parser = etree.XMLParser(ns_clean=True)

try:
    tree = etree.XML(html, parser)
except Exception as inst:
    # Script-level boundary: report whatever the parser raised (typically
    # an XML syntax error) instead of aborting with a traceback.
    print(inst)