def handle_starttag(self, tag, attrs):
    """Record the line break implied by an opening ``p`` or ``br`` tag.

    Appends a blank-line separator for ``p`` and a single newline for
    ``br`` to the accumulated text pieces; any other tag is ignored.

    Args:
        tag: Name of the tag being opened.
        attrs: Attributes of the tag; unused.
    """
    if tag == 'p':
        # A paragraph is set off from preceding text by an empty line.
        self.__text.append('\n\n')
    elif tag == 'br':
        self.__text.append('\n')
def handle_startendtag(self, tag, attrs):
    """Record the break implied by a self-closing ``br`` tag.

    Appends a blank-line separator to the accumulated text pieces when
    ``tag`` is ``'br'``; any other tag is ignored.

    Args:
        tag: Name of the self-closing tag.
        attrs: Attributes of the tag; unused.
    """
    if tag == 'br':
        self.__text.append('\n\n')
def text(self):
    """Return the collected pieces joined into one string.

    Leading and trailing whitespace of the joined result is stripped.
    """
    return ''.join(self.__text).strip()
def dehtml(text):
    """Convert HTML markup to plain text, falling back to the input.

    Args:
        text: HTML markup to convert.

    Returns:
        The text extracted by ``_DeHTMLParser``, or ``text`` unchanged if
        parsing raised an exception (the traceback is written to stderr).
    """
    try:
        parser = _DeHTMLParser()
        parser.feed(text)
        parser.close()
        return parser.text()
    except Exception:
        # Best effort: report the failure and hand back the raw input.
        # Catching Exception rather than everything lets KeyboardInterrupt
        # and SystemExit propagate to the caller.
        print_exc(file=stderr)
        return text
def main():
    """Run ``dehtml`` on a small sample document and print the result."""
    # Raw triple-quoted literal; the opening delimiter is exactly r''' so
    # no stray quote characters end up in the sample markup.
    text = r'''
<html>
<body>
<b>Project:</b> DeHTML<br>
<b>Description</b>:<br>
This small script is intended to allow conversion from HTML markup to
plain text.
</body>
</html>
'''
    print(dehtml(text))
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
运行结果:
>>> ================================ RESTART ================================
>>>
Project: DeHTML
Description :
This small script is intended to allow conversion from HTML markup to plain text.
希望本文所述对大家的Python程序设计有所帮助。










