diff --git a/sotawhat/sotawhat.py b/sotawhat/sotawhat.py index 8c9d6f1..5206f32 100644 --- a/sotawhat/sotawhat.py +++ b/sotawhat/sotawhat.py @@ -7,16 +7,19 @@ import nltk from nltk.tokenize import word_tokenize -from six.moves.html_parser import HTMLParser from spellchecker import SpellChecker +try: + from html import unescape # Python >= 3.4 +except ImportError: + from html.parser import HTMLParser + unescape = HTMLParser().unescape + try: nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt') -h = HTMLParser() - AUTHOR_TAG = '' ABSTRACT_TAG = '