# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Berkay Bozkurt <resitberkaybozkurt@gmail.com>
import difflib
import time
from autocorrect import Speller
from deep_translator import GoogleTranslator
from pylanguagetool import api as ltp
from spellchecker import SpellChecker
from textblob import TextBlob
from logger import get_logger
# Module-level logger used by the TextAnalyzer methods for warnings and errors.
log = get_logger()
class TextAnalyzer:
    """
    Singleton providing text analysis helpers: spelling correction and error
    detection, grammar checking, translation and sentiment scoring.

    Spell-checker and speller objects are created lazily per language and
    cached on the single shared instance.
    """

    TARGET_LANG = "en"
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Do not forward the arguments: object.__new__ rejects extra arguments
        # when __new__ is overridden.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every TextAnalyzer() call, even though __new__
        # returns the same object. Initialise only once so the per-language
        # caches are not thrown away each time the singleton is requested.
        if getattr(self, "_initialized", False):
            return
        self.spell_checker_insts = {}
        self.speller_insts = {}
        # Not used by any method of this class; kept for compatibility.
        self.plt_insts = {}
        self._initialized = True

    def _get_spell_checker(self, lang_setting):
        """
        Get the cached SpellChecker for a language, creating it on first use.

        Args:
            lang_setting (str): The language setting.

        Returns:
            SpellChecker: An instance of SpellChecker for the specified
            language, or None if it could not be created.
        """
        if lang_setting not in self.spell_checker_insts:
            try:
                self.spell_checker_insts[lang_setting] = SpellChecker(
                    language=lang_setting
                )
            except Exception:
                log.warning(
                    f"Language '{lang_setting}' is not supported or does not exist."
                )
                return None
        return self.spell_checker_insts[lang_setting]

    def _get_speller(self, lang_setting):
        """
        Get the cached Speller for a language, creating it on first use.

        Args:
            lang_setting (str): The language setting.

        Returns:
            Speller: An instance of Speller for the specified language, or
            None if it could not be created.
        """
        if lang_setting not in self.speller_insts:
            try:
                self.speller_insts[lang_setting] = Speller(lang=lang_setting)
            except Exception:
                log.warning(
                    f"Language '{lang_setting}' is not supported or does not exist."
                )
                return None
        return self.speller_insts[lang_setting]

    def _find_differences(self, text1, text2):
        """
        Compare two texts line by line and return the full ndiff output.

        Args:
            text1 (str): The first text.
            text2 (str): The second text.

        Returns:
            list: The ``difflib.ndiff`` entries, including unchanged lines
            (prefixed with two spaces) and ``? `` hint lines.
        """
        return list(difflib.ndiff(text1.splitlines(), text2.splitlines()))

    def correct_text(self, text, language="en"):
        """
        Correct the spelling of the given text using the specified language.

        The text is first passed through the Speller (if available) and then
        each whitespace-separated token is passed through the SpellChecker
        (if available). When the SpellChecker step runs, whitespace is
        normalised to single spaces.

        Args:
            text (str): The text to be corrected.
            language (str, optional): The language setting. Defaults to "en".

        Returns:
            str: The corrected text, or the input unchanged if it is empty or
            no corrector is available for the language.
        """
        if not text:
            return text

        speller = self._get_speller(language)
        spell_checker = self._get_spell_checker(language)
        if speller is None and spell_checker is None:
            log.warning(
                f"Could not find a spell checker or speller for language '{language}'."
            )
            return text

        corrected = speller(text) if speller is not None else text
        if spell_checker is None:
            # Keep the speller's corrections instead of falling back to the
            # untouched input.
            return corrected

        fixed_words = []
        for word in corrected.split():
            # Look the suggestion up once per word; None means "no suggestion".
            suggestion = spell_checker.correction(word)
            fixed_words.append(word if suggestion is None else suggestion)
        return " ".join(fixed_words)

    def find_number_of_spelling_errors(self, text, language="en"):
        """
        Count the spelling errors in the given text using the specified language.

        Args:
            text (str): The text to be checked.
            language (str, optional): The language setting. Defaults to "en".

        Returns:
            int: The number of tokens changed by the spelling correction.
        """
        return len(self.find_spelling_errors(text, language))

    def find_spelling_errors(self, text, language="en"):
        """
        Find the spelling errors in the given text using the specified language.

        The text and its corrected version are compared token by token, so
        whitespace normalisation done by ``correct_text`` is not reported as
        an error and correctly spelled text yields an empty list.

        Args:
            text (str): The text to be checked.
            language (str, optional): The language setting. Defaults to "en".

        Returns:
            list: One ndiff-style entry (``"- <token>"``) per original token
            that the correction changed.
        """
        if not text:
            return []

        corrected_text = self.correct_text(text, language)
        # Put one token per line so the line-based diff works on words.
        differences = self._find_differences(
            "\n".join(text.split()), "\n".join(corrected_text.split())
        )
        # "- " marks tokens of the original that did not survive correction;
        # unchanged tokens, "+ " replacements and "? " hints are dropped.
        return [entry for entry in differences if entry.startswith("- ")]

    def find_number_of_grammatical_errors(
        self, inp_text, language="en", max_retries=5, retry_delay=5
    ):
        """
        Finds the number of grammatical errors in the input text.

        The LanguageTool API is queried with a delay that doubles after each
        failed attempt.

        Args:
            inp_text (str): The input text to analyze for grammatical errors.
            language (str, optional): The language of the input text. Defaults to "en".
            max_retries (int, optional): The maximum number of attempts. Defaults to 5.
            retry_delay (int, optional): Delay in seconds before the first
                retry. Defaults to 5.

        Returns:
            int: The number of grammatical errors found in the input text, or
            None if the input is empty or every attempt failed.
        """
        if inp_text is None or len(inp_text) == 0:
            return None

        delay = retry_delay
        for attempt in range(max_retries):
            try:
                response = ltp.check(
                    inp_text, api_url="https://languagetool.org/api/v2/", lang=language
                )
                # The API response is a dict whose "matches" entry lists the
                # detected problems; len() of the dict itself would only count
                # its top-level keys.
                if isinstance(response, dict):
                    return len(response.get("matches", []))
                return len(response)
            except Exception as e:
                log.error(f"Error while finding grammatical errors: {str(e)}")
                if attempt == max_retries - 1:
                    # No need to sleep after the last attempt.
                    return None
                log.warning(f"Rate limit exceeded, retrying in {delay} seconds...")
                time.sleep(delay)
                delay *= 2
        return None

    def translate(self, inp_text, source_lang="auto", target_lang=TARGET_LANG):
        """
        Translates the input text to the target language.

        Args:
            inp_text (str): The input text to translate.
            source_lang (str, optional): The source language of the input text. Defaults to "auto".
            target_lang (str, optional): The target language of the input text. Defaults to TARGET_LANG.

        Returns:
            str: The translated text (the input itself when source and target
            language are equal), or None if the input is empty or an error occurs.
        """
        if inp_text is None or len(inp_text) == 0:
            return None
        # Compare against the requested target, not the class default, so a
        # custom target_lang is honoured.
        if source_lang == target_lang:
            return inp_text
        try:
            return GoogleTranslator(source=source_lang, target=target_lang).translate(
                inp_text
            )
        except Exception as e:
            log.error(f"Error while translating: {str(e)}")
            return None

    def calculate_sentiment_analysis_score(self, inp_text, lang="en"):
        """
        Calculates the sentiment polarity of the input text.

        The text is translated to TARGET_LANG first and the polarity is
        computed on the translation.

        Args:
            inp_text (str): The input text to analyze for sentiment analysis.
            lang (str, optional): The language of the input text. Defaults to "en".

        Returns:
            float: The sentiment polarity of the input text, or None if the
            input is empty, the translation fails or an error occurs.
        """
        if inp_text is None or len(inp_text) == 0:
            return None
        try:
            translated_text = self.translate(inp_text, source_lang=lang)
            if translated_text is None:
                return None
            # Score the translated text; the untranslated input may be in a
            # language other than TARGET_LANG.
            return TextBlob(translated_text).sentiment.polarity
        except Exception as e:
            log.error(f"Error while calculating sentiment analysis: {str(e)}")
            return None