Source code for prepars.spacing

import json
import re
from pathlib import Path

from .normalizer import Normalizer
from .regexer import Regexer
from .verb import verbProcessing

"""
    This class is responsible for applying compiled rules to text (suffixes and prefixes).
"""


# Directory that contains this module; bundled data files are resolved against it.
ROOT = Path(__file__).parent


class Spacing:
    """Apply the compiled spacing rules (irregular words, verbs, suffixes, prefixes) to text.

    The suffix and prefix rules are obtained from a ``Regexer`` instance that is
    created once per ``Spacing`` object.
    """

    def __init__(self) -> None:
        self.regexer = Regexer()

    def suffixFixer(self, text: str) -> str:
        """
        Apply every suffix rule to the text, in the order the regexer yields them.

        Args:
            text: a pure text

        Returns:
            processed text
        """
        # Each rule is a (compiled pattern, replacement) pair; rules are applied
        # cumulatively, so later rules see the output of earlier ones.
        for pattern, replacement in self.regexer.sffixPatternGenerator():
            text = pattern.sub(replacement, text)
        return text

    def prefixFixer(self, text: str) -> str:
        """
        Apply every prefix rule to the text, in the order the regexer yields them.

        Args:
            text: a pure text

        Returns:
            processed text
        """
        # Same cumulative application as in suffixFixer, with the prefix rule set.
        for pattern, replacement in self.regexer.prefixPatternGenerator():
            text = pattern.sub(replacement, text)
        return text

    def unregularWords(self, text: str) -> str:
        """
        Replace irregular words using the table stored in ``replacement.json``.

        The JSON file maps patterns to their replacement strings. All keys are
        joined into a single alternation and substituted in one pass.

        Args:
            text: a pure text

        Returns:
            processed text

        Raises:
            OSError: if the replacement file cannot be opened.
            KeyError: if a matched string, once re-escaped, is not a key of the table.
        """
        # The context manager guarantees the file handle is released even if
        # JSON decoding fails.
        with open(ROOT / "PVC/Data/TXT/replacement.json", encoding="utf-8") as file:
            replacements = json.load(file)

        # An empty table would compile to a pattern matching the empty string at
        # every position, and the lookup of "" below would then raise KeyError.
        if not replacements:
            return text

        # Keys are used verbatim as regular-expression alternatives.
        pattern = re.compile("|".join(replacements.keys()))

        # The matched text is re-escaped to recover its key.
        # NOTE(review): this presumes the keys are stored in re.escape() form -
        # confirm against the data file.
        return pattern.sub(lambda m: replacements[re.escape(m.group(0))], text)

    def fix(self, text: str) -> str:
        """
        Fix the spacing of the text by running every spacing step in sequence.

        The steps are: normalization, irregular-word replacement, verb fixing,
        suffix rules, prefix rules, and a final normalization.

        Args:
            text: a pure text

        Returns:
            processed text
        """
        normalizer = Normalizer()
        verb_processor = verbProcessing()

        # normalizing the text
        text = normalizer.normalize(text)
        # fix irregular words
        text = self.unregularWords(text)
        # fix the verbs
        text = verb_processor.fixVerbs(text)
        # fix the suffixes
        text = self.suffixFixer(text)
        # fix the prefixes
        text = self.prefixFixer(text)

        # Normalize once more so the output of the rule passes is in normalized form.
        return normalizer.normalize(text)