Source code for prepars.regexer

import csv
import re
from pathlib import Path

"""
    This class is used to manage rules and regex patterns.
"""

# Zero-width non-joiner (U+200C), used as the "half space" separator.
# Spelled as an escape because the raw character is invisible in editors.
HALF_SPACE = "\u200c"

# Directory that contains this module.
ROOT = Path(__file__).parent


class Regexer:
    """Load affix rules from disk and turn them into compiled regex patterns.

    Every rule row is read with ``csv.reader`` and is indexed as three
    columns: ``row[0]`` the affix expression, ``row[1]`` a spacing flag
    (``"a"`` or anything else) and ``row[2]`` an optional exception
    expression (empty string when there is none).

    Attributes:
        suffix: list of rule rows read from ``PVC/Data/TXT/suffix.csv``.
        prefix: list of rule rows read from ``PVC/Data/TXT/prefix.txt``.
    """

    def __init__(self) -> None:
        """Read the suffix and prefix rule files located under ``ROOT``.

        Rows are loaded eagerly into lists so that the files are closed
        right away and the pattern generators can be called more than once
        (a bare ``csv.reader`` is exhausted after a single pass).
        """
        self.suffix = self._readRules(ROOT / "PVC/Data/TXT/suffix.csv")
        self.prefix = self._readRules(ROOT / "PVC/Data/TXT/prefix.txt")

    @staticmethod
    def _readRules(path):
        """Return every CSV row of the UTF-8 file at ``path`` as a list."""
        # newline="" is what the csv module asks for when reading files.
        with open(path, encoding="utf-8", newline="") as handle:
            return list(csv.reader(handle))

    def compilePatterns(self, patterns):
        """Compile the regex half of each ``(pattern, replacement)`` pair.

        Args:
            patterns: iterable of ``(pattern, replacement)`` tuples.

        Returns:
            A list of ``(compiled_pattern, replacement)`` tuples, in the
            same order as the input.
        """
        return [(re.compile(pattern), repl) for pattern, repl in patterns]

    def sffixPatternGenerator(self):
        """Build the compiled regex patterns for every suffix rule.

        For each row in ``self.suffix`` this emits, in order:

        * when the exception column is non-empty, a pattern matching the
          whitespace between the exception (look-behind) and the suffix
          (look-ahead);
        * a pattern matching the suffix surrounded by single spaces, whose
          replacement swaps the leading space for the configured separator.

        Returns:
            A list of ``(compiled_pattern, replacement)`` tuples.

        Raises:
            IndexError: if a non-empty row has fewer than three columns.
            re.error: if a rule does not form a valid regular expression.
        """
        patterns = []
        for item in self.suffix:
            # csv.reader yields [] for blank lines; there is no rule to build.
            if not item:
                continue
            affix, flag, exception = item[0], item[1], item[2]
            joinDirectly = flag == "a"

            if exception != "":
                # NOTE(review): Python's ``re`` needs a fixed-width
                # look-behind, so an exception made of alternatives with
                # different lengths will fail to compile - confirm the data.
                # NOTE(review): here flag "a" yields the half space, the
                # opposite of the main rule below - confirm this is intended.
                pattern = rf"(?<=({exception}))\s+(?=({affix}))"
                replacement = HALF_SPACE if joinDirectly else ""
                patterns.append((re.compile(pattern), replacement))

            # Flag "a" glues the suffix to the previous word with nothing in
            # between; any other flag joins them with a half space.
            space = "" if joinDirectly else HALF_SPACE
            pattern = rf"( )({affix})( )"
            patterns.append((re.compile(pattern), space + r"\2\3"))
        return patterns

    # Correctly spelled alias; the original (misspelled) name is kept so that
    # existing callers keep working.
    suffixPatternGenerator = sffixPatternGenerator

    def prefixPatternGenerator(self):
        """Build the compiled regex patterns for every prefix rule.

        For each row in ``self.prefix`` this emits, in order:

        * when the exception column is non-empty, a pattern matching the
          whitespace between the prefix (look-behind) and the exception
          (look-ahead);
        * a pattern matching the prefix surrounded by single spaces, whose
          replacement swaps the trailing space for the configured separator.

        Returns:
            A list of ``(compiled_pattern, replacement)`` tuples.

        Raises:
            IndexError: if a non-empty row has fewer than three columns.
            re.error: if a rule does not form a valid regular expression.
        """
        patterns = []
        for item in self.prefix:
            # csv.reader yields [] for blank lines; there is no rule to build.
            if not item:
                continue
            affix, flag, exception = item[0], item[1], item[2]
            joinDirectly = flag == "a"

            if exception != "":
                # NOTE(review): the prefix sits inside a look-behind, which
                # Python's ``re`` requires to be fixed-width - confirm the
                # rule file never uses alternatives of different lengths.
                # NOTE(review): here flag "a" yields the half space, the
                # opposite of the main rule below - confirm this is intended.
                pattern = rf"(?<=({affix}))\s+(?=({exception}))"
                replacement = HALF_SPACE if joinDirectly else ""
                patterns.append((re.compile(pattern), replacement))

            # Flag "a" glues the prefix to the following word with nothing in
            # between; any other flag joins them with a half space.
            space = "" if joinDirectly else HALF_SPACE
            pattern = rf"( )({affix})( )"
            patterns.append((re.compile(pattern), r"\1\2" + space))
        return patterns