from __future__ import annotations
from collections.abc import Iterable
[docs]
class StopwordsModel:
"""
A class representing the set of stopwords.
The class acts as a wrapper around a set of stopwords, providing basic
iterable-like functionality. Initially it represents the set of
basic/general stopwords imported from a text file, but extra words may
be added, removed or replaced.
"""
@property
def default_words(self) -> set[str]:
return self._default_words
@default_words.setter
def default_words(self, default_words: set[str]) -> None:
self._default_words = default_words
@property
def extra_words(self) -> set[str]:
return self._extra_words
@property
def extra_words_in_order(self) -> list[str]:
return self._extra_words_in_order
[docs]
def __init__(self, derive_from: StopwordsModel = None) -> None:
"""Initializes the stopwords model."""
if derive_from is None:
self._default_words = set()
self._extra_words = set()
self._extra_words_in_order = []
else:
self._default_words = derive_from.default_words.copy()
self._extra_words = derive_from.extra_words.copy()
self._extra_words_in_order = (
derive_from.extra_words_in_order.copy())
def __len__(self) -> int:
"""Gets the number of stopwords."""
return len(self._default_words) + len(self._extra_words)
def __contains__(self, word: str) -> bool:
"""Checks if the set of stopwords contains a word."""
return word in self._default_words or word in self._extra_words
def __iter__(self) -> Iterable[str]:
"""Returns an iterable of stopwords."""
return iter(self._default_words | self._extra_words)
[docs]
def add(self, *args: str | Iterable[str]) -> None:
"""
Adds one or more extra stopwords.
:param args: The word(s) to add
:return: None
"""
for arg in args:
# The argument is a string.
if isinstance(arg, str):
self._extra_words.add(arg)
self._extra_words_in_order.append(arg)
# The argument is an iterable.
elif isinstance(arg, Iterable):
self._extra_words.update(arg)
self._extra_words_in_order.extend(arg)
# The argument is of an unexpected type.
else:
raise TypeError(
"Arguments must be strings or iterables of strings.")
[docs]
def remove(self, *args: str | Iterable[str]) -> None:
"""
Remove one or more extra stopwords.
:param args: The word(s) to remove
:return: None
"""
for arg in args:
# The argument is a string.
if isinstance(arg, str):
self._extra_words.discard(arg)
self._extra_words_in_order.remove(arg)
self._extra_words_in_order = [
i for i in self._extra_words_in_order if i != arg]
# The argument is an iterable.
elif isinstance(arg, Iterable):
self._extra_words.difference_update(arg)
self._extra_words_in_order = [
i for i in self._extra_words_in_order if i not in arg]
# The argument is of an unexpected type.
else:
raise TypeError(
"Arguments must be strings or iterables of strings.")
[docs]
def replace(self, word_set: set[str], words_in_order: list[str]) -> None:
"""
Replace the extra stopwords with a new set of stopwords.
:param word_set: The new words to replace the old ones with
:param words_in_order: The new words, but in the order that the user
supplied them. This is necessary to make sure the order stays the same
when switching config.
:return: None
"""
self._extra_words.clear()
self._extra_words = word_set
self._extra_words_in_order = words_in_order
[docs]
def to_dict(self) -> dict:
"""
Convert the stopwords object to a dictionary.
:return: Dictionary representation of the stopwords
"""
return {
"extra_stopwords": self._extra_words_in_order
}
[docs]
@classmethod
def from_dict(cls, stopwords_dict: dict) -> StopwordsModel:
"""
Create a StopwordsModel instance from a dictionary representation.
:param stopwords_dict: Dictionary representation of the stopwords
:return: StopwordsModel instance
"""
stopwords_model = cls()
stopwords_model._extra_words_in_order = list(stopwords_dict[
"extra_stopwords"])
for word in stopwords_model._extra_words_in_order:
if not isinstance(word, str):
raise ValueError(
"Extra stopwords should be strings, but are not")
stopwords_model._extra_words = set(
stopwords_model.extra_words_in_order)
return stopwords_model
"""
This program has been developed by students from the bachelor Computer Science
at Utrecht University within the Software Project course.
© Copyright Utrecht University
(Department of Information and Computing Sciences)
"""