# Source code for tommy.controller.graph_controller

from itertools import product

import networkx as nx
import matplotlib.figure
import matplotlib.pyplot

# Import controllers
from tommy.controller.corpus_controller import CorpusController
from tommy.controller.project_settings_controller import \
    ProjectSettingsController
from tommy.controller.topic_modelling_runners.abstract_topic_runner import (
    TopicRunner)
from tommy.controller.topic_modelling_controller import (
    TopicModellingController)

# Import visualizations
from tommy.controller.visualizations.abstract_visualization import (
    AbstractVisualization)
from tommy.controller.visualizations.correlation_matrix_creator import (
    CorrelationMatrixCreator)
from tommy.controller.visualizations.document_topic_network_summary_creator \
    import DocumentTopicNetworkSummaryCreator
from tommy.controller.visualizations.document_word_count_creator import (
    DocumentWordCountCreator)
from tommy.controller.visualizations.documents_over_time_creator import (
    DocumentsOverTimeCreator)
from tommy.controller.visualizations.documents_over_time_per_topic_creator import \
    DocumentsOverTimePerTopicCreator
from tommy.controller.visualizations.sum_topics_in_documents import \
    SumTopicsInDocuments
from tommy.controller.visualizations.top_words_bar_plot_creator import (
    TopWordsBarPlotCreator)
from tommy.controller.visualizations.welcome_screen import WelcomeScreen
from tommy.controller.visualizations.word_cloud_creator import WordCloudCreator
from tommy.controller.visualizations.word_topic_network_creator import (
    WordTopicNetworkCreator)
from tommy.controller.visualizations.k_value_creator import KValueCreator

# Import exporters
from tommy.controller.visualizations.nx_exporter import NxExporter
from tommy.controller.visualizations.nx_exporter_on_data import (
    NxExporterOnData)
from tommy.controller.visualizations.document_topic_nx_exporter import (
    DocumentTopicNxExporter)
from tommy.controller.visualizations.word_topic_nx_exporter import (
    WordTopicNxExporter)

from tommy.controller.visualizations.possible_visualization import (
    PossibleVisualization)
from tommy.controller.visualizations.visualization_input_datatypes import (
    VisInputData, ProcessedCorpus, MetadataCorpus)
from tommy.datatypes.exports import NxExport, MatplotLibExport

from tommy.datatypes.topics import TopicWithScores
from tommy.model.topic_model import TopicModel
from tommy.support.event_handler import EventHandler
from tommy.model.custom_name_model import TopicNameModel
from tommy.support.application_settings import application_settings


[docs] class GraphController: """ The central interface for extracting results from topic modelling results and creating visualizations. Contains an event for when the plots are changed and a publisher for when the topics have changed. """ _corpus_controller: CorpusController = None _topic_name_model: TopicNameModel = None # Visualization Creators VISUALIZATIONS: list[AbstractVisualization] = [ DocumentWordCountCreator(), KValueCreator(), DocumentsOverTimeCreator(), SumTopicsInDocuments(), CorrelationMatrixCreator(), WordTopicNetworkCreator(), DocumentTopicNetworkSummaryCreator(), WordCloudCreator(), TopWordsBarPlotCreator(), DocumentsOverTimePerTopicCreator(), WelcomeScreen() ] _possible_visualizations: list[PossibleVisualization] | None = None _current_topic_selected_id: int | None = None _current_topic_runner: TopicRunner | None = None # EventHandlers _possible_plots_changed_event: EventHandler[list[PossibleVisualization]] _topics_changed_event: EventHandler[None] _refresh_plots_event: EventHandler[None] _refresh_name_event: EventHandler[None] # Exporters NX_EXPORTS: list[NxExporterOnData | NxExporter] = [ DocumentTopicNxExporter(), WordTopicNxExporter()] _possible_nx_exports: list[int] | None = None @property def possible_plots_changed_event(self) -> ( EventHandler[list[PossibleVisualization]]): """Get event that triggers when the list of possible plots changes.""" return self._possible_plots_changed_event @property def topics_changed_event(self) -> EventHandler[None]: """Get the eventhandler that triggers when the topics are changed.""" return self._topics_changed_event @property def refresh_plots_event(self) -> EventHandler[None]: """ Get the event that triggers when the content of any plots changes. 
""" return self._refresh_plots_event @property def refresh_name_event(self) -> EventHandler[None]: """Get the event that triggers when the config name changes.""" return self._refresh_name_event @property def has_topic_runner(self) -> bool: return self._current_topic_runner is not None
[docs] def __init__(self) -> None: """Initialize the graph-controller and its two publishers""" super().__init__() self._possible_plots_changed_event = EventHandler[ list[PossibleVisualization]]() self._topics_changed_event = EventHandler[None]() self._refresh_plots_event = EventHandler[None]() self._refresh_name_event = EventHandler[None]()
[docs] def set_controller_refs( self, topic_modelling_controller: TopicModellingController, corpus_controller: CorpusController, project_settings_controller: ProjectSettingsController) -> None: """ Set reference to the TM controller corpus controller and add self to model trained event """ self._corpus_controller = corpus_controller topic_modelling_controller.model_trained_event.subscribe( self.on_new_topic_runner) topic_modelling_controller.topic_model_switched_event.subscribe( self._on_config_switch) project_settings_controller.input_folder_path_changed_event.subscribe( self.clear_graphs)
[docs] def set_model_refs(self, topic_name_model: TopicNameModel) -> None: """ Set the reference to the topic name model :param topic_name_model: The topic name model :return: None """ self._topic_name_model = topic_name_model
[docs] def on_model_swap(self): """Notify the frontend that the topic name model has changed""" self._refresh_name_event.publish(None)
[docs] def set_selected_topic(self, topic_index: int | None) -> None: """ Set the currently selected topic to the given index or None if no topic is to be selected. :param topic_index: the index of the topic to select :return: None """ self._current_topic_selected_id = topic_index self._refresh_plots_event.publish(None)
[docs] def get_topic_name(self, topic_index: int) -> str: """ Get the name of the topic with the given index :param topic_index: The index of the topic :return: The name of the topic """ return self._topic_name_model.get_topic_name(topic_index)
[docs] def set_topic_name(self, topic_index: int, name: str) -> None: """ Set the name of the topic with the given index :param topic_index: The index of the topic :param name: The new name of the topic :return: None """ self._topic_name_model.set_topic_name(topic_index, name)
def _clear_topic_names(self) -> None: """ Clear all custom topic names :return: None """ self._topic_name_model.clear_topic_names() self.topics_changed_event.publish(None)
[docs] def clear_graphs(self, _): """Clear all graphs when the input folder path changes""" self._delete_all_cached_plots() self._current_topic_runner = None self._calculate_possible_visualizations() self._topics_changed_event.publish(None) self._possible_plots_changed_event.publish( self._possible_visualizations)
[docs] def get_number_of_topics(self) -> int: """ Get the number of topics in the topic modelling results :return: the number of topics in the topic modelling results :raises RuntimeError: if the topic runner has not finished running yet. """ if not self.has_topic_runner: raise RuntimeError("Amount of topics requested before topic " "runner has finished running") return self._current_topic_runner.get_n_topics()
[docs] def get_model_type(self) -> str: """ Get the model type in the topic modelling results :return: the model type in the topic modelling results :raises RuntimeError: if the topic runner has not finished running yet. """ if not self.has_topic_runner: raise RuntimeError("Model type requested before topic " "runner has finished running") return self._current_topic_runner.get_model()
[docs] def get_topic_with_scores(self, topic_id, n_words) -> TopicWithScores: """ Return a topic object containing top n terms and their corresponding score for the topic identified by the topic_id. :param topic_id: the index of the requested topic :param n_words: number of terms in the resulting topic object :return: topic object containing top n terms and their corresponding scores """ return self._current_topic_runner.get_topic_with_scores( topic_id=topic_id, n_words=n_words)
def _calculate_possible_visualizations(self) -> None: """(re-)calculates and saves the list of possible visualizations""" # check for each visualization if it is possible self._possible_visualizations = [ PossibleVisualization(vis_index, visualization.name, visualization.short_tab_name, visualization.vis_group, (VisInputData.TOPIC_ID in visualization.needed_input_data)) for (vis_index, visualization) in enumerate(self.VISUALIZATIONS) if visualization.is_possible( self._corpus_controller.metadata_available(), self._current_topic_runner) ] # check for each export if it is possible self._possible_nx_exports = [ export_index for (export_index, exporter) in enumerate(self.NX_EXPORTS) if exporter.is_possible(self._current_topic_runner) ] # Calculate the visualizations ahead of time to cache them for a # smoother user experience for vis in self._possible_visualizations: if self.has_topic_runner and vis.needs_topic: for topic_id in range(self.get_number_of_topics()): self.get_visualization(vis.index, override_topic=topic_id) else: self.get_visualization(vis.index) def _get_nx_export(self, vis_index: int) -> nx.Graph: """ Returns the networkx graph corresponding showing the network corresponding to the given index in the list of all exports. :param vis_index: Index of the export to be requested :return: networkx graph of a visualization corresponding to the index :raises IndexError: if the index is negative or bigger than the number of exports supported by this class. """ if vis_index < 0 or vis_index >= len(self.NX_EXPORTS): raise IndexError(f'Negative index of {vis_index} is not accepted ' 'in _get_visualization') if isinstance(self.NX_EXPORTS[vis_index], NxExporterOnData): return self._run_nx_export_on_data(self.NX_EXPORTS[vis_index]) if isinstance(self.NX_EXPORTS[vis_index], NxExporter): return self._run_nx_export(self.NX_EXPORTS[vis_index]) # if not, the index is out of range raise IndexError(f'No exports with index {vis_index} available')
[docs] def get_visualization(self, vis_index: int, override_topic: int | None = None, ignore_cache: bool = False ) -> tuple[matplotlib.figure.Figure, str]: """ Returns the visualization corresponding to the given index in the list of all visualizations. :param vis_index: Index of the visualization to be requested :param override_topic: A topic index used to override the selected topic, default to None, which doesn't override the selected topic :param ignore_cache: Whether to ignore the cache and always create a new figure, defaults to False :return: matplotlib figure of visualization corresponding to the index and the type of the visualization :raises IndexError: if the index is negative or bigger than the number of visualizations or if the visualization corresponding to that index is not possible in the current topic model. """ # if not, the index is out of range if vis_index < 0 or vis_index >= len(self.VISUALIZATIONS): raise IndexError(f'No visualization with index ' f'{vis_index} available') vis_creator = self.VISUALIZATIONS[vis_index] return (self._run_visualization_creator(vis_creator, override_topic=override_topic, ignore_cache=ignore_cache), vis_creator.short_tab_name)
def _run_visualization_creator(self, vis_creator: AbstractVisualization, override_topic: int | None = None, ignore_cache: bool = False ) -> matplotlib.figure.Figure: """ Returns the given global visualization on the current topic runner and the needed additional data. :param vis_creator: The visualization creator be run :param override_topic: A topic index used to override the selected topic, default to None, which doesn't override the selected topic :param ignore_cache: Whether to ignore the cache and always create a new figure, defaults to False :return: matplotlib figure of visualization """ keyword_args = {} for arg_needed in vis_creator.needed_input_data: match arg_needed: case VisInputData.TOPIC_ID if override_topic is not None: keyword_args['topic_id'] = override_topic case VisInputData.TOPIC_ID if override_topic is None: if self._current_topic_selected_id is None: return self._get_no_topic_selected_screen() keyword_args['topic_id'] = self._current_topic_selected_id case VisInputData.PROCESSED_CORPUS: processed_corpus = (self._corpus_controller. 
get_processed_corpus()) keyword_args['processed_corpus'] = processed_corpus case VisInputData.METADATA_CORPUS: metadata = self._corpus_controller.get_metadata() keyword_args['metadata_corpus'] = metadata case _: raise NotImplementedError(f"Unsupported input data " f"{arg_needed} requested in " f"visualization " f"{vis_creator.name}.") return vis_creator.get_figure(self._current_topic_runner, ignore_cache=ignore_cache, **keyword_args) @staticmethod def _get_no_topic_selected_screen() -> matplotlib.figure.Figure: """Returns a figure showing a text that a topic needs to be selected""" fig = matplotlib.pyplot.figure() matplotlib.pyplot.figtext(0.5, 0.5, "Selecteer een topic om " "deze visualizatie te zien", horizontalalignment='center', verticalalignment='center') fig.figure.subplots_adjust(0.1, 0.1, 0.9, 0.9) matplotlib.pyplot.close() return fig def _run_nx_export_on_data(self, nx_exporter_on_data: NxExporterOnData ) -> nx.Graph: """ Runs the networkx exporter on the additional data that it needs :param nx_exporter_on_data: Index of the visualization to be requested :return: nx.graph of the exporter """ if nx_exporter_on_data.input_data_type == ProcessedCorpus: processed_corpus = self._corpus_controller.get_processed_corpus() return nx_exporter_on_data.get_nx_graph(self._current_topic_runner, processed_corpus) if nx_exporter_on_data.input_data_type == MetadataCorpus: metadata = self._corpus_controller.get_metadata() return nx_exporter_on_data.get_nx_graph(self._current_topic_runner, metadata) raise Exception("The graph-controller is asked to supply data of type" f" {nx_exporter_on_data.input_data_type}, which is not" " supported") def _run_nx_export(self, nx_exporter: NxExporter) -> nx.Graph: """ Runs the networkx exporter :param nx_exporter: Index of the visualization to be requested :return: nx.graph of the exporter """ return nx_exporter.get_nx_graph(self._current_topic_runner)
[docs] def get_all_visualizations(self, ignore_cache: bool = False ) -> list[MatplotLibExport]: """ Get all the possible visualization for the current run :param ignore_cache: Whether to ignore the cache and always create a new figure, defaults to False :return: A list of matplotlib Figures of all possible visualizations """ vis_without_topic = [MatplotLibExport(possible_vis.name, None, self.get_visualization( possible_vis.index, ignore_cache=ignore_cache )[0] ) for possible_vis in self._possible_visualizations if not possible_vis.needs_topic] if self._current_topic_runner is None: return vis_without_topic # loop over all topic and all visualization that need topics to # run all combinations vis_with_topic = [MatplotLibExport(possible_vis.name, topic_id, self.get_visualization( possible_vis.index, override_topic=topic_id, ignore_cache=ignore_cache)[0] ) for (possible_vis, topic_id) in product(self._possible_visualizations, range(self.get_number_of_topics())) if possible_vis.needs_topic] return vis_without_topic + vis_with_topic
[docs] def get_all_nx_exports(self) -> list[NxExport]: """ Get all the networkx graphs for the possible visualization for the current run :return: A list of nx.graph objects of all possible visualizations """ if self._current_topic_runner is None: raise RuntimeWarning("Exports cannot be requested when topic model" " has not been run.") return [NxExport(self.NX_EXPORTS[vis].name, self._get_nx_export(vis)) for vis in range(len(self._possible_nx_exports))]
def _delete_all_cached_plots(self): """Delete all cached figures saved by the visualization creators""" for vis_creator in self.VISUALIZATIONS: vis_creator.delete_cache()
[docs] def on_new_topic_runner(self, topic_runner: TopicRunner) -> None: """ Signal the graph-controller that a topic runner has finished training and is ready to provide results. Notify the subscribes of the plots and topics :param topic_runner: The newly trained topic runner object :return: None """ self._delete_all_cached_plots() self._current_topic_runner = topic_runner self._calculate_possible_visualizations() self._topics_changed_event.publish(None) self._possible_plots_changed_event.publish( self._possible_visualizations) self._clear_topic_names()
[docs] def on_topic_runner_switched(self, topic_runner: TopicRunner) -> None: """ Signal the graph-controller that a topic runner has been switched :param topic_runner: The newly trained topic runner object :return: None """ self._delete_all_cached_plots() self._current_topic_runner = topic_runner self._calculate_possible_visualizations() self._topics_changed_event.publish(None) self._possible_plots_changed_event.publish( self._possible_visualizations)
def _on_config_switch(self, topic_runner: TopicRunner | None): """Save and publish new topic runner on config switch""" self.on_topic_runner_switched(topic_runner)
[docs] def reset_graph_view_state(self) -> None: """Reset the state of the graph view""" self._current_topic_selected_id = None
[docs] def visualizations_available(self) -> bool: """ Check if there are any visualizations available for the current topic model. :return: True if there are visualizations available, False otherwise """ return self._current_topic_runner is not None
""" This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences) """