Source code for tommy.controller.graph_controller

from itertools import product

import networkx as nx
import matplotlib.figure
import matplotlib.pyplot

# Import controllers
from tommy.controller.corpus_controller import CorpusController
from tommy.controller.project_settings_controller import \
    ProjectSettingsController
from tommy.controller.topic_modelling_runners.abstract_topic_runner import (
    TopicRunner)
from tommy.controller.topic_modelling_controller import (
    TopicModellingController)

# Import visualizations
from tommy.controller.visualizations.abstract_visualization import (
    AbstractVisualization)
from tommy.controller.visualizations.correlation_matrix_creator import (
    CorrelationMatrixCreator)
from tommy.controller.visualizations.document_topic_network_summary_creator \
    import DocumentTopicNetworkSummaryCreator
from tommy.controller.visualizations.document_word_count_creator import (
    DocumentWordCountCreator)
from tommy.controller.visualizations.documents_over_time_creator import (
    DocumentsOverTimeCreator)
from tommy.controller.visualizations.documents_over_time_per_topic_creator import \
    DocumentsOverTimePerTopicCreator
from tommy.controller.visualizations.sum_topics_in_documents import \
    SumTopicsInDocuments
from tommy.controller.visualizations.top_words_bar_plot_creator import (
    TopWordsBarPlotCreator)
from tommy.controller.visualizations.welcome_screen import WelcomeScreen
from tommy.controller.visualizations.word_cloud_creator import WordCloudCreator
from tommy.controller.visualizations.word_topic_network_creator import (
    WordTopicNetworkCreator)
from tommy.controller.visualizations.k_value_creator import KValueCreator

# Import exporters
from tommy.controller.visualizations.nx_exporter import NxExporter
from tommy.controller.visualizations.nx_exporter_on_data import (
    NxExporterOnData)
from tommy.controller.visualizations.document_topic_nx_exporter import (
    DocumentTopicNxExporter)
from tommy.controller.visualizations.word_topic_nx_exporter import (
    WordTopicNxExporter)

from tommy.controller.visualizations.possible_visualization import (
    PossibleVisualization)
from tommy.controller.visualizations.visualization_input_datatypes import (
    VisInputData, ProcessedCorpus, MetadataCorpus)
from tommy.datatypes.exports import NxExport, MatplotLibExport

from tommy.datatypes.topics import TopicWithScores
from tommy.model.topic_model import TopicModel
from tommy.support.event_handler import EventHandler
from tommy.model.custom_name_model import TopicNameModel
from tommy.support.application_settings import application_settings



[docs]
class GraphController:
    """
    The central interface for extracting results from topic modelling results
    and creating visualizations.

    Exposes four event handlers through read-only properties: one that is
    published with the list of possible visualizations, one for changed
    topics, one for refreshing the content of the plots and one for
    refreshing the names.
    """
    # Reference stored by set_controller_refs; None until that is called
    _corpus_controller: CorpusController | None = None

    # Reference stored by set_model_refs; None until that is called
    _topic_name_model: TopicNameModel | None = None

    # Visualization Creators
    # The creators are instantiated once, when the class body is executed,
    # so they are shared on class level. The position of a creator in this
    # list is the `vis_index` that get_visualization expects.
    VISUALIZATIONS: list[AbstractVisualization] = [
        DocumentWordCountCreator(),
        KValueCreator(),
        DocumentsOverTimeCreator(),
        SumTopicsInDocuments(),
        CorrelationMatrixCreator(),
        WordTopicNetworkCreator(),
        DocumentTopicNetworkSummaryCreator(),
        WordCloudCreator(),
        TopWordsBarPlotCreator(),
        DocumentsOverTimePerTopicCreator(),
        WelcomeScreen()
    ]
    # Filled by _calculate_possible_visualizations with one entry for every
    # creator in VISUALIZATIONS that reports itself as possible
    _possible_visualizations: list[PossibleVisualization] | None = None

    # Topic used by visualizations that need a topic id when no override is
    # passed; None means that no topic is selected
    _current_topic_selected_id: int | None = None

    # Topic runner that all visualizations and exports are run on; None when
    # no runner is available
    _current_topic_runner: TopicRunner | None = None

    # EventHandlers
    # Only declared here, the instances are created in __init__
    _possible_plots_changed_event: EventHandler[list[PossibleVisualization]]
    _topics_changed_event: EventHandler[None]
    _refresh_plots_event: EventHandler[None]
    _refresh_name_event: EventHandler[None]

    # Exporters
    # Instantiated once on class level, just like VISUALIZATIONS
    NX_EXPORTS: list[NxExporterOnData | NxExporter] = [
        DocumentTopicNxExporter(),
        WordTopicNxExporter()]
    # Filled by _calculate_possible_visualizations with the indices into
    # NX_EXPORTS of the exporters that report themselves as possible
    _possible_nx_exports: list[int] | None = None

    @property
    def possible_plots_changed_event(self) -> (
            EventHandler[list[PossibleVisualization]]):
        """
        Get the event handler that is published with the list of possible
        visualizations after that list has been recalculated.
        """
        plots_changed_event = self._possible_plots_changed_event
        return plots_changed_event

    @property
    def topics_changed_event(self) -> EventHandler[None]:
        """
        Get the event handler that is published (with None) when the topics
        have changed.
        """
        topics_event = self._topics_changed_event
        return topics_event

    @property
    def refresh_plots_event(self) -> EventHandler[None]:
        """
        Get the event handler that is published (with None) when the plots
        need to be redrawn, for example after another topic is selected.
        """
        refresh_event = self._refresh_plots_event
        return refresh_event

    @property
    def refresh_name_event(self) -> EventHandler[None]:
        """
        Get the event handler that is published (with None) by
        on_model_swap.
        """
        name_event = self._refresh_name_event
        return name_event

    @property
    def has_topic_runner(self) -> bool:
        """
        Check whether a topic runner is currently set.
        :return: True if the current topic runner is not None
        """
        runner = self._current_topic_runner
        return runner is not None


[docs]
    def __init__(self) -> None:
        """
        Initialize the graph-controller and create its four event handlers:
        possible plots changed, topics changed, refresh plots and refresh
        name.
        """
        super().__init__()

        # event that carries the new list of possible visualizations
        plots_changed_type = EventHandler[list[PossibleVisualization]]
        self._possible_plots_changed_event = plots_changed_type()

        # events that are published with None
        signal_type = EventHandler[None]
        self._topics_changed_event = signal_type()
        self._refresh_plots_event = signal_type()
        self._refresh_name_event = signal_type()



[docs]
    def set_controller_refs(
            self,
            topic_modelling_controller: TopicModellingController,
            corpus_controller: CorpusController,
            project_settings_controller: ProjectSettingsController) -> None:
        """
        Store the corpus controller and subscribe to the events of the other
        controllers.
        :param topic_modelling_controller: controller whose model trained
            event and topic model switched event are subscribed to
        :param corpus_controller: controller that is stored to supply the
            corpus data for the visualizations and exports
        :param project_settings_controller: controller whose input folder
            path changed event is subscribed to
        :return: None
        """
        self._corpus_controller = corpus_controller

        # a newly trained model replaces the current topic runner
        trained_event = topic_modelling_controller.model_trained_event
        trained_event.subscribe(self.on_new_topic_runner)

        # switching to another config swaps the current topic runner
        switched_event = topic_modelling_controller.topic_model_switched_event
        switched_event.subscribe(self._on_config_switch)

        # another input folder makes all current graphs invalid
        folder_event = (
            project_settings_controller.input_folder_path_changed_event)
        folder_event.subscribe(self.clear_graphs)



[docs]
    def set_model_refs(self, topic_name_model: TopicNameModel) -> None:
        """
        Store the topic name model that is used to get, set and clear the
        custom topic names.
        :param topic_name_model: The topic name model to use from now on
        :return: None
        """
        new_name_model = topic_name_model
        self._topic_name_model = new_name_model



[docs]
    def on_model_swap(self):
        """
        Publish the refresh name event (with None) to notify its subscribers
        that the topic name model has changed.
        """
        self.refresh_name_event.publish(None)



[docs]
    def set_selected_topic(self, topic_index: int | None) -> None:
        """
        Store the given index as the currently selected topic and publish the
        refresh plots event afterwards.

        :param topic_index: the index of the topic to select, or None to
            select no topic
        :return: None
        """
        # the selection is stored first, so subscribers that redraw during
        # the publish already see the new topic
        self._current_topic_selected_id = topic_index
        self.refresh_plots_event.publish(None)



[docs]
    def get_topic_name(self, topic_index: int) -> str:
        """
        Look up the name of a topic in the topic name model.
        :param topic_index: The index of the topic
        :return: The name that the topic name model returns for this index
        """
        name_model = self._topic_name_model
        return name_model.get_topic_name(topic_index)



[docs]
    def set_topic_name(self, topic_index: int, name: str) -> None:
        """
        Store a new name for a topic in the topic name model.
        :param topic_index: The index of the topic
        :param name: The new name of the topic
        :return: None
        """
        name_model = self._topic_name_model
        name_model.set_topic_name(topic_index, name)


    def _clear_topic_names(self) -> None:
        """
        Clear all topic names in the topic name model and publish the topics
        changed event afterwards.
        :return: None
        """
        name_model = self._topic_name_model
        name_model.clear_topic_names()
        self._topics_changed_event.publish(None)


[docs]
    def clear_graphs(self, _):
        """
        Remove the current topic runner and all cached plots, recalculate the
        possible visualizations and notify the subscribers. Subscribed to the
        input folder path changed event.
        :param _: payload of the event, not used
        """
        # forget everything that belonged to the previous input folder
        self._delete_all_cached_plots()
        self._current_topic_runner = None

        self._calculate_possible_visualizations()

        # topics are published first, then the new list of plots
        self.topics_changed_event.publish(None)
        self.possible_plots_changed_event.publish(
            self._possible_visualizations)



[docs]
    def get_number_of_topics(self) -> int:
        """
        Ask the current topic runner for its number of topics.
        :return: the number of topics reported by the current topic runner
        :raises RuntimeError: if there is no current topic runner
        """
        if self.has_topic_runner:
            return self._current_topic_runner.get_n_topics()

        raise RuntimeError("Amount of topics requested before topic "
                           "runner has finished running")



[docs]
    def get_model_type(self) -> str:
        """
        Ask the current topic runner for its model.
        :return: the result of get_model() of the current topic runner
        :raises RuntimeError: if there is no current topic runner
        """
        if self.has_topic_runner:
            return self._current_topic_runner.get_model()

        raise RuntimeError("Model type requested before topic "
                           "runner has finished running")



[docs]
    def get_topic_with_scores(self, topic_id, n_words) -> TopicWithScores:
        """
        Return a topic object containing top n terms and their corresponding
        score for the topic identified by the topic_id.

        :param topic_id: the index of the requested topic
        :param n_words: number of terms in the resulting topic object
        :return: topic object containing top n terms and their corresponding
            scores
        :raises RuntimeError: if the topic runner has not finished running yet.
        """
        # Same guard as get_number_of_topics and get_model_type: without it
        # the call below fails with an AttributeError on None, which does not
        # tell the caller what went wrong.
        if not self.has_topic_runner:
            raise RuntimeError("Topic with scores requested before topic "
                               "runner has finished running")
        return self._current_topic_runner.get_topic_with_scores(
            topic_id=topic_id,
            n_words=n_words)


    def _calculate_possible_visualizations(self) -> None:
        """
        (Re-)calculate and save which visualizations and which networkx
        exports are possible for the current topic runner, then request every
        possible visualization once so it is created ahead of time.
        """
        # collect a description of every visualization that is possible
        possible_visualizations = []
        for vis_index, visualization in enumerate(self.VISUALIZATIONS):
            if not visualization.is_possible(
                    self._corpus_controller.metadata_available(),
                    self._current_topic_runner):
                continue
            possible_visualizations.append(
                PossibleVisualization(
                    vis_index,
                    visualization.name,
                    visualization.short_tab_name,
                    visualization.vis_group,
                    (VisInputData.TOPIC_ID in
                     visualization.needed_input_data)))
        self._possible_visualizations = possible_visualizations

        # collect the index of every export that is possible
        possible_exports = []
        for export_index, exporter in enumerate(self.NX_EXPORTS):
            if exporter.is_possible(self._current_topic_runner):
                possible_exports.append(export_index)
        self._possible_nx_exports = possible_exports

        # Request the visualizations ahead of time, so they are already
        # created when the user opens them
        for vis in self._possible_visualizations:
            if not (self.has_topic_runner and vis.needs_topic):
                self.get_visualization(vis.index)
                continue

            # topic specific plots are requested once for every topic
            for topic_id in range(self.get_number_of_topics()):
                self.get_visualization(vis.index, override_topic=topic_id)

    def _get_nx_export(self, vis_index: int) -> nx.Graph:
        """
        Returns the networkx graph created by the exporter at the given index
        in the list of all exports.
        :param vis_index: Index in NX_EXPORTS of the export to be requested
        :return: networkx graph created by the exporter at that index
        :raises IndexError: if the index is negative or not smaller than the
            number of exports supported by this class, or if the exporter at
            that index is of a type this class cannot run.
        """
        # The check covers both a negative and a too large index, so the
        # message must not claim that the index was negative.
        if not 0 <= vis_index < len(self.NX_EXPORTS):
            raise IndexError(f'No exports with index {vis_index} available')

        exporter = self.NX_EXPORTS[vis_index]

        # exporters that need additional corpus data are checked first
        if isinstance(exporter, NxExporterOnData):
            return self._run_nx_export_on_data(exporter)
        if isinstance(exporter, NxExporter):
            return self._run_nx_export(exporter)

        # The index is valid here, the exporter itself is the problem.
        # IndexError is kept so existing callers keep working.
        raise IndexError(f'Export with index {vis_index} has unsupported '
                         f'exporter type {type(exporter).__name__}')


[docs]
    def get_visualization(self, vis_index: int,
                          override_topic: int | None = None,
                          ignore_cache: bool = False
                          ) -> tuple[matplotlib.figure.Figure, str]:
        """
        Run the visualization creator at the given index in the list of all
        visualizations.
        :param vis_index: Index in VISUALIZATIONS of the requested
            visualization
        :param override_topic: A topic index to use instead of the selected
            topic, defaults to None, which uses the selected topic
        :param ignore_cache: Passed on to the visualization creator to make
            it ignore its cache and always create a new figure, defaults to
            False
        :return: tuple of the matplotlib figure of the visualization and the
            short tab name of its creator
        :raises IndexError: if the index is negative or not smaller than the
            number of visualizations.
        """
        index_in_range = 0 <= vis_index < len(self.VISUALIZATIONS)
        if not index_in_range:
            raise IndexError(
                f'No visualization with index {vis_index} available')

        vis_creator = self.VISUALIZATIONS[vis_index]
        figure = self._run_visualization_creator(
            vis_creator,
            override_topic=override_topic,
            ignore_cache=ignore_cache)

        return figure, vis_creator.short_tab_name


    def _run_visualization_creator(self, vis_creator: AbstractVisualization,
                                   override_topic: int | None = None,
                                   ignore_cache: bool = False
                                   ) -> matplotlib.figure.Figure:
        """
        Collect the input data that the given visualization creator asks for
        and run the creator on the current topic runner.
        :param vis_creator: The visualization creator to be run
        :param override_topic: A topic index to use instead of the selected
            topic, defaults to None, which uses the selected topic
        :param ignore_cache: Whether to ignore the cache and always create a
            new figure, defaults to False
        :return: matplotlib figure of the visualization, or a figure asking
            to select a topic when a topic is needed but none is selected
            or overridden
        :raises NotImplementedError: if the creator needs a kind of input
            data that is not supported here
        """
        keyword_args = {}
        for arg_needed in vis_creator.needed_input_data:
            if arg_needed == VisInputData.TOPIC_ID:
                # an explicit override wins over the selected topic
                topic_id = override_topic
                if topic_id is None:
                    topic_id = self._current_topic_selected_id
                if topic_id is None:
                    # nothing to plot without a topic, show a hint instead
                    return self._get_no_topic_selected_screen()
                keyword_args['topic_id'] = topic_id
            elif arg_needed == VisInputData.PROCESSED_CORPUS:
                keyword_args['processed_corpus'] = (
                    self._corpus_controller.get_processed_corpus())
            elif arg_needed == VisInputData.METADATA_CORPUS:
                keyword_args['metadata_corpus'] = (
                    self._corpus_controller.get_metadata())
            else:
                raise NotImplementedError(
                    f"Unsupported input data {arg_needed} requested in "
                    f"visualization {vis_creator.name}.")

        return vis_creator.get_figure(self._current_topic_runner,
                                      ignore_cache=ignore_cache,
                                      **keyword_args)

    @staticmethod
    def _get_no_topic_selected_screen() -> matplotlib.figure.Figure:
        """
        Returns a figure showing a text that a topic needs to be selected
        :return: matplotlib figure that only contains the centered text
        """
        fig = matplotlib.pyplot.figure()

        # Work on the figure object itself instead of pyplot's implicit
        # "current figure", so the result does not depend on global pyplot
        # state.
        fig.text(0.5, 0.5,
                 "Selecteer een topic om deze visualizatie te zien",
                 horizontalalignment='center',
                 verticalalignment='center')
        fig.subplots_adjust(0.1, 0.1, 0.9, 0.9)

        # Close exactly this figure in pyplot; the figure object itself is
        # still returned to the caller.
        matplotlib.pyplot.close(fig)
        return fig

    def _run_nx_export_on_data(self, nx_exporter_on_data: NxExporterOnData
                               ) -> nx.Graph:
        """
        Run a networkx exporter on the current topic runner together with the
        additional corpus data that the exporter asks for.
        :param nx_exporter_on_data: The exporter to be run
        :return: nx.graph created by the exporter
        :raises Exception: if the exporter asks for a type of input data
            that is not supported here
        """
        data_type = nx_exporter_on_data.input_data_type

        # fetch the additional data that matches the requested type
        if data_type == ProcessedCorpus:
            additional_data = self._corpus_controller.get_processed_corpus()
        elif data_type == MetadataCorpus:
            additional_data = self._corpus_controller.get_metadata()
        else:
            raise Exception(
                "The graph-controller is asked to supply data of type"
                f" {data_type}, which is not"
                " supported")

        return nx_exporter_on_data.get_nx_graph(self._current_topic_runner,
                                                additional_data)

    def _run_nx_export(self, nx_exporter: NxExporter) -> nx.Graph:
        """
        Run a networkx exporter that only needs the current topic runner.
        :param nx_exporter: The exporter to be run
        :return: nx.graph created by the exporter
        """
        runner = self._current_topic_runner
        return nx_exporter.get_nx_graph(runner)


[docs]
    def get_all_visualizations(self, ignore_cache: bool = False
                               ) -> list[MatplotLibExport]:
        """
        Create an export of every possible visualization for the current run.
        Visualizations that need a topic are exported once for every topic,
        but only when there is a current topic runner.
        :param ignore_cache: Whether to ignore the cache and always create a
            new figure, defaults to False
        :return: A list of exports, first the visualizations that need no
            topic, then the visualizations per topic
        """
        exports = []

        # visualizations that do not depend on a topic
        for possible_vis in self._possible_visualizations:
            if possible_vis.needs_topic:
                continue
            exports.append(
                MatplotLibExport(possible_vis.name, None,
                                 self.get_visualization(
                                     possible_vis.index,
                                     ignore_cache=ignore_cache)[0]))

        # without a topic runner there are no topics to loop over
        if self._current_topic_runner is None:
            return exports

        # every visualization that needs a topic, combined with every topic
        n_topics = self.get_number_of_topics()
        for possible_vis in self._possible_visualizations:
            if not possible_vis.needs_topic:
                continue
            for topic_id in range(n_topics):
                exports.append(
                    MatplotLibExport(possible_vis.name, topic_id,
                                     self.get_visualization(
                                         possible_vis.index,
                                         override_topic=topic_id,
                                         ignore_cache=ignore_cache)[0]))

        return exports



[docs]
    def get_all_nx_exports(self) -> list[NxExport]:
        """
        Get the networkx graphs of all exports that are possible for the
        current run
        :return: A list with one export, holding the name of the exporter
            and its nx.graph, for every possible export
        :raises RuntimeWarning: if there is no current topic runner
        """
        if self._current_topic_runner is None:
            raise RuntimeWarning("Exports cannot be requested when topic model"
                                 " has not been run.")

        # _possible_nx_exports holds indices into NX_EXPORTS, so the stored
        # indices are iterated directly. Counting from 0 up to the number of
        # possible exports would run the wrong exporters as soon as an
        # exporter that is not possible precedes one that is.
        return [NxExport(self.NX_EXPORTS[export_index].name,
                         self._get_nx_export(export_index))
                for export_index
                in self._possible_nx_exports]


    def _delete_all_cached_plots(self):
        """
        Ask every visualization creator in VISUALIZATIONS to delete its
        cache.
        """
        for creator in self.VISUALIZATIONS:
            creator.delete_cache()


[docs]
    def on_new_topic_runner(self, topic_runner: TopicRunner) -> None:
        """
        Signal the graph-controller that a topic runner has finished training
        and is ready to provide results. The runner is installed in the same
        way as when switching to another runner, after that all custom topic
        names are cleared.
        :param topic_runner: The newly trained topic runner object
        :return: None
        """
        # clears the cache, saves the runner, recalculates the possible
        # visualizations and publishes the topics and plots events
        self.on_topic_runner_switched(topic_runner)

        # the names belonged to the previous run; clearing them publishes
        # the topics changed event a second time
        self._clear_topic_names()



[docs]
    def on_topic_runner_switched(self, topic_runner: TopicRunner) -> None:
        """
        Replace the current topic runner by the given one, recalculate the
        possible visualizations and notify the subscribers of the topics and
        the plots. Custom topic names are left untouched.
        :param topic_runner: The topic runner to switch to
        :return: None
        """
        # cached figures were created for the previous runner
        self._delete_all_cached_plots()
        self._current_topic_runner = topic_runner

        self._calculate_possible_visualizations()

        # topics are published first, then the new list of plots
        self.topics_changed_event.publish(None)
        self.possible_plots_changed_event.publish(
            self._possible_visualizations)


    def _on_config_switch(self, topic_runner: TopicRunner | None):
        """
        Handle the topic model switched event by forwarding the given topic
        runner to on_topic_runner_switched.
        :param topic_runner: payload of the event, annotated as a topic
            runner or None
        """
        switched_runner = topic_runner
        self.on_topic_runner_switched(switched_runner)


[docs]
    def reset_graph_view_state(self) -> None:
        """
        Reset the state of the graph view by deselecting the current topic.
        Unlike set_selected_topic, this does not publish the refresh plots
        event.
        """
        no_topic = None
        self._current_topic_selected_id = no_topic



[docs]
    def visualizations_available(self) -> bool:
        """
        Check if there is a current topic runner that visualizations can be
        created for.
        :return: True if there is a current topic runner, False otherwise
        """
        return self.has_topic_runner




"""
This program has been developed by students from the bachelor Computer Science
at Utrecht University within the Software Project course.
© Copyright Utrecht University
(Department of Information and Computing Sciences)
"""