Source code for tommy.controller.visualizations.sum_topics_in_documents

import matplotlib.figure
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator

from tommy.controller.result_interfaces.document_topics_interface import (
    DocumentTopicsInterface)
from tommy.controller.topic_modelling_runners.abstract_topic_runner import (
    TopicRunner)
from tommy.controller.visualizations.abstract_visualization import (
    AbstractVisualization)
from tommy.controller.visualizations.possible_visualization import VisGroup
from tommy.controller.visualizations.visualization_input_datatypes import (
    VisInputData, ProcessedCorpus)
from tommy.support.constant_variables import plot_colors


class SumTopicsInDocuments(AbstractVisualization):
    """
    Bar chart visualization of the summed topic weights over all documents.

    For every document in the processed corpus the topic weights reported
    by the topic runner are collected; the weights are then summed per
    topic and drawn as one bar per topic.
    """
    # Interfaces listed as required for this visualization.
    _required_interfaces = [TopicRunner, DocumentTopicsInterface]
    # Display names (user-facing, Dutch) for this visualization.
    name = "Topics in documenten"
    short_tab_name = "Topics in doc."
    # Visualization group this plot is registered under.
    vis_group = VisGroup.MODEL
    # Extra input data that has to be supplied to _create_figure.
    needed_input_data = [VisInputData.PROCESSED_CORPUS]

    def _create_figure(self,
                       topic_runner: TopicRunner | DocumentTopicsInterface,
                       processed_corpus: ProcessedCorpus = None,
                       **kwargs
                       ) -> matplotlib.figure.Figure:
        """
        Construct a plot containing the sum of all probabilities for each
        topic.

        :param topic_runner: The topic model to construct the plot for. This
            should implement the DocumentTopicsInterface.
        :param processed_corpus: The preprocessed corpus containing all
            files as bags of words after preprocessing. Although it
            defaults to None, it is iterated unconditionally, so a corpus
            must be supplied.
        :param kwargs: Additional keyword arguments; not used by this
            visualization.
        :return: Matplotlib figure showing a sum topics in documents plot.
        """
        # Draw on the explicitly created figure/axes instead of relying on
        # pyplot's implicit "current figure" state.
        fig, ax = plt.subplots()

        # Collect one (topic_id, probability) row per topic per document.
        doc_info = {"topic_id": [], "probability": []}
        for document in processed_corpus:
            # NOTE(review): the second argument (0.0) is presumably a
            # minimum-probability threshold so that every topic is
            # returned -- confirm against DocumentTopicsInterface.
            topics = topic_runner.get_document_topics(document.body.body,
                                                      0.0)
            for topic in topics:
                doc_info["topic_id"].append(topic[0])
                doc_info["probability"].append(topic[1])

        # Sum the collected probabilities per topic.
        df = pd.DataFrame(doc_info)
        df = df.groupby(by="topic_id", as_index=False).sum()

        # Topic ids are shifted by one on the x-axis (presumably the ids
        # are zero-based and should be displayed starting at 1).
        ax.bar(df["topic_id"] + 1, df["probability"], color=plot_colors)

        ax.set_title("Verdeling topics over documenten", pad=25)
        ax.set_xlabel("Topic")
        ax.set_ylabel("Som gewichten")

        fig.subplots_adjust(left=0.15, right=0.85, top=0.85, bottom=0.15)

        # Topic numbers are integers, so only place ticks on whole numbers.
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))

        return fig
""" This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences) """