# Source code for database.leads.repository

# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Sophie Heasman <sophieheasmann@gmail.com>

from abc import ABC, abstractmethod
from datetime import datetime



class Repository(ABC):
    """
    Abstract data access layer (DAL) interface.

    Concrete subclasses must provide every abstract path property and every
    abstract method declared below. Creating an instance sets ``self.df`` to
    ``None`` and then calls ``_download()``.
    """

    # Format shared by the two timestamp helper methods at the end of the class
    DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Database paths for dataframe and reviews have to be set
    @property
    @abstractmethod
    def DF_INPUT(self):
        """
        Define database path to load dataframe
        """

    @property
    @abstractmethod
    def DF_OUTPUT(self):
        """
        Define database path to store dataframe
        """

    @property
    @abstractmethod
    def DF_HISTORICAL_OUTPUT(self):
        """
        Define database path to store historical enriched dataframe (used for preprocessing input)
        """

    @property
    @abstractmethod
    def REVIEWS(self):
        """
        Define database path to store reviews
        """

    @property
    @abstractmethod
    def SNAPSHOTS(self):
        """
        Define database path to store snapshots
        """

    @property
    @abstractmethod
    def GPT_RESULTS(self):
        """
        Define database path to store GPT operations
        """

    def __init__(self):
        """
        Initialise the DAL: reset the ``df`` attribute to None, then call
        ``_download()`` (implemented by the concrete subclass).
        """
        self.df = None
        self._download()

    def get_dataframe(self):
        """
        Return the object currently held in the ``df`` attribute
        :return: current value of ``self.df`` (None until something is stored)
        """
        return self.df

    def set_dataframe(self, df):
        """
        Replace the object held in the ``df`` attribute
        :param df: New value for ``self.df``
        """
        self.df = df

    def get_input_path(self):
        """
        Return the input path
        :return: value of the ``DF_INPUT`` property
        """
        return self.DF_INPUT

    def get_enriched_data_path(self, historical=False):
        """
        Return the output path of the enriched data
        :param historical: If truthy, return ``DF_HISTORICAL_OUTPUT`` instead of ``DF_OUTPUT``
        :return: value of the selected path property
        """
        if historical:
            return self.DF_HISTORICAL_OUTPUT
        return self.DF_OUTPUT

    @abstractmethod
    def _download(self):
        """
        Download database from specified DF path

        Called by ``__init__`` right after ``self.df`` has been set to None.
        """

    @abstractmethod
    def save_dataframe(self):
        """
        Save dataframe in df attribute in chosen output location
        """

    @abstractmethod
    def save_prediction(self, df):
        """
        Save dataframe in df parameter in chosen output location
        :param df: Dataframe to be saved
        """

    @abstractmethod
    def insert_data(self, data):
        """
        Insert new data into specified dataframe
        :param data: Data to be inserted (desired format must be checked)
        """

    @abstractmethod
    def create_snapshot(self, df, prefix, name):
        """
        Snapshot the current state of the dataframe
        :param df: Data to create a snapshot of
        :param prefix: Prefix for a group of snapshots belonging to a single pipeline run, used to identify snapshots
        when cleaning up after a pipeline run
        :param name: Name of the snapshot
        :return: None
        """

    @abstractmethod
    def clean_snapshots(self, prefix):
        """
        Clean up the snapshots after a pipeline ran successfully
        :param prefix: Prefix of the current pipeline run used to identify all snapshots to delete
        """

    @abstractmethod
    def save_review(self, review, place_id, force_refresh=False):
        """
        Upload review to specified review path
        :param review: json contents of the review to be uploaded
        :param place_id: Identifier of the place the review belongs to
        :param force_refresh: Defaults to False; presumably forces a refresh of an already stored
        review (exact semantics are defined by the implementing subclass)
        """

    @abstractmethod
    def fetch_review(self, place_id):
        """
        Fetch review for specified place_id
        :param place_id: Identifier of the place whose review is requested
        :return: json contents of desired review
        """

    @abstractmethod
    def save_lookup_table(self, lookup_table: dict, step_name: str) -> None:
        """
        Save the lookup table for hashes for a given step
        :param lookup_table: Lookup table to be saved
        :param step_name: Name of the step the lookup table belongs to
        """

    @abstractmethod
    def fetch_gpt_result(self, file_id, operation_name):
        """
        Fetches the GPT result for a given file ID and operation name.

        Args:
            file_id (str): The ID of the file.
            operation_name (str): The name of the GPT operation.

        Returns:
            The GPT result for the specified file ID and operation name.
        """

    @abstractmethod
    def load_lookup_table(self, step_name: str) -> dict:
        """
        Create or load the lookup table of hashes for a given step
        :param step_name: Name of the step the lookup table belongs to
        :return: lookup table for the step (a dict, per the return annotation)
        """

    @abstractmethod
    def save_gpt_result(self, gpt_result, file_id, operation_name, force_refresh=False):
        """
        Saves the GPT result for a given file ID and operation name.

        Args:
            gpt_result (str): The GPT result to be saved.
            file_id (str): The ID of the file.
            operation_name (str): The name of the operation.
            force_refresh (bool, optional): Whether to force a refresh of the saved result. Defaults to False.
        """

    def _get_current_time_as_string(self):
        """
        Get the current time as a string
        :return: ``datetime.now()`` (naive, local time) formatted with ``DATETIME_FORMAT``
        """
        return datetime.now().strftime(self.DATETIME_FORMAT)

    def _convert_string_time_to_datetime(self, time):
        """
        Convert a string time to a datetime object
        :param time: Timestamp string laid out according to ``DATETIME_FORMAT``
        :return: naive datetime parsed from the string
        :raises ValueError: raised by ``datetime.strptime`` if the string does not match the format
        """
        return datetime.strptime(time, self.DATETIME_FORMAT)

    @abstractmethod
    def load_ml_model(self, model_name: str):
        """
        Load a ML model from a file with a given name

        Args:
            model_name (str): File name
        """

    @abstractmethod
    def save_ml_model(self, model, model_name: str):
        """
        Save a given ML model to a file with a given name

        Args:
            model: Model to save
            model_name (str): File name
        """

    @abstractmethod
    def load_classification_report(self, model_name: str):
        """
        Load a classification report from a file with a given name

        Args:
            model_name (str): Model name that created the report
        """

    @abstractmethod
    def save_classification_report(self, report, model_name: str):
        """
        Save a given classification report to a file with a given name

        Args:
            report: The classification report to save
            model_name (str): Model name that created the report
        """

    @abstractmethod
    def get_preprocessed_data_path(self, historical: bool = True):
        """
        Returns the path for a preprocessed data file (either historical or current)
        :param historical: Select the historical file (default) or the current one
        """

    @abstractmethod
    def load_preprocessed_data(self, historical: bool = True):
        """
        Load the preprocessed data from the given file
        :param historical: Select the historical file (default) or the current one
        """