Source code for modlee.model_metafeatures

from abc import abstractmethod
from functools import partial

import numpy as np
import pandas as pd

import torch

import modlee
from modlee.utils import get_model_size, class_from_modality_task, get_modality_task

converter = modlee.converter.Converter()

from modlee.utils import INPUT_DUMMY

class ModelMetafeatures:
    def __init__(self, torch_model: torch.nn.Module, sample_input=None, *args, **kwargs):
        self.torch_model = torch_model
        self.modality, self.task = get_modality_task(torch_model)
        if sample_input is None:
            sample_input = INPUT_DUMMY[self.modality]
            print("Using default sample input")
        else:
            # Check the input type
            # self.check_input_type(sample_input)
            # Unpack if it's a list of length 1
            if isinstance(sample_input, list) and len(sample_input) == 1:
                sample_input = sample_input[0]
                # print(f"Unpacked sample_input: {type(sample_input)}")
            # Convert to tensor if it's not already a tensor
            if not torch.is_tensor(sample_input):
                try:
                    sample_input = torch.tensor(sample_input)
                except Exception as e:
                    print(f"Error converting sample_input to tensor: {e}")
        # Store the (possibly defaulted or converted) sample input so that
        # get_output_shape() can run the model on it later
        self.sample_input = sample_input
        self.onnx_graph = converter.torch_model2onnx_graph(
            torch_model, input_dummy=sample_input
        )
        # Must calculate the NetworkX graph before initializing tensors
        self.onnx_nx = converter.index_nx(converter.onnx_graph2onnx_nx(self.onnx_graph))
        self.onnx_text = converter.onnx_graph2onnx_text(self.onnx_graph)
        self.onnx_graph = converter.init_onnx_tensors(
            converter.init_onnx_params(self.onnx_graph)
        )
        self.dataframe = self.get_graph_dataframe(self.onnx_graph)
        self.properties = self.get_properties()
    def check_input_type(self, sample_input):
        """Print the type and shape of the sample_input for debugging."""
        # print(f"Input type: {type(sample_input)}")
        if isinstance(sample_input, np.ndarray):
            print(f"Input shape (NumPy array): {sample_input.shape}")
        elif isinstance(sample_input, list):
            print(f"Input is a list with length: {len(sample_input)}")
        elif torch.is_tensor(sample_input):
            print(f"Input is a tensor with shape: {sample_input.shape}")
        else:
            print("Unknown input type")
    def get_properties(self, *args, **kwargs):
        """
        Calculate model-level properties:

        - Layer counts
        - Layer parameter statistics, e.g. min/max/mean convolution kernel sizes
        - Model size
        - Input / output shapes

        Reference the prior ModelMFE (metafeature extractor) implementation,
        which parsed the ONNX text representation with regexes:
        https://github.com/modlee-ai/recommender/blob/a86eb715c0f8771bbcb20a624eb20bc6f07d6c2b/data_prep/model_mfe.py#L117
        """
        return {
            "size": get_model_size(self.torch_model, as_MB=False),
            "output_shape": self.get_output_shape(),
            **self.get_parameter_statistics(self.dataframe),
            **self.get_layer_counts(self.dataframe),
        }
    @abstractmethod
    def get_output_shape(self):
        output = self.torch_model(self.sample_input)
        return np.array(output.shape[1:])
    @staticmethod
    def get_graph_dataframe(onnx_graph, *args, **kwargs):
        """
        Parse the layers of the model into a dataframe, with columns for the
        layer operation type, its attributes, and its index (position in the graph).
        Aggregate metafeatures, e.g. layer counts and parameter ranges, can then
        be calculated from the dataframe.
        """
        nodes = []
        for node_idx, node in enumerate(onnx_graph.nodes):
            node_op = node.op.lower()
            node_dict = {
                "operation": node_op,
                "index": node_idx,
                # Attributes specific to this node operation, e.g. convolution kernel sizes
                **{
                    f"{node_op}_{node_attr_key}": node_attr_val
                    for node_attr_key, node_attr_val in node.attrs.items()
                },
            }
            nodes.append(node_dict)
        df = pd.DataFrame(nodes)
        df = ModelMetafeatures.dataframe_lists_to_columns(df)
        return df
    @staticmethod
    def dataframe_lists_to_columns(df: pd.DataFrame):
        """
        Split dataframe columns that contain lists into separate, indexed columns.

        :param df: The dataframe with list-valued columns to expand.
        """
        object_cols = df.select_dtypes(include="object").columns
        list_cols = [
            col for col in object_cols if isinstance(df[col].dropna().iloc[0], list)
        ]
        for list_col in list_cols:
            # Turn the lists into a dataframe, with the number of columns equal
            # to the length of the longest list
            list_df = pd.DataFrame(
                df[list_col].apply(lambda x: x if isinstance(x, list) else []).to_list()
            )
            # Get the number of columns created
            n_cols = list_df.shape[1]
            df[[f"{list_col}_{i}" for i in range(n_cols)]] = list_df
        df = df.drop(columns=list_cols)
        return df
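    # Illustrative sketch (not part of the library): on a toy dataframe the
    # expansion above behaves roughly as follows. The column names here are
    # hypothetical examples, not the output of any particular model.
    #
    #   df = pd.DataFrame({
    #       "operation": ["conv", "conv"],
    #       "conv_kernel_shape": [[3, 3], [5, 5]],
    #   })
    #   df = ModelMetafeatures.dataframe_lists_to_columns(df)
    #   # Columns are now: operation, conv_kernel_shape_0, conv_kernel_shape_1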
    @staticmethod
    def get_layer_counts(df: pd.DataFrame):
        """
        Get the counts of each layer type in a dataframe.

        :param df: The dataframe of graph nodes, with an "operation" column.
        """
        count_dict = dict(df["operation"].value_counts())
        count_dict = {f"{k}_count": v for k, v in count_dict.items()}
        return count_dict
    @staticmethod
    def get_parameter_statistics(df: pd.DataFrame | pd.Series):
        """
        Get summary statistics (min/max/mean/median/std) of the float-valued
        columns of a dataframe, or of a single series.

        :param df: The dataframe or series to summarize.
        """
        statistics = ["min", "max", "mean", "median", "std"]
        if isinstance(df, pd.DataFrame):
            df_float = df.select_dtypes(include="float")
        else:
            df_float = pd.DataFrame(df)
        ret = {}
        for col in df_float.columns:
            for statistic in statistics:
                ret.update(
                    {f"{col}_{statistic}": getattr(np, statistic)(df_float[col])}
                )
        return ret
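# The resulting `properties` dict combines the pieces above: model size, output
# shape, per-operation parameter statistics, and layer counts. A sketch of its
# shape (the key names and values below are hypothetical examples; actual keys
# depend on the model's ONNX operations):
#
#   {
#       "size": 46827520,                 # get_model_size(..., as_MB=False)
#       "output_shape": np.array([1000]), # get_output_shape()
#       "conv_kernel_shape_0_mean": 3.2,  # get_parameter_statistics(...)
#       "conv_count": 20,                 # get_layer_counts(...)
#       ...
#   }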
class ImageModelMetafeatures(ModelMetafeatures):
    def get_output_shape(self):
        output = self.torch_model(self.sample_input)
        return np.array(output.shape[1:])
class ImageClassificationModelMetafeatures(ImageModelMetafeatures):
    pass
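# Usage sketch (illustrative, with assumptions): extracting metafeatures from a
# torchvision classifier. `torchvision`, `resnet18`, and the input shape are
# example choices, and this assumes `get_modality_task` can infer the modality
# and task from such a model.
#
#   import torchvision
#   model = torchvision.models.resnet18(weights=None)
#   mf = ImageClassificationModelMetafeatures(
#       torch_model=model,
#       sample_input=torch.randn(1, 3, 224, 224),
#   )
#   print(mf.properties["size"])          # model size in bytes
#   print(mf.properties["output_shape"])  # e.g. array([1000])
#   print(mf.dataframe.head())            # one row per ONNX graph node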
class ImageSegmentationModelMetafeatures(ImageModelMetafeatures):
    def get_output_shape(self):
        output = self.torch_model(self.sample_input)
        # Segmentation models (e.g. torchvision's) may return a dict of outputs
        if isinstance(output, dict):
            output = output["out"]
        return np.array(output.shape[1:])
class TabularModelMetafeatures(ModelMetafeatures):
    pass


class TabularClassificationModelMetafeatures(TabularModelMetafeatures):
    pass


class TimeseriesModelMetafeatures(ModelMetafeatures):
    pass


class TimeseriesForecastingModelMetafeatures(TimeseriesModelMetafeatures):
    pass
from_modality_task = partial(class_from_modality_task, _class="Model_Metafeatures")
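# Illustrative sketch (with assumptions): `from_modality_task` resolves the
# concrete metafeature class for a modality/task pair, e.g. image
# classification. The argument names and values ("image", "classification")
# are assumptions about `class_from_modality_task`, not guaranteed here.
#
#   metafeature_cls = from_modality_task(modality="image", task="classification")
#   # expected: ImageClassificationModelMetafeatures
#   mf = metafeature_cls(torch_model=model, sample_input=sample_input)
#   print(sorted(mf.properties.keys()))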