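"""Check whether a GraphNet sample model can be fully fused by torch.compile.

A model is treated as fully fusible when its compiled forward pass launches
exactly one CUDA kernel, as measured by the PyTorch profiler.
"""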
from graph_net.torch import utils
import importlib.util
import torch
from typing import Optional, Type
from torch.profiler import profile, record_function, ProfilerActivity


class GraphFullyFusionable:
    def __init__(self, config):
        self.config = config

    def __call__(self, model_path=None):
        if model_path is None:
            return False
        # Load the GraphModule class from the sample's model.py.
        model_class = load_class_from_file(
            f"{model_path}/model.py", class_name="GraphModule"
        )
        assert model_class is not None
        model = model_class()
        print(f"{model_path=}")

        # Rebuild the model's weights from the sample's serialized tensor info.
        inputs_params = utils.load_converted_from_text(model_path)
        params = inputs_params["weight_info"]
        state_dict = {k: utils.replay_tensor(v) for k, v in params.items()}

        # Try to run the model eagerly first.
        try:
            model(**state_dict)
        except Exception as e:
            print(f"failed to run model: {e}")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False
        # Try to compile the model. torch.compile is lazy: compilation only
        # happens on the first call, so invoke the compiled model once here to
        # surface backend errors and warm it up before profiling.
        try:
            compiled_model = torch.compile(model)
            compiled_model(**state_dict)
        except Exception as e:
            print(f"failed to compile model: {e}")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False
        compiled_num_of_kernels = count_kernels(compiled_model, state_dict)
        if compiled_num_of_kernels == 1:
            print(f"{model_path} can be fully fused into a single kernel")
            return True
        else:
            print(f"{model_path} cannot be fully fused")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False


def load_class_from_file(
    file_path: str, class_name: str
) -> Optional[Type[torch.nn.Module]]:
    """Load `file_path` as a module and return its `class_name` attribute,
    or None if the attribute does not exist."""
    spec = importlib.util.spec_from_file_location("unnamed", file_path)
    unnamed = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(unnamed)
    model_class = getattr(unnamed, class_name, None)
    return model_class


def count_kernels(model, sample_inputs) -> int:
    """
    Count the number of CUDA kernel launches performed during a model's forward pass.

    Args:
        model: the (optionally compiled) module to profile.
        sample_inputs: a dict of tensors passed to the model as keyword arguments.

    Returns:
        int: The number of CUDA kernel launches observed.

    Behavior:
        - Runs the model once inside a PyTorch profiler context.
        - Sums the counts of the 'cuLaunchKernel' and 'cudaLaunchKernel' events,
          which correspond to kernel launches made through the CUDA driver and
          runtime APIs respectively.
    """
    model.eval()
    # Profile one forward pass and aggregate events by name.
    with profile(
        activities=[ProfilerActivity.CUDA, ProfilerActivity.CPU],
        record_shapes=True,
    ) as prof:
        with record_function("model_inference"):
            _ = model(**sample_inputs)
    events = prof.key_averages()

    total_count = 0
    for e in events:
        if e.key in ("cuLaunchKernel", "cudaLaunchKernel"):
            total_count += e.count
    return total_count
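

# Minimal usage sketch. Assumptions: a GraphNet-style sample directory
# containing model.py and the serialized weight info; the path below is
# hypothetical, and `config` is stored but otherwise unused, so None suffices.
if __name__ == "__main__":
    checker = GraphFullyFusionable(config=None)
    fusible = checker(model_path="/path/to/sample")  # hypothetical sample path
    print(f"fully fusible: {fusible}")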