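"""Check whether a GraphNet sample model can be fully fused by torch.compile.

A model is treated as fully fusible when its compiled forward pass launches
exactly one CUDA kernel, as measured by the PyTorch profiler.
"""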
from graph_net.torch import utils
import importlib.util
import torch
from typing import Optional, Type
from torch.profiler import profile, record_function, ProfilerActivity


class GraphFullyFusionable:
    def __init__(self, config):
        self.config = config

    def __call__(self, model_path=None):
        if model_path is None:
            return False
        # Load the GraphModule class from the sample's model.py.
        model_class = load_class_from_file(
            f"{model_path}/model.py", class_name="GraphModule"
        )
        assert model_class is not None
        model = model_class()
        print(f"{model_path=}")

        # Rebuild the model's weights from the sample's serialized tensor info.
        inputs_params = utils.load_converted_from_text(model_path)
        params = inputs_params["weight_info"]
        state_dict = {k: utils.replay_tensor(v) for k, v in params.items()}

        # Try to run the model eagerly first.
        try:
            model(**state_dict)
        except Exception as e:
            print(f"failed to run model: {e}")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False
        # Try to compile the model. torch.compile is lazy: compilation only
        # happens on the first call, so invoke the compiled model once here to
        # surface backend errors and warm it up before profiling.
        try:
            compiled_model = torch.compile(model)
            compiled_model(**state_dict)
        except Exception as e:
            print(f"failed to compile model: {e}")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False
        compiled_num_of_kernels = count_kernels(compiled_model, state_dict)
        if compiled_num_of_kernels == 1:
            print(f"{model_path} can be fully fused into a single kernel")
            return True
        else:
            print(f"{model_path} cannot be fully fused")
            # print(f"removing: {model_path}")
            # shutil.rmtree(model_path)
            return False


def load_class_from_file(
    file_path: str, class_name: str
) -> Optional[Type[torch.nn.Module]]:
    """Load `file_path` as a module and return its `class_name` attribute,
    or None if the attribute does not exist."""
    spec = importlib.util.spec_from_file_location("unnamed", file_path)
    unnamed = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(unnamed)
    model_class = getattr(unnamed, class_name, None)
    return model_class


def count_kernels(model, sample_inputs) -> int:
    """
    Count the number of CUDA kernel launches performed during a model's forward pass.

    Args:
        model: the (optionally compiled) module to profile.
        sample_inputs: a dict of tensors passed to the model as keyword arguments.

    Returns:
        int: The number of CUDA kernel launches observed.

    Behavior:
        - Runs the model once inside a PyTorch profiler context.
        - Sums the counts of the 'cuLaunchKernel' and 'cudaLaunchKernel' events,
          which correspond to kernel launches made through the CUDA driver and
          runtime APIs respectively.
    """
    model.eval()
    # Profile one forward pass and aggregate events by name.
    with profile(
        activities=[ProfilerActivity.CUDA, ProfilerActivity.CPU],
        record_shapes=True,
    ) as prof:
        with record_function("model_inference"):
            _ = model(**sample_inputs)
    events = prof.key_averages()

    total_count = 0
    for e in events:
        if e.key in ("cuLaunchKernel", "cudaLaunchKernel"):
            total_count += e.count
    return total_count
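

# Minimal usage sketch. Assumptions: a GraphNet-style sample directory
# containing model.py and the serialized weight info; the path below is
# hypothetical, and `config` is stored but otherwise unused, so None suffices.
if __name__ == "__main__":
    checker = GraphFullyFusionable(config=None)
    fusible = checker(model_path="/path/to/sample")  # hypothetical sample path
    print(f"fully fusible: {fusible}")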