Skip to content

Commit 2b99941

Browse files
committed
Temporarily support saving the random states.
1 parent ade8bb9 commit 2b99941

File tree

6 files changed

+125
-57
lines changed

6 files changed

+125
-57
lines changed

graph_net/paddle/random_util.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import os
2+
import pickle
3+
import numpy as np
4+
import random
5+
import re
6+
import paddle
7+
8+
from graph_net.paddle import samples_util
9+
10+
11+
def set_seed(random_seed):
    """Seed Paddle, Python's ``random`` module and NumPy with one value.

    Args:
        random_seed: Seed forwarded unchanged to ``paddle.seed``,
            ``random.seed`` and ``np.random.seed``.
    """
    # Call order is kept as paddle -> random -> numpy.
    for seed_fn in (paddle.seed, random.seed, np.random.seed):
        seed_fn(random_seed)
15+
16+
17+
def _extract_model_name_for_original_sample(model_path):
    """Derive a model name from the path of an original sample.

    The last path component is the model name. When that component is
    ``subgraph`` or ``subgraph_<digits>``, the parent directory name is
    prepended (``<parent>_<last>``) so that subgraphs of different models
    get distinct names.

    Args:
        model_path: Path to the sample directory; trailing "/" is ignored.

    Returns:
        The derived model name.
    """
    fields = model_path.rstrip("/").split(os.sep)
    leaf = fields[-1]
    is_subgraph_dir = re.match(r"^subgraph(_\d+)?$", leaf) is not None
    # A path made of a single "subgraph*" component has no parent to prepend;
    # indexing fields[-2] unconditionally would raise IndexError there.
    if is_subgraph_dir and len(fields) >= 2:
        return f"{fields[-2]}_{leaf}"
    return leaf
24+
25+
26+
def _extract_model_name_for_decomposed_subgraph(model_path):
    """Derive a model name from the path of a decomposed subgraph.

    The last path component is expected to look like ``<model_name>_<idx>``;
    everything before the final ``_`` is returned as the model name.

    Args:
        model_path: Path to the subgraph directory; trailing "/" is ignored.

    Returns:
        The model name with the trailing ``_<idx>`` removed. If removing it
        would leave an empty string (no ``_`` in the component, or nothing
        before it), the whole component is returned instead, so that
        different models never share the same empty name.
    """
    leaf = model_path.rstrip("/").split(os.sep)[-1]
    # rpartition splits on the LAST underscore, which is equivalent to
    # "_".join(leaf.split("_")[:-1]) whenever an underscore is present.
    stem, _sep, _subgraph_idx = leaf.rpartition("_")
    return stem or leaf
31+
32+
33+
def _generate_random_state_filename(model_path):
    """Build the pickle filename used to store a model's random states.

    Paths located under the directory returned by
    ``samples_util.get_default_samples_directory()`` are named with the
    original-sample rule; all other paths are named with the
    decomposed-subgraph rule.

    Args:
        model_path: Path to the model (sample or decomposed subgraph).

    Returns:
        ``"<model_name>.random_states.pkl"``.
    """
    # NOTE(review): assumes get_default_samples_directory() returns a
    # directory path; it is normalised here so the comparison is path-based.
    samples_dir = os.path.abspath(samples_util.get_default_samples_directory())
    abs_model_path = os.path.abspath(model_path)
    try:
        # Compare whole path components: a plain str.startswith() would treat
        # "/x/samples_extra/m" as being inside "/x/samples".
        is_original_sample = (
            os.path.commonpath([abs_model_path, samples_dir]) == samples_dir
        )
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. they are
        # on different drives on Windows); such a path is not inside samples_dir.
        is_original_sample = False

    if is_original_sample:
        model_name = _extract_model_name_for_original_sample(model_path)
    else:
        model_name = _extract_model_name_for_decomposed_subgraph(model_path)
    return f"{model_name}.random_states.pkl"
40+
41+
42+
def save_random_states(model_path, output_dir, random_state_dict):
    """Pickle ``random_state_dict`` into ``output_dir`` (best effort).

    The filename is derived from ``model_path`` by
    ``_generate_random_state_filename``. Failures are reported on stdout and
    never raised, so a failed save does not abort the caller.

    Args:
        model_path: Path of the model the random states belong to.
        output_dir: Directory to write the pickle file into; it is created
            if it does not exist.
        random_state_dict: Object to serialise with ``pickle.dump``.
    """
    filepath = os.path.join(output_dir, _generate_random_state_filename(model_path))
    print(f"Write to {filepath}.", flush=True)
    try:
        # A missing output directory would otherwise make every save fail.
        # An empty output_dir means "current directory" and needs no mkdir.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(filepath, "wb") as f:
            pickle.dump(random_state_dict, f)
    except Exception as e:
        # Still best-effort, but say what actually went wrong: the failure
        # may come from pickling, not only from opening the file.
        print(f"Fail to write {filepath}: {e!r}", flush=True)
50+
51+
52+
def load_random_states(model_path, output_dir):
    """Load the pickled random states for ``model_path`` (best effort).

    The filename is derived from ``model_path`` by
    ``_generate_random_state_filename`` and looked up inside ``output_dir``.

    Args:
        model_path: Path of the model the random states belong to.
        output_dir: Directory containing the pickle file.

    Returns:
        The unpickled object, or ``None`` when the file cannot be opened or
        unpickled (the failure is reported on stdout, not raised).
    """
    filepath = os.path.join(output_dir, _generate_random_state_filename(model_path))
    print(f"Read from {filepath}.", flush=True)
    try:
        with open(filepath, "rb") as f:
            # NOTE(review): pickle.load can execute arbitrary code; only point
            # this at files produced by save_random_states, never at
            # untrusted input.
            return pickle.load(f)
    except Exception as e:
        # Report the real cause (missing file, corrupt pickle, ...) instead
        # of a generic "open" failure; callers treat None as "no saved states".
        print(f"Fail to load {filepath}: {e!r}", flush=True)
        return None

graph_net/paddle/run_model.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,12 @@
33
import base64
44
import argparse
55
import numpy as np
6-
import random
76

87
os.environ["FLAGS_logging_pir_py_code_dir"] = "/tmp/dump"
98

109
import paddle
1110
from graph_net import imp_util
12-
from graph_net.paddle import utils
13-
14-
15-
def set_seed(random_seed):
16-
paddle.seed(random_seed)
17-
random.seed(random_seed)
18-
np.random.seed(random_seed)
11+
from graph_net.paddle import utils, random_util
1912

2013

2114
def load_class_from_file(file_path: str, class_name: str):
@@ -31,13 +24,20 @@ def get_input_dict(model_path):
3124
params = inputs_params["weight_info"]
3225
inputs = inputs_params["input_info"]
3326

34-
state_dict = {}
35-
for k, v in params.items():
36-
name = v["original_name"] if v.get("original_name", None) else k
37-
state_dict[k] = paddle.nn.parameter.Parameter(utils.replay_tensor(v), name=name)
38-
for k, v in inputs.items():
39-
state_dict[k] = utils.replay_tensor(v)
40-
return state_dict
27+
random_state_dict = {}
28+
input_dict = {}
29+
for name, meta in params.items():
30+
original_name = (
31+
meta["original_name"] if meta.get("original_name", None) else name
32+
)
33+
random_state_dict[name] = np.random.get_state()
34+
input_dict[name] = paddle.nn.parameter.Parameter(
35+
utils.replay_tensor(meta), name=original_name
36+
)
37+
for name, meta in inputs.items():
38+
random_state_dict[name] = np.random.get_state()
39+
input_dict[name] = utils.replay_tensor(meta)
40+
return input_dict, random_state_dict
4141

4242

4343
def _convert_to_dict(config_str):
@@ -71,9 +71,11 @@ def main(args):
7171
print(f"{model_path=}")
7272

7373
initalize_seed = 123
74-
set_seed(random_seed=initalize_seed)
74+
random_util.set_seed(random_seed=initalize_seed)
7575

76-
input_dict = get_input_dict(args.model_path)
76+
input_dict, random_state_dict = get_input_dict(args.model_path)
77+
output_dir = "/work/GraphNet/graph_net/test/outputs/pass_0"
78+
random_util.save_random_states(model_path, output_dir, random_state_dict)
7779
model = _get_decorator(args)(model)
7880
model(**input_dict)
7981

graph_net/paddle/test_compiler.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,15 @@
44
from pathlib import Path
55
import sys
66
import os
7-
from dataclasses import dataclass
8-
from contextlib import contextmanager
9-
import time
10-
import math
117
import numpy as np
12-
import random
138
import platform
149
import traceback
1510
import subprocess
1611
import re
1712

18-
from graph_net.paddle import utils
1913
from graph_net import path_utils
2014
from graph_net import test_compiler_util
21-
15+
from graph_net.paddle import utils, random_util
2216
from graph_net.paddle.backend.graph_compiler_backend import GraphCompilerBackend
2317
from graph_net.paddle.backend.cinn_backend import CinnBackend
2418
from graph_net.paddle.backend.nope_backend import NopeBackend
@@ -35,12 +29,6 @@ def get_compiler_backend(args) -> GraphCompilerBackend:
3529
return registry_backend[args.compiler]
3630

3731

38-
def set_seed(random_seed):
39-
paddle.seed(random_seed)
40-
random.seed(random_seed)
41-
np.random.seed(random_seed)
42-
43-
4432
def init_env(args):
4533
if test_compiler_util.is_gpu_device(args.device):
4634
paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})
@@ -62,7 +50,7 @@ def get_hardward_name(args):
6250
)
6351
)
6452
)
65-
except Exception as e:
53+
except Exception:
6654
pass
6755
elif args.device == "cpu":
6856
hardware = platform.processor()
@@ -100,14 +88,25 @@ def get_model(model_path):
10088
return model_class()
10189

10290

103-
def get_input_dict(model_path):
91+
def get_input_dict(model_path, random_states_path=None):
10492
inputs_params = utils.load_converted_from_text(f"{model_path}")
10593
params = inputs_params["weight_info"]
10694
inputs = inputs_params["input_info"]
107-
10895
params.update(inputs)
109-
state_dict = {k: utils.replay_tensor(v) for k, v in params.items()}
110-
return state_dict
96+
97+
random_states = (
98+
random_util.load_random_states(model_path, random_states_path)
99+
if random_states_path
100+
else None
101+
)
102+
103+
input_dict = {}
104+
for name, meta in params.items():
105+
if random_states is not None and random_states.get(name, None) is not None:
106+
np.random.set_state(random_states[name])
107+
tensor = utils.replay_tensor(meta)
108+
input_dict[name] = tensor
109+
return input_dict
111110

112111

113112
def get_input_spec(model_path):
@@ -476,7 +475,7 @@ def main(args):
476475
assert args.device in ["cuda", "dcu", "xpu", "cpu"]
477476

478477
initalize_seed = 123
479-
set_seed(random_seed=initalize_seed)
478+
random_util.set_seed(random_seed=initalize_seed)
480479

481480
if path_utils.is_single_model_dir(args.model_path):
482481
test_single_model(args)

graph_net/paddle/test_reference_device.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,15 @@
11
import argparse
2-
import importlib.util
32
import paddle
4-
import time
5-
import numpy as np
6-
import random
73
import os
84
from pathlib import Path
95
from contextlib import redirect_stdout, redirect_stderr
106
import json
11-
import re
127
import sys
138
import traceback
149

1510
from graph_net import path_utils
1611
from graph_net import test_compiler_util
17-
from graph_net.paddle import test_compiler
12+
from graph_net.paddle import random_util, test_compiler
1813

1914

2015
def get_reference_log_path(reference_dir, model_path):
@@ -38,7 +33,9 @@ def test_single_model(args):
3833
compiler = test_compiler.get_compiler_backend(args)
3934
test_compiler.check_and_print_gpu_utilization(compiler)
4035

41-
input_dict = test_compiler.get_input_dict(args.model_path)
36+
input_dict = test_compiler.get_input_dict(
37+
args.model_path, args.random_states_path
38+
)
4239
model = test_compiler.get_model(args.model_path)
4340
model.eval()
4441

@@ -107,6 +104,7 @@ def test_multi_models(args):
107104
f"--trials {args.trials}",
108105
f"--log-prompt {args.log_prompt}",
109106
f"--seed {args.seed}",
107+
f"--random-states-path {args.random_states_path}",
110108
f"--reference-dir {args.reference_dir}",
111109
]
112110
)
@@ -130,7 +128,7 @@ def main(args):
130128
assert args.compiler in {"cinn", "nope"}
131129
assert args.device in ["cuda"]
132130

133-
test_compiler.set_seed(random_seed=args.seed)
131+
random_util.set_seed(random_seed=args.seed)
134132
test_compiler.init_env(args)
135133

136134
ref_dump_dir = Path(args.reference_dir)
@@ -191,6 +189,12 @@ def main(args):
191189
default=123,
192190
help="Random seed (default: 123)",
193191
)
192+
parser.add_argument(
193+
"--random-states-path",
194+
type=str,
195+
required=False,
196+
help="Path to random-states of model (s)",
197+
)
194198
parser.add_argument(
195199
"--reference-dir",
196200
type=str,

graph_net/paddle/test_target_device.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,13 @@
11
import argparse
2-
import importlib.util
3-
import time
4-
import numpy as np
5-
import random
62
import os
7-
from pathlib import Path
83
import json
9-
import re
104
import sys
115
import traceback
126

137
import paddle
148
from graph_net import path_utils
159
from graph_net import test_compiler_util
16-
from graph_net.paddle import test_compiler, test_reference_device
10+
from graph_net.paddle import random_util, test_compiler, test_reference_device
1711

1812

1913
def parse_config_from_reference_log(log_path):
@@ -59,15 +53,15 @@ def update_args_and_set_seed(args, model_path):
5953
vars(args)["compiler"] = config.get("compiler")
6054
vars(args)["trials"] = int(config.get("trials"))
6155
vars(args)["warmup"] = int(config.get("warmup"))
62-
test_compiler.set_seed(random_seed=int(config.get("seed")))
56+
random_util.set_seed(random_seed=int(config.get("seed")))
6357
return args
6458

6559

6660
def test_single_model(args):
6761
compiler = test_compiler.get_compiler_backend(args)
6862
test_compiler.check_and_print_gpu_utilization(compiler)
6963

70-
input_dict = test_compiler.get_input_dict(args.model_path)
64+
input_dict = test_compiler.get_input_dict(args.model_path, args.random_states_path)
7165
model = test_compiler.get_model(args.model_path)
7266
model.eval()
7367

@@ -146,6 +140,7 @@ def test_multi_models(args):
146140
f"--device {args.device}",
147141
f"--log-prompt {args.log_prompt}",
148142
f"--reference-dir {args.reference_dir}",
143+
f"--random-states-path {args.random_states_path}",
149144
]
150145
)
151146
cmd_ret = os.system(cmd)
@@ -211,5 +206,11 @@ def main(args):
211206
default=None,
212207
help="Path to samples list, each line contains a sample path",
213208
)
209+
parser.add_argument(
210+
"--random-states-path",
211+
type=str,
212+
required=False,
213+
help="Path to random-states of model (s)",
214+
)
214215
args = parser.parse_args()
215216
main(args=args)

graph_net/subgraph_decompose_and_evaluation_step.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,18 +262,19 @@ def run_decomposer_for_multi_models(
262262

263263

264264
def run_evaluation(
265-
framework: str, test_cmd_b64: str, samples_dir: str, log_path: str
265+
framework: str, test_cmd_b64: str, work_dir: str, log_path: str
266266
) -> int:
267267
"""Executes the test command on the batch directory."""
268268

269269
test_config = convert_b64_string_to_json(test_cmd_b64)
270270
test_module_name = test_config["test_module_name"]
271271
test_module_arguments = test_config[f"{test_module_name}_arguments"]
272-
test_module_arguments["model-path"] = samples_dir
272+
test_module_arguments["model-path"] = work_dir
273273
if test_module_name in ["test_reference_device", "test_target_device"]:
274274
test_module_arguments["reference-dir"] = os.path.join(
275-
samples_dir, "reference_device_outputs"
275+
work_dir, "reference_device_outputs"
276276
)
277+
test_module_arguments["random-states-path"] = work_dir
277278

278279
cmd = [sys.executable, "-m", f"graph_net.{framework}.{test_module_name}"] + [
279280
item
@@ -289,7 +290,7 @@ def run_evaluation(
289290
result = subprocess.run(cmd, stdout=f, stderr=f, text=True)
290291
assert (
291292
result.returncode == 0
292-
), f"[ERROR] test failed for {samples_dir}, please check the log."
293+
), f"[ERROR] test failed for {work_dir}, please check the log."
293294

294295

295296
def reconstruct_subgraph_size(split_positions: List[int]) -> List[list]:

0 commit comments

Comments
 (0)