Skip to content

Commit 2955929

Browse files
protobird-git copybara-github
authored and committed
Internal changes only
LiteRT-PiperOrigin-RevId: 840370322
1 parent a52472d commit 2955929

File tree

14 files changed

+289
-47
lines changed

14 files changed

+289
-47
lines changed

litert/c/litert_common.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,21 @@ typedef enum {
334334
kLiteRtGpuBackendOpenGl = 3, // Experimental, do not use.
335335
} LiteRtGpuBackend;
336336

337+
// GPU wait type used on synchronous execution.
338+
// Values map 1:1 to GpuDelegateWaitType.
339+
typedef enum {
340+
// Wait type will be automatically determined by the delegate.
341+
kLiteRtGpuWaitTypeDefault = 0,
342+
// Blocked waiting for GPU to finish.
343+
kLiteRtGpuWaitTypePassive = 1,
344+
// Active busy-waiting for GPU to finish.
345+
kLiteRtGpuWaitTypeActive = 2,
346+
// Do not wait for GPU to finish. Relies on other synchronization mechanisms
347+
// such as barriers or an in-order queue. Kept for backward compatibility;
348+
// not recommended for new use cases. Use asynchronous execution mode instead.
349+
kLiteRtGpuWaitTypeDoNotWait = 3,
350+
} LiteRtGpuWaitType;
351+
337352
// Error reporter mode enum
338353
typedef enum LiteRtErrorReporterMode {
339354
// No error reporting (errors are ignored)

litert/c/litert_event.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,16 @@ LiteRtStatus LiteRtCreateManagedEvent(LiteRtEnvironment env,
131131
}
132132

133133
LiteRtStatus LiteRtSetCustomEvent(LiteRtEvent event,
134-
litert_custom_event custom_event) {
134+
LiteRtCustomEvent custom_event) {
135135
#if LITERT_HAS_CUSTOM_EVENT_SUPPORT
136136
if (event->type == LiteRtEventTypeCustom) {
137137
if (event->custom_event != nullptr &&
138-
event->custom_event->release != nullptr) {
139-
event->custom_event->release(event->custom_event);
138+
event->custom_event->Release != nullptr) {
139+
event->custom_event->Release(event->custom_event);
140140
}
141141
event->custom_event = custom_event;
142-
if (custom_event && custom_event->retain != nullptr) {
143-
custom_event->retain(custom_event);
142+
if (custom_event && custom_event->Retain != nullptr) {
143+
custom_event->Retain(custom_event);
144144
}
145145
return kLiteRtStatusOk;
146146
}

litert/c/litert_event.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ LiteRtStatus LiteRtCreateManagedEvent(LiteRtEnvironment env,
6666
// Sets a custom event to the LiteRtEvent. Event type must be
6767
// LiteRtEventTypeCustom.
6868
LiteRtStatus LiteRtSetCustomEvent(LiteRtEvent event,
69-
litert_custom_event custom_event);
69+
LiteRtCustomEvent custom_event);
7070

7171
LiteRtStatus LiteRtGetEventEventType(LiteRtEvent event, LiteRtEventType* type);
7272

litert/c/litert_event_type.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,19 @@ typedef enum {
3131
} LiteRtEventType;
3232

3333
// Custom events managed by the client.
34-
typedef struct litert_custom_event_t* litert_custom_event;
35-
struct litert_custom_event_t {
34+
typedef struct LiteRtCustomEventT* LiteRtCustomEvent;
35+
struct LiteRtCustomEventT {
3636
// Retains the custom event, e.g. increases the reference count.
37-
void (*retain)(litert_custom_event event);
37+
void (*Retain)(LiteRtCustomEvent event); // NOLINT
3838
// Releases the custom event, e.g. decreases the reference count.
3939
// If the reference count reaches 0, the custom event will be destroyed.
40-
void (*release)(litert_custom_event event);
41-
// Waits for the custom event to be signaled.
42-
void (*wait)(litert_custom_event event, int64_t timeout_in_ms);
43-
// Signals the custom event to notify the waiters.
44-
void (*signal)(litert_custom_event event);
40+
void (*Release)(LiteRtCustomEvent event); // NOLINT
41+
// Waits for the custom event to be signaled. How the event gets signaled is
42+
// backend dependent, e.g. emulated within Wait() or by wrapping an actual GPU
43+
// event signaled by the device.
44+
void (*Wait)(LiteRtCustomEvent event, int64_t timeout_in_ms); // NOLINT
4545
// Returns 1 if the custom event is signaled, 0 otherwise.
46-
int (*is_signaled)(litert_custom_event event);
46+
int (*IsSignaled)(LiteRtCustomEvent event); // NOLINT
4747
};
4848

4949
#ifdef __cplusplus

litert/c/options/litert_gpu_options.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ struct LiteRtGpuOptionsPayloadT {
7171
// Added in version 2.0.2a1.
7272
// Number of steps to prepare WebGPU command buffers in advance.
7373
int num_steps_of_command_buffer_preparations = 0;
74+
// Added in version 2.0.2a1.
75+
LiteRtGpuWaitType wait_type = kLiteRtGpuWaitTypeDefault;
7476
};
7577

7678
namespace litert {
@@ -254,6 +256,14 @@ LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
254256
return kLiteRtStatusOk;
255257
}
256258

259+
LiteRtStatus LiteRtSetGpuAcceleratorRuntimeOptionsWaitType(
260+
LiteRtOpaqueOptions gpu_accelerator_options, LiteRtGpuWaitType wait_type) {
261+
LITERT_ASSIGN_OR_RETURN(LiteRtGpuOptionsPayloadT * payload,
262+
litert::GetPayload(gpu_accelerator_options));
263+
payload->wait_type = wait_type;
264+
return kLiteRtStatusOk;
265+
}
266+
257267
const char* LiteRtGetGpuOptionsPayloadIdentifier() {
258268
return LiteRtGpuOptionsPayloadT::kIdentifier.data();
259269
}
@@ -454,3 +464,13 @@ LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
454464
payload->num_steps_of_command_buffer_preparations;
455465
return kLiteRtStatusOk;
456466
}
467+
468+
LiteRtStatus LiteRtGetGpuAcceleratorRuntimeOptionsWaitType(
469+
LiteRtGpuWaitType* wait_type, LiteRtGpuOptionsPayload payload) {
470+
LITERT_RETURN_IF_ERROR(wait_type, ErrorStatusBuilder::InvalidArgument())
471+
<< "`wait_type` cannot be null.";
472+
LITERT_RETURN_IF_ERROR(payload, ErrorStatusBuilder::InvalidArgument())
473+
<< "`payload` cannot be null.";
474+
*wait_type = payload->wait_type;
475+
return kLiteRtStatusOk;
476+
}

litert/c/options/litert_gpu_options.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
143143
LiteRtOpaqueOptions gpu_accelerator_options,
144144
int num_steps_of_command_buffer_preparations);
145145

146+
// Sets the GPU wait type used on synchronous execution.
147+
LiteRtStatus LiteRtSetGpuAcceleratorRuntimeOptionsWaitType(
148+
LiteRtOpaqueOptions gpu_accelerator_options, LiteRtGpuWaitType wait_type);
149+
146150
// Declarations below this point are meant to be used by accelerator code.
147151

148152
LITERT_DEFINE_HANDLE(LiteRtGpuOptionsPayload);
@@ -214,6 +218,9 @@ LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
214218
int* num_steps_of_command_buffer_preparations,
215219
LiteRtGpuOptionsPayload payload);
216220

221+
LiteRtStatus LiteRtGetGpuAcceleratorRuntimeOptionsWaitType(
222+
LiteRtGpuWaitType* wait_type, LiteRtGpuOptionsPayload payload);
223+
217224
#ifdef __cplusplus
218225
} // extern "C"
219226
#endif

litert/c/windows_exported_symbols.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ EXPORTS
8787
LiteRtGetGpuAcceleratorCompilationOptionsSerializeExternalTensors
8888
LiteRtGetGpuAcceleratorCompilationOptionsSerializeProgramCache
8989
LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations
90+
LiteRtGetGpuAcceleratorRuntimeOptionsWaitType
9091
LiteRtGetGpuOptionsBenchmarkMode
9192
LiteRtGetGpuOptionsConstantTensorSharing
9293
LiteRtGetGpuOptionsExternalTensorsMode
@@ -249,6 +250,7 @@ EXPORTS
249250
LiteRtSetGpuAcceleratorCompilationOptionsSerializeProgramCache
250251
LiteRtSetGpuAcceleratorCompilationOptionsUseBufferStorageType
251252
LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations
253+
LiteRtSetGpuAcceleratorRuntimeOptionsWaitType
252254
LiteRtSetGpuOptionsBenchmarkMode
253255
LiteRtSetGpuOptionsConstantTensorSharing
254256
LiteRtSetGpuOptionsExternalTensorsMode

litert/cc/litert_compiled_model_metal_test.mm

Lines changed: 187 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,17 @@
4949

5050
@interface BasicMetalTest : NSObject
5151

52-
// Tests the model with the given external tensors mode configuration.
52+
// Tests the model with the given execution mode and external tensors mode configuration.
5353
//
54+
// @param asyncMode Whether to use async execution mode.
5455
// @param externalTensorsMode Whether to use external tensors mode.
55-
+ (void)testBasicMetalTest:(BOOL)externalTensorsMode;
56+
+ (void)testBasicMetalTest:(BOOL)asyncMode externalTensorsMode:(BOOL)externalTensorsMode;
5657

5758
@end
5859

5960
@implementation BasicMetalTest
6061

61-
+ (void)testBasicMetalTest:(BOOL)externalTensorsMode {
62+
+ (void)testBasicMetalTest:(BOOL)asyncMode externalTensorsMode:(BOOL)externalTensorsMode {
6263
LITERT_ASSERT_OK_AND_ASSIGN(auto env, litert::Environment::Create({}));
6364
XCTAssertTrue(env);
6465

@@ -89,18 +90,160 @@ + (void)testBasicMetalTest:(BOOL)externalTensorsMode {
8990
input_buffers[1].Write<float>(absl::MakeConstSpan(kTestInput1Tensor, kTestInput1Size)));
9091

9192
// Execute model.
92-
compiled_model.Run(input_buffers, output_buffers);
93+
if (asyncMode) {
94+
bool async = false;
95+
litert::Expected<void> result = compiled_model.RunAsync(input_buffers, output_buffers, async);
96+
XCTAssertTrue(result);
97+
XCTAssertTrue(async);
98+
} else {
99+
litert::Expected<void> result = compiled_model.Run(input_buffers, output_buffers);
100+
XCTAssertTrue(result);
101+
}
93102

94103
// Check model output.
95104
LITERT_ASSERT_OK_AND_ASSIGN(auto output_names, compiled_model.GetSignatureOutputNames());
96105
XCTAssertEqual(output_names.size(), 1);
97106
XCTAssertEqualObjects([NSString stringWithUTF8String:output_names.at(0).data()], @"tfl.add");
98107
XCTAssertTrue(output_buffers[0].IsMetalMemory());
108+
if (asyncMode) {
109+
XCTAssertTrue(output_buffers[0].HasEvent());
110+
litert::Expected<litert::Event> event = output_buffers[0].GetEvent();
111+
XCTAssertTrue(event);
112+
litert::Expected<bool> result = event->IsSignaled();
113+
XCTAssertTrue(result);
114+
XCTAssertFalse(*result); // Not signaled yet.
115+
}
99116
litert::TensorBuffer *output_buffer = &output_buffers.at(0);
100117
[MetalTestHelper checkTensorBufferFloatOutput:output_buffer
101118
withExpectedOutput:kTestOutputTensor
102119
withElementCount:kTestOutputSize
103120
withTolerance:kTolerance];
121+
if (asyncMode) {
122+
litert::Expected<litert::Event> event = output_buffers[0].GetEvent();
123+
XCTAssertTrue(event);
124+
litert::Expected<bool> result = event->IsSignaled();
125+
XCTAssertTrue(result);
126+
// Buffer lock above lets the event be signaled.
127+
XCTAssertTrue(*result);
128+
}
129+
}
130+
131+
@end
132+
133+
@interface MetalPipelineTest : NSObject
134+
135+
// Tests a two-model pipeline with the given execution modes and external tensors mode.
136+
//
137+
// @param asyncMode1stModel Whether to use async execution mode for 1st model.
138+
// @param asyncMode2ndModel Whether to use async execution mode for 2nd model.
139+
// @param externalTensorsMode Whether to use external tensors mode.
140+
+ (void)testMetalPipelineTest:(BOOL)asyncMode1stModel
141+
asyncMode2ndModel:(BOOL)asyncMode2ndModel
142+
externalTensorsMode:(BOOL)externalTensorsMode;
143+
144+
@end
145+
146+
@implementation MetalPipelineTest
147+
148+
+ (void)testMetalPipelineTest:(BOOL)asyncMode1stModel
149+
asyncMode2ndModel:(BOOL)asyncMode2ndModel
150+
externalTensorsMode:(BOOL)externalTensorsMode {
151+
constexpr const float kTestOutputTensorForPipelineTest[] = {21, 42};
152+
153+
LITERT_ASSERT_OK_AND_ASSIGN(auto env, litert::Environment::Create({}));
154+
XCTAssertTrue(env);
155+
156+
NSString *modelFilePath = [MetalTestHelper pathForModelName:@"simple_model"];
157+
XCTAssertNotNil(modelFilePath);
158+
159+
LITERT_ASSERT_OK_AND_ASSIGN(auto options, CreateGpuOptions(externalTensorsMode));
160+
XCTAssertTrue(options);
161+
162+
// Create 1st model.
163+
LITERT_ASSERT_OK_AND_ASSIGN(auto compiled_model_1, litert::CompiledModel::Create(
164+
env, modelFilePath.UTF8String, options));
165+
XCTAssertEqual(compiled_model_1.GetNumSignatures(), 1);
166+
XCTAssertTrue(compiled_model_1);
167+
LITERT_ASSERT_OK_AND_ASSIGN(auto input_buffers_1, compiled_model_1.CreateInputBuffers());
168+
LITERT_ASSERT_OK_AND_ASSIGN(auto output_buffers_1, compiled_model_1.CreateOutputBuffers());
169+
170+
// Create 2nd model.
171+
LITERT_ASSERT_OK_AND_ASSIGN(auto compiled_model_2, litert::CompiledModel::Create(
172+
env, modelFilePath.UTF8String, options));
173+
XCTAssertEqual(compiled_model_2.GetNumSignatures(), 1);
174+
XCTAssertTrue(compiled_model_2);
175+
176+
// The 1st input buffer of the 2nd model is the output buffer of the 1st model.
177+
// The remaining input buffer of the 2nd model is the same as the 1st model's
178+
// corresponding input buffer.
179+
std::vector<TensorBuffer> input_buffers_2(2);
180+
LITERT_ASSERT_OK_AND_ASSIGN(input_buffers_2[0], output_buffers_1[0].Duplicate());
181+
LITERT_ASSERT_OK_AND_ASSIGN(input_buffers_2[1], input_buffers_1[1].Duplicate());
182+
183+
LITERT_ASSERT_OK_AND_ASSIGN(auto output_buffers_2, compiled_model_2.CreateOutputBuffers());
184+
185+
// Fill model inputs for 1st model.
186+
LITERT_ASSERT_OK_AND_ASSIGN(auto input_names, compiled_model_1.GetSignatureInputNames());
187+
XCTAssertEqual(input_names.size(), 2);
188+
XCTAssertEqualObjects([NSString stringWithUTF8String:input_names.at(0).data()], @"arg0");
189+
XCTAssertEqualObjects([NSString stringWithUTF8String:input_names.at(1).data()], @"arg1");
190+
XCTAssertTrue(input_buffers_1[0].IsMetalMemory());
191+
XCTAssertTrue(
192+
input_buffers_1[0].Write<float>(absl::MakeConstSpan(kTestInput0Tensor, kTestInput0Size)));
193+
XCTAssertTrue(input_buffers_1[1].IsMetalMemory());
194+
XCTAssertTrue(
195+
input_buffers_1[1].Write<float>(absl::MakeConstSpan(kTestInput1Tensor, kTestInput1Size)));
196+
197+
// Execute 1st model.
198+
if (asyncMode1stModel) {
199+
bool async = false;
200+
litert::Expected<void> result =
201+
compiled_model_1.RunAsync(input_buffers_1, output_buffers_1, async);
202+
XCTAssertTrue(result);
203+
XCTAssertTrue(async);
204+
} else {
205+
litert::Expected<void> result = compiled_model_1.Run(input_buffers_1, output_buffers_1);
206+
XCTAssertTrue(result);
207+
}
208+
209+
// Execute 2nd model.
210+
if (asyncMode2ndModel) {
211+
bool async = false;
212+
litert::Expected<void> result =
213+
compiled_model_2.RunAsync(input_buffers_2, output_buffers_2, async);
214+
XCTAssertTrue(result);
215+
XCTAssertTrue(async);
216+
} else {
217+
litert::Expected<void> result = compiled_model_2.Run(input_buffers_2, output_buffers_2);
218+
XCTAssertTrue(result);
219+
}
220+
221+
// Check 2nd model output.
222+
LITERT_ASSERT_OK_AND_ASSIGN(auto output_names, compiled_model_2.GetSignatureOutputNames());
223+
XCTAssertEqual(output_names.size(), 1);
224+
XCTAssertEqualObjects([NSString stringWithUTF8String:output_names.at(0).data()], @"tfl.add");
225+
XCTAssertTrue(output_buffers_2[0].IsMetalMemory());
226+
if (asyncMode2ndModel) {
227+
XCTAssertTrue(output_buffers_2[0].HasEvent());
228+
litert::Expected<litert::Event> event = output_buffers_2[0].GetEvent();
229+
XCTAssertTrue(event);
230+
litert::Expected<bool> result = event->IsSignaled();
231+
XCTAssertTrue(result);
232+
XCTAssertFalse(*result); // Not signaled yet.
233+
}
234+
litert::TensorBuffer *output_buffer = &output_buffers_2.at(0);
235+
[MetalTestHelper checkTensorBufferFloatOutput:output_buffer
236+
withExpectedOutput:kTestOutputTensorForPipelineTest
237+
withElementCount:kTestOutputSize
238+
withTolerance:kTolerance];
239+
if (asyncMode2ndModel) {
240+
litert::Expected<litert::Event> event = output_buffers_2[0].GetEvent();
241+
XCTAssertTrue(event);
242+
litert::Expected<bool> result = event->IsSignaled();
243+
XCTAssertTrue(result);
244+
// Buffer lock above lets the event be signaled.
245+
XCTAssertTrue(*result);
246+
}
104247
}
105248

106249
@end
@@ -111,18 +254,51 @@ @interface LitertCompiledModelMetalTest : XCTestCase
111254
@implementation LitertCompiledModelMetalTest
112255

113256
- (void)testCompiledModelGpuBasic {
114-
[BasicMetalTest testBasicMetalTest:false];
257+
[BasicMetalTest testBasicMetalTest:false externalTensorsMode:false];
115258
}
116259

117-
- (void)testCompiledModelGpuBasic2nd {
118-
// Run the test twice to verify that the GPU environment is shared between two CompiledModel
119-
// instances.
120-
[BasicMetalTest testBasicMetalTest:false];
260+
- (void)testCompiledModelGpuBasicAsync {
261+
[BasicMetalTest testBasicMetalTest:true externalTensorsMode:false];
121262
}
122263

123264
- (void)testCompiledModelGpuExternalTensorsMode {
124-
// Test the model with external tensors mode enabled.
125-
[BasicMetalTest testBasicMetalTest:true];
265+
[BasicMetalTest testBasicMetalTest:false externalTensorsMode:true];
266+
}
267+
268+
- (void)testCompiledModelGpuExternalTensorsModeAsync {
269+
[BasicMetalTest testBasicMetalTest:true externalTensorsMode:true];
270+
}
271+
272+
- (void)testCompiledModelGpuPipeline {
273+
[MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:false externalTensorsMode:false];
274+
}
275+
276+
- (void)testCompiledModelGpuPipelineAsync1stModel {
277+
[MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:false externalTensorsMode:false];
278+
}
279+
280+
- (void)testCompiledModelGpuPipelineAsync2ndModel {
281+
[MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:true externalTensorsMode:false];
282+
}
283+
284+
- (void)testCompiledModelGpuPipelineAsyncBothModels {
285+
[MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:true externalTensorsMode:false];
286+
}
287+
288+
- (void)testCompiledModelGpuPipelineExternalTensorsMode {
289+
[MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:false externalTensorsMode:true];
290+
}
291+
292+
- (void)testCompiledModelGpuPipelineExternalTensorsModeAsync1stModel {
293+
[MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:false externalTensorsMode:true];
294+
}
295+
296+
- (void)testCompiledModelGpuPipelineExternalTensorsModeAsync2ndModel {
297+
[MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:true externalTensorsMode:true];
298+
}
299+
300+
- (void)testCompiledModelGpuPipelineExternalTensorsModeAsyncBothModels {
301+
[MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:true externalTensorsMode:true];
126302
}
127303

128304
- (void)testCompiledModelGpuEnvironment {

0 commit comments

Comments
 (0)