google-ai-edge
diff --git a/litert/c/litert_common.h b/litert/c/litert_common.h
Lines changed: 15 additions & 0 deletions
diff --git a/litert/c/litert_event.cc b/litert/c/litert_event.cc
Lines changed: 5 additions & 5 deletions
diff --git a/litert/c/litert_event.h b/litert/c/litert_event.h
Lines changed: 1 addition & 1 deletion
diff --git a/litert/c/litert_event_type.h b/litert/c/litert_event_type.h
Lines changed: 9 additions & 9 deletions
diff --git a/litert/c/options/litert_gpu_options.cc b/litert/c/options/litert_gpu_options.cc
Lines changed: 20 additions & 0 deletions
diff --git a/litert/c/options/litert_gpu_options.h b/litert/c/options/litert_gpu_options.h
Lines changed: 7 additions & 0 deletions
diff --git a/litert/c/windows_exported_symbols.def b/litert/c/windows_exported_symbols.def
Lines changed: 2 additions & 0 deletions
diff --git a/litert/cc/litert_compiled_model_metal_test.mm b/litert/cc/litert_compiled_model_metal_test.mm
Lines changed: 187 additions & 11 deletions
@@ -334,6 +334,21 @@ typedef enum {
   kLiteRtGpuBackendOpenGl = 3,  // Experimental, do not use.
 } LiteRtGpuBackend;
 
+// How synchronous execution waits for the GPU to finish its work.
+// Values map 1:1 to GpuDelegateWaitType; keep the numeric values in sync.
+typedef enum {
+  // The delegate chooses the wait type automatically.
+  kLiteRtGpuWaitTypeDefault = 0,
+  // Block (passively) until the GPU finishes.
+  kLiteRtGpuWaitTypePassive = 1,
+  // Busy-wait (actively) until the GPU finishes.
+  kLiteRtGpuWaitTypeActive = 2,
+  // Do not wait for the GPU to finish; rely on other synchronization such as
+  // barriers or an in-order queue. Kept for backward compatibility only; new
+  // code should use asynchronous execution mode instead.
+  kLiteRtGpuWaitTypeDoNotWait = 3,
+} LiteRtGpuWaitType;
+
 // Error reporter mode enum
 typedef enum LiteRtErrorReporterMode {
   // No error reporting (errors are ignored)
 
@@ -131,16 +131,16 @@ LiteRtStatus LiteRtCreateManagedEvent(LiteRtEnvironment env,
 }
 
 LiteRtStatus LiteRtSetCustomEvent(LiteRtEvent event,
-                                  litert_custom_event custom_event) {
+                                  LiteRtCustomEvent custom_event) {
 #if LITERT_HAS_CUSTOM_EVENT_SUPPORT
   if (event->type == LiteRtEventTypeCustom) {
     if (event->custom_event != nullptr &&
-        event->custom_event->release != nullptr) {
-      event->custom_event->release(event->custom_event);
+        event->custom_event->Release != nullptr) {
+      event->custom_event->Release(event->custom_event);
     }
     event->custom_event = custom_event;
-    if (custom_event && custom_event->retain != nullptr) {
-      custom_event->retain(custom_event);
+    if (custom_event && custom_event->Retain != nullptr) {
+      custom_event->Retain(custom_event);
     }
     return kLiteRtStatusOk;
   }
 
@@ -66,7 +66,7 @@ LiteRtStatus LiteRtCreateManagedEvent(LiteRtEnvironment env,
 // Sets a custom event to the LiteRtEvent. Event type must be
 // LiteRtEventTypeCustom.
 LiteRtStatus LiteRtSetCustomEvent(LiteRtEvent event,
-                                  litert_custom_event custom_event);
+                                  LiteRtCustomEvent custom_event);
 
 LiteRtStatus LiteRtGetEventEventType(LiteRtEvent event, LiteRtEventType* type);
 
 
@@ -31,19 +31,19 @@ typedef enum {
 } LiteRtEventType;
 
 // Custom events managed by the client.
-typedef struct litert_custom_event_t* litert_custom_event;
-struct litert_custom_event_t {
+typedef struct LiteRtCustomEventT* LiteRtCustomEvent;
+struct LiteRtCustomEventT {
   // Retains the custom event, e.g. increases the reference count.
-  void (*retain)(litert_custom_event event);
+  void (*Retain)(LiteRtCustomEvent event);  // NOLINT
   // Releases the custom event, e.g. decreases the reference count.
   // If the reference count reaches 0, the custom event will be destroyed.
-  void (*release)(litert_custom_event event);
-  // Waits for the custom event to be signaled.
-  void (*wait)(litert_custom_event event, int64_t timeout_in_ms);
-  // Signals the custom event to notify the waiters.
-  void (*signal)(litert_custom_event event);
+  void (*Release)(LiteRtCustomEvent event);  // NOLINT
+  // Waits for the custom event to be signaled. How the event gets signaled is
+  // backend dependent, e.g. emulated inside Wait() or by wrapping an actual
+  // GPU event signaled by the device.
+  void (*Wait)(LiteRtCustomEvent event, int64_t timeout_in_ms);  // NOLINT
   // Returns 1 if the custom event is signaled, 0 otherwise.
-  int (*is_signaled)(litert_custom_event event);
+  int (*IsSignaled)(LiteRtCustomEvent event);  // NOLINT
 };
 
 #ifdef __cplusplus
 
@@ -71,6 +71,8 @@ struct LiteRtGpuOptionsPayloadT {
   // Added in version 2.0.2a1.
   // Number of steps to prepare WebGPU command buffers in advance.
   int num_steps_of_command_buffer_preparations = 0;
+  // Added in version 2.0.2a1.
+  LiteRtGpuWaitType wait_type = kLiteRtGpuWaitTypeDefault;
 };
 
 namespace litert {
@@ -254,6 +256,14 @@ LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
   return kLiteRtStatusOk;
 }
 
+LiteRtStatus LiteRtSetGpuAcceleratorRuntimeOptionsWaitType(
+    LiteRtOpaqueOptions gpu_accelerator_options, LiteRtGpuWaitType wait_type) {
+  LITERT_ASSIGN_OR_RETURN(LiteRtGpuOptionsPayloadT * payload,
+                          litert::GetPayload(gpu_accelerator_options));
+  payload->wait_type = wait_type;  // Stored as-is; value is not validated.
+  return kLiteRtStatusOk;
+}
+
 const char* LiteRtGetGpuOptionsPayloadIdentifier() {
   return LiteRtGpuOptionsPayloadT::kIdentifier.data();
 }
@@ -454,3 +464,13 @@ LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
       payload->num_steps_of_command_buffer_preparations;
   return kLiteRtStatusOk;
 }
+
+LiteRtStatus LiteRtGetGpuAcceleratorRuntimeOptionsWaitType(
+    LiteRtGpuWaitType* wait_type, LiteRtGpuOptionsPayload payload) {
+  LITERT_RETURN_IF_ERROR(wait_type, ErrorStatusBuilder::InvalidArgument())
+      << "`wait_type` cannot be null.";  // Presumably rejects a null out-param.
+  LITERT_RETURN_IF_ERROR(payload, ErrorStatusBuilder::InvalidArgument())
+      << "`payload` cannot be null.";  // Presumably rejects a null payload.
+  *wait_type = payload->wait_type;  // Copy stored value to the out-param.
+  return kLiteRtStatusOk;
+}
@@ -143,6 +143,10 @@ LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
     LiteRtOpaqueOptions gpu_accelerator_options,
     int num_steps_of_command_buffer_preparations);
 
+// Sets how synchronous execution waits for the GPU (see LiteRtGpuWaitType).
+LiteRtStatus LiteRtSetGpuAcceleratorRuntimeOptionsWaitType(
+    LiteRtOpaqueOptions gpu_accelerator_options, LiteRtGpuWaitType wait_type);
+
 // Declarations below this point are meant to be used by accelerator code.
 
 LITERT_DEFINE_HANDLE(LiteRtGpuOptionsPayload);
@@ -214,6 +218,9 @@ LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations(
     int* num_steps_of_command_buffer_preparations,
     LiteRtGpuOptionsPayload payload);
 
+LiteRtStatus LiteRtGetGpuAcceleratorRuntimeOptionsWaitType(
+    LiteRtGpuWaitType* wait_type, LiteRtGpuOptionsPayload payload);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 
@@ -87,6 +87,7 @@ EXPORTS
   LiteRtGetGpuAcceleratorCompilationOptionsSerializeExternalTensors
   LiteRtGetGpuAcceleratorCompilationOptionsSerializeProgramCache
   LiteRtGetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations
+  LiteRtGetGpuAcceleratorRuntimeOptionsWaitType
   LiteRtGetGpuOptionsBenchmarkMode
   LiteRtGetGpuOptionsConstantTensorSharing
   LiteRtGetGpuOptionsExternalTensorsMode
@@ -249,6 +250,7 @@ EXPORTS
   LiteRtSetGpuAcceleratorCompilationOptionsSerializeProgramCache
   LiteRtSetGpuAcceleratorCompilationOptionsUseBufferStorageType
   LiteRtSetGpuAcceleratorRuntimeOptionsNumStepsOfCommandBufferPreparations
+  LiteRtSetGpuAcceleratorRuntimeOptionsWaitType
   LiteRtSetGpuOptionsBenchmarkMode
   LiteRtSetGpuOptionsConstantTensorSharing
   LiteRtSetGpuOptionsExternalTensorsMode
 
@@ -49,16 +49,17 @@
 
 @interface BasicMetalTest : NSObject
 
-// Tests the model with the given external tensors mode configuration.
+// Tests the model with the given execution mode and external tensors mode configuration.
 //
+// @param asyncMode Whether to use async execution mode.
 // @param externalTensorsMode Whether to use external tensors mode.
-+ (void)testBasicMetalTest:(BOOL)externalTensorsMode;
++ (void)testBasicMetalTest:(BOOL)asyncMode externalTensorsMode:(BOOL)externalTensorsMode;
 
 @end
 
 @implementation BasicMetalTest
 
-+ (void)testBasicMetalTest:(BOOL)externalTensorsMode {
++ (void)testBasicMetalTest:(BOOL)asyncMode externalTensorsMode:(BOOL)externalTensorsMode {
   LITERT_ASSERT_OK_AND_ASSIGN(auto env, litert::Environment::Create({}));
   XCTAssertTrue(env);
 
@@ -89,18 +90,160 @@ + (void)testBasicMetalTest:(BOOL)externalTensorsMode {
       input_buffers[1].Write<float>(absl::MakeConstSpan(kTestInput1Tensor, kTestInput1Size)));
 
   // Execute model.
-  compiled_model.Run(input_buffers, output_buffers);
+  if (asyncMode) {
+    bool async = false;
+    litert::Expected<void> result = compiled_model.RunAsync(input_buffers, output_buffers, async);
+    XCTAssertTrue(result);
+    XCTAssertTrue(async);
+  } else {
+    litert::Expected<void> result = compiled_model.Run(input_buffers, output_buffers);
+    XCTAssertTrue(result);
+  }
 
   // Check model output.
   LITERT_ASSERT_OK_AND_ASSIGN(auto output_names, compiled_model.GetSignatureOutputNames());
   XCTAssertEqual(output_names.size(), 1);
   XCTAssertEqualObjects([NSString stringWithUTF8String:output_names.at(0).data()], @"tfl.add");
   XCTAssertTrue(output_buffers[0].IsMetalMemory());
+  if (asyncMode) {
+    XCTAssertTrue(output_buffers[0].HasEvent());
+    litert::Expected<litert::Event> event = output_buffers[0].GetEvent();
+    XCTAssertTrue(event);
+    litert::Expected<bool> result = event->IsSignaled();
+    XCTAssertTrue(result);
+    XCTAssertFalse(*result);  // Not signaled yet.
+  }
   litert::TensorBuffer *output_buffer = &output_buffers.at(0);
   [MetalTestHelper checkTensorBufferFloatOutput:output_buffer
                              withExpectedOutput:kTestOutputTensor
                                withElementCount:kTestOutputSize
                                   withTolerance:kTolerance];
+  if (asyncMode) {
+    litert::Expected<litert::Event> event = output_buffers[0].GetEvent();
+    XCTAssertTrue(event);
+    litert::Expected<bool> result = event->IsSignaled();
+    XCTAssertTrue(result);
+    // Buffer lock above lets the event be signaled.
+    XCTAssertTrue(*result);
+  }
+}
+
+@end
+
+@interface MetalPipelineTest : NSObject
+
+// Tests two chained models (1st model's output feeds the 2nd) with the given configuration.
+//
+// @param asyncMode1stModel Whether to use async execution mode for 1st model.
+// @param asyncMode2ndModel Whether to use async execution mode for 2nd model.
+// @param externalTensorsMode Whether to use external tensors mode.
++ (void)testMetalPipelineTest:(BOOL)asyncMode1stModel
+            asyncMode2ndModel:(BOOL)asyncMode2ndModel
+          externalTensorsMode:(BOOL)externalTensorsMode;
+
+@end
+
+@implementation MetalPipelineTest
+
++ (void)testMetalPipelineTest:(BOOL)asyncMode1stModel
+            asyncMode2ndModel:(BOOL)asyncMode2ndModel
+          externalTensorsMode:(BOOL)externalTensorsMode {
+  constexpr const float kTestOutputTensorForPipelineTest[] = {21, 42};  // Expected 2nd-model output.
+
+  LITERT_ASSERT_OK_AND_ASSIGN(auto env, litert::Environment::Create({}));
+  XCTAssertTrue(env);
+
+  NSString *modelFilePath = [MetalTestHelper pathForModelName:@"simple_model"];
+  XCTAssertNotNil(modelFilePath);
+
+  LITERT_ASSERT_OK_AND_ASSIGN(auto options, CreateGpuOptions(externalTensorsMode));
+  XCTAssertTrue(options);
+
+  // Create 1st model.
+  LITERT_ASSERT_OK_AND_ASSIGN(auto compiled_model_1, litert::CompiledModel::Create(
+      env, modelFilePath.UTF8String, options));
+  XCTAssertEqual(compiled_model_1.GetNumSignatures(), 1);
+  XCTAssertTrue(compiled_model_1);
+  LITERT_ASSERT_OK_AND_ASSIGN(auto input_buffers_1, compiled_model_1.CreateInputBuffers());
+  LITERT_ASSERT_OK_AND_ASSIGN(auto output_buffers_1, compiled_model_1.CreateOutputBuffers());
+
+  // Create 2nd model.
+  LITERT_ASSERT_OK_AND_ASSIGN(auto compiled_model_2, litert::CompiledModel::Create(
+      env, modelFilePath.UTF8String, options));
+  XCTAssertEqual(compiled_model_2.GetNumSignatures(), 1);
+  XCTAssertTrue(compiled_model_2);
+
+  // Chain the models: input 0 of the 2nd model is output_buffers_1[0].Duplicate()
+  // and input 1 is input_buffers_1[1].Duplicate(). Only the 1st model's inputs are
+  // written below (presumably Duplicate() shares the underlying buffer; confirm).
+  std::vector<TensorBuffer> input_buffers_2(2);
+  LITERT_ASSERT_OK_AND_ASSIGN(input_buffers_2[0], output_buffers_1[0].Duplicate());
+  LITERT_ASSERT_OK_AND_ASSIGN(input_buffers_2[1], input_buffers_1[1].Duplicate());
+
+  LITERT_ASSERT_OK_AND_ASSIGN(auto output_buffers_2, compiled_model_2.CreateOutputBuffers());
+
+  // Fill model inputs for 1st model.
+  LITERT_ASSERT_OK_AND_ASSIGN(auto input_names, compiled_model_1.GetSignatureInputNames());
+  XCTAssertEqual(input_names.size(), 2);
+  XCTAssertEqualObjects([NSString stringWithUTF8String:input_names.at(0).data()], @"arg0");
+  XCTAssertEqualObjects([NSString stringWithUTF8String:input_names.at(1).data()], @"arg1");
+  XCTAssertTrue(input_buffers_1[0].IsMetalMemory());
+  XCTAssertTrue(
+      input_buffers_1[0].Write<float>(absl::MakeConstSpan(kTestInput0Tensor, kTestInput0Size)));
+  XCTAssertTrue(input_buffers_1[1].IsMetalMemory());
+  XCTAssertTrue(
+      input_buffers_1[1].Write<float>(absl::MakeConstSpan(kTestInput1Tensor, kTestInput1Size)));
+
+  // Execute 1st model.
+  if (asyncMode1stModel) {
+    bool async = false;
+    litert::Expected<void> result =
+        compiled_model_1.RunAsync(input_buffers_1, output_buffers_1, async);
+    XCTAssertTrue(result);
+    XCTAssertTrue(async);
+  } else {
+    litert::Expected<void> result = compiled_model_1.Run(input_buffers_1, output_buffers_1);
+    XCTAssertTrue(result);
+  }
+
+  // Execute 2nd model.
+  if (asyncMode2ndModel) {
+    bool async = false;
+    litert::Expected<void> result =
+        compiled_model_2.RunAsync(input_buffers_2, output_buffers_2, async);
+    XCTAssertTrue(result);
+    XCTAssertTrue(async);
+  } else {
+    litert::Expected<void> result = compiled_model_2.Run(input_buffers_2, output_buffers_2);
+    XCTAssertTrue(result);
+  }
+
+  // Check 2nd model output.
+  LITERT_ASSERT_OK_AND_ASSIGN(auto output_names, compiled_model_2.GetSignatureOutputNames());
+  XCTAssertEqual(output_names.size(), 1);
+  XCTAssertEqualObjects([NSString stringWithUTF8String:output_names.at(0).data()], @"tfl.add");
+  XCTAssertTrue(output_buffers_2[0].IsMetalMemory());
+  if (asyncMode2ndModel) {
+    XCTAssertTrue(output_buffers_2[0].HasEvent());
+    litert::Expected<litert::Event> event = output_buffers_2[0].GetEvent();
+    XCTAssertTrue(event);  // NOTE(review): if non-fatal, `event` is still dereferenced below.
+    litert::Expected<bool> result = event->IsSignaled();
+    XCTAssertTrue(result);
+    XCTAssertFalse(*result);  // Not signaled yet.
+  }
+  litert::TensorBuffer *output_buffer = &output_buffers_2.at(0);
+  [MetalTestHelper checkTensorBufferFloatOutput:output_buffer
+                             withExpectedOutput:kTestOutputTensorForPipelineTest
+                               withElementCount:kTestOutputSize
+                                  withTolerance:kTolerance];
+  if (asyncMode2ndModel) {
+    litert::Expected<litert::Event> event = output_buffers_2[0].GetEvent();
+    XCTAssertTrue(event);
+    litert::Expected<bool> result = event->IsSignaled();
+    XCTAssertTrue(result);
+    // Buffer lock above lets the event be signaled.
+    XCTAssertTrue(*result);
+  }
 }
 
 @end
@@ -111,18 +254,51 @@ @interface LitertCompiledModelMetalTest : XCTestCase
 @implementation LitertCompiledModelMetalTest
 
 - (void)testCompiledModelGpuBasic {
-  [BasicMetalTest testBasicMetalTest:false];
+  [BasicMetalTest testBasicMetalTest:false externalTensorsMode:false];
 }
 
-- (void)testCompiledModelGpuBasic2nd {
-  // Run the test twice to verify that the GPU environment is shared between two CompiledModel
-  // instances.
-  [BasicMetalTest testBasicMetalTest:false];
+- (void)testCompiledModelGpuBasicAsync {
+  [BasicMetalTest testBasicMetalTest:true externalTensorsMode:false];
 }
 
 - (void)testCompiledModelGpuExternalTensorsMode {
-  // Test the model with external tensors mode enabled.
-  [BasicMetalTest testBasicMetalTest:true];
+  [BasicMetalTest testBasicMetalTest:false externalTensorsMode:true];
+}
+
+- (void)testCompiledModelGpuExternalTensorsModeAsync {
+  [BasicMetalTest testBasicMetalTest:true externalTensorsMode:true];
+}
+
+- (void)testCompiledModelGpuPipeline {
+  [MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:false externalTensorsMode:false];
+}
+
+- (void)testCompiledModelGpuPipelineAsync1stModel {
+  [MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:false externalTensorsMode:false];
+}
+
+- (void)testCompiledModelGpuPipelineAsync2ndModel {
+  [MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:true externalTensorsMode:false];
+}
+
+- (void)testCompiledModelGpuPipelineAsyncBothModels {
+  [MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:true externalTensorsMode:false];
+}
+
+- (void)testCompiledModelGpuPipelineExternalTensorsMode {
+  [MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:false externalTensorsMode:true];
+}
+
+- (void)testCompiledModelGpuPipelineExternalTensorsModeAsync1stModel {
+  [MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:false externalTensorsMode:true];
+}
+
+- (void)testCompiledModelGpuPipelineExternalTensorsModeAsync2ndModel {
+  [MetalPipelineTest testMetalPipelineTest:false asyncMode2ndModel:true externalTensorsMode:true];
+}
+
+- (void)testCompiledModelGpuPipelineExternalTensorsModeAsyncBothModels {
+  [MetalPipelineTest testMetalPipelineTest:true asyncMode2ndModel:true externalTensorsMode:true];
 }
 
 - (void)testCompiledModelGpuEnvironment {
Original file line number	Diff line number	Diff line change
`@@ -131,16 +131,16 @@ LiteRtStatus LiteRtCreateManagedEvent(LiteRtEnvironment env,`
`131`	`131`	`}`
`132`	`132`
`133`	`133`	`LiteRtStatus LiteRtSetCustomEvent(LiteRtEvent event,`
`134`		`- litert_custom_event custom_event) {`
	`134`	`+ LiteRtCustomEvent custom_event) {`
`135`	`135`	`#if LITERT_HAS_CUSTOM_EVENT_SUPPORT`
`136`	`136`	`if (event->type == LiteRtEventTypeCustom) {`
`137`	`137`	`if (event->custom_event != nullptr &&`
`138`		`- event->custom_event->release != nullptr) {`
`139`		`- event->custom_event->release(event->custom_event);`
	`138`	`+ event->custom_event->Release != nullptr) {`
	`139`	`+ event->custom_event->Release(event->custom_event);`
`140`	`140`	`}`
`141`	`141`	`event->custom_event = custom_event;`
`142`		`- if (custom_event && custom_event->retain != nullptr) {`
`143`		`- custom_event->retain(custom_event);`
	`142`	`+ if (custom_event && custom_event->Retain != nullptr) {`
	`143`	`+ custom_event->Retain(custom_event);`
`144`	`144`	`}`
`145`	`145`	`return kLiteRtStatusOk;`
`146`	`146`	`}`