Skip to content

Commit ae0d0ef

Browse files
metascroy authored and facebook-github-bot committed
Add CoreML backend options with compute unit configuration (#18369)
Summary: This diff adds type-safe backend options support to the CoreML delegate, allowing users to configure compute units (CPU, GPU, Neural Engine) at model load time using the new `LoadBackendOptionsMap` infrastructure. Key changes: - Added `LoadOptionsBuilder` class in `coreml_backend_options.h` providing a fluent API for setting CoreML options with compile-time type safety - `ComputeUnit` enum nested inside the builder for type-safe compute unit selection (CPU_ONLY, CPU_AND_GPU, CPU_AND_NE, ALL) - Integrated runtime spec retrieval in `backend_delegate.mm` to read `compute_unit` option from `BackendInitContext` - Added comprehensive unit tests for the new options builder Example usage: ```cpp using executorch::backends::coreml::LoadOptionsBuilder; LoadOptionsBuilder coreml_opts; coreml_opts.setComputeUnit(LoadOptionsBuilder::ComputeUnit::CPU_AND_GPU); LoadBackendOptionsMap map; map.set_options(coreml_opts); module.load(method_name, map); ``` Differential Revision: D92358632
1 parent aa7c8ce commit ae0d0ef

File tree

6 files changed

+361
-30
lines changed

6 files changed

+361
-30
lines changed

backends/apple/coreml/BUCK

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ runtime.cxx_library(
8989
platforms = [APPLE],
9090
visibility = ["PUBLIC"],
9191
deps = [
92+
"//executorch/runtime/backend:backend_options",
9293
"//executorch/runtime/backend:interface",
9394
"//executorch/runtime/core:core",
9495
"//executorch/runtime/kernel:kernel_includes",
@@ -133,6 +134,32 @@ _PROTOS = [
133134
"WordTagger",
134135
]
135136

137+
runtime.cxx_test(
138+
name = "coreml_backend_options_test",
139+
srcs = [
140+
"runtime/test/coreml_backend_options_test.cpp",
141+
],
142+
deps = [
143+
":coreml_backend_options",
144+
"//executorch/runtime/backend:backend_options",
145+
"//executorch/runtime/backend:backend_options_map",
146+
"//executorch/runtime/core:core",
147+
],
148+
)
149+
150+
# Header-only library for CoreML backend options
151+
runtime.cxx_library(
152+
name = "coreml_backend_options",
153+
exported_headers = [
154+
"runtime/include/coreml_backend/coreml_backend_options.h",
155+
],
156+
header_namespace = "executorch/backends/apple/coreml",
157+
visibility = ["PUBLIC"],
158+
exported_deps = [
159+
"//executorch/runtime/backend:backend_options",
160+
],
161+
)
162+
136163
runtime.cxx_library(
137164
name = "proto",
138165
srcs = [

backends/apple/coreml/TARGETS

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,29 @@ runtime.python_test(
133133
"fbsource//third-party/pypi/scikit-learn:scikit-learn",
134134
],
135135
)
136+
137+
# Header-only library for CoreML backend options
138+
runtime.cxx_library(
139+
name = "coreml_backend_options",
140+
exported_headers = [
141+
"runtime/include/coreml_backend/coreml_backend_options.h",
142+
],
143+
header_namespace = "executorch/backends/apple/coreml",
144+
visibility = ["PUBLIC"],
145+
exported_deps = [
146+
"//executorch/runtime/backend:backend_options",
147+
],
148+
)
149+
150+
runtime.cxx_test(
151+
name = "coreml_backend_options_test",
152+
srcs = [
153+
"runtime/test/coreml_backend_options_test.cpp",
154+
],
155+
deps = [
156+
":coreml_backend_options",
157+
"//executorch/runtime/backend:backend_options",
158+
"//executorch/runtime/backend:backend_options_map",
159+
"//executorch/runtime/core:core",
160+
],
161+
)

backends/apple/coreml/runtime/delegate/backend_delegate.mm

Lines changed: 53 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,30 +19,49 @@
1919
namespace {
2020
using namespace executorchcoreml;
2121

22-
MLComputeUnits get_compute_units(const Buffer& buffer) {
22+
std::optional<MLComputeUnits> get_compute_units(const Buffer& buffer) {
2323
std::string value(reinterpret_cast<const char *>(buffer.data()), buffer.size());
2424
if (value == std::string(ETCoreMLStrings.cpuComputeUnitName.UTF8String)) {
2525
return MLComputeUnitsCPUOnly;
2626
} else if (value == std::string(ETCoreMLStrings.cpuAndGpuComputeUnitsName.UTF8String)) {
2727
return MLComputeUnitsCPUAndGPU;
2828
} else if (value == std::string(ETCoreMLStrings.cpuAndNeuralEngineComputeUnitsName.UTF8String)) {
2929
return MLComputeUnitsCPUAndNeuralEngine;
30-
} else {
30+
} else if (value == std::string(ETCoreMLStrings.allComputeUnitsName.UTF8String)) {
3131
return MLComputeUnitsAll;
32+
} else {
33+
return std::nullopt;
3234
}
3335
}
3436

35-
MLModelConfiguration *get_model_configuration(const std::unordered_map<std::string, Buffer>& specs) {
37+
MLModelConfiguration * _Nullable get_model_configuration(const std::unordered_map<std::string, Buffer>& specs,
38+
NSError * __autoreleasing *error) {
3639
std::string compute_units_key(ETCoreMLStrings.computeUnitsKeyName.UTF8String);
3740
MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
38-
41+
3942
for (const auto& [key, buffer] : specs) {
4043
if (key == compute_units_key) {
41-
configuration.computeUnits = get_compute_units(buffer);
44+
auto compute_units = get_compute_units(buffer);
45+
if (!compute_units.has_value()) {
46+
std::string value(reinterpret_cast<const char *>(buffer.data()), buffer.size());
47+
NSString *errorMessage = [NSString stringWithFormat:@"Invalid compute_unit value: '%s'. Valid values are: %@, %@, %@, %@",
48+
value.c_str(),
49+
ETCoreMLStrings.cpuComputeUnitName,
50+
ETCoreMLStrings.cpuAndGpuComputeUnitsName,
51+
ETCoreMLStrings.cpuAndNeuralEngineComputeUnitsName,
52+
ETCoreMLStrings.allComputeUnitsName];
53+
if (error) {
54+
*error = [NSError errorWithDomain:ETCoreMLStrings.productIdentifier
55+
code:-1
56+
userInfo:@{NSLocalizedDescriptionKey: errorMessage}];
57+
}
58+
return nil;
59+
}
60+
configuration.computeUnits = compute_units.value();
4261
break;
4362
}
4463
}
45-
64+
4665
return configuration;
4766
}
4867

@@ -112,15 +131,15 @@ - (instancetype)initWithConfig:(BackendDelegate::Config)config {
112131
_config = std::move(config);
113132
_syncQueue = dispatch_queue_create("com.executorchcoreml.modelmanagerdelegate.sync", DISPATCH_QUEUE_SERIAL_WITH_AUTORELEASE_POOL);
114133
}
115-
134+
116135
return self;
117136
}
118137

119138
- (BOOL)_loadAndReturnError:(NSError * _Nullable __autoreleasing *)error {
120139
if (self.impl != nil) {
121140
return YES;
122141
}
123-
142+
124143
ETCoreMLAssetManager *assetManager = create_asset_manager(ETCoreMLStrings.assetsDirectoryPath,
125144
ETCoreMLStrings.trashDirectoryPath,
126145
ETCoreMLStrings.databaseDirectoryPath,
@@ -130,14 +149,14 @@ - (BOOL)_loadAndReturnError:(NSError * _Nullable __autoreleasing *)error {
130149
if (!assetManager) {
131150
return NO;
132151
}
133-
152+
134153
ETCoreMLModelManager *modelManager = [[ETCoreMLModelManager alloc] initWithAssetManager:assetManager];
135154
if (!modelManager) {
136155
return NO;
137156
}
138-
157+
139158
self.impl = modelManager;
140-
159+
141160
if (self.config.should_prewarm_asset) {
142161
[modelManager prewarmRecentlyUsedAssetsWithMaxCount:1];
143162
}
@@ -151,11 +170,11 @@ - (BOOL)loadAndReturnError:(NSError * _Nullable __autoreleasing *)error {
151170
dispatch_sync(self.syncQueue, ^{
152171
result = [self _loadAndReturnError:&localError];
153172
});
154-
173+
155174
if (error) {
156175
*error = localError;
157176
}
158-
177+
159178
return result;
160179
}
161180

@@ -183,7 +202,7 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
183202
if (![self loadAndReturnError:error]) {
184203
return nil;
185204
}
186-
205+
187206
auto handle = [self.impl loadModelFromAOTData:data
188207
configuration:configuration
189208
methodName:methodName
@@ -223,15 +242,15 @@ - (BOOL)purgeModelsCacheAndReturnError:(NSError * _Nullable __autoreleasing *)er
223242
if (![self loadAndReturnError:error]) {
224243
return NO;
225244
}
226-
245+
227246
return [self.impl purgeModelsCacheAndReturnError:error];;
228247
}
229248

230249
- (BOOL)isAvailable {
231250
if (![self loadAndReturnError:nil]) {
232251
return NO;
233252
}
234-
253+
235254
return YES;
236255
}
237256

@@ -267,20 +286,24 @@ explicit BackendDelegateImpl(const Config& config) noexcept
267286
{
268287
[model_manager_ loadAsynchronously];
269288
}
270-
289+
271290
BackendDelegateImpl(BackendDelegateImpl const&) = delete;
272291
BackendDelegateImpl& operator=(BackendDelegateImpl const&) = delete;
273-
292+
274293
Handle *init(Buffer processed,
275294
const std::unordered_map<std::string, Buffer>& specs,
276295
const char* method_name = nullptr,
277296
const char* function_name = nullptr) const noexcept override {
278297
NSError *localError = nil;
279-
MLModelConfiguration *configuration = get_model_configuration(specs);
280-
298+
MLModelConfiguration *configuration = get_model_configuration(specs, &localError);
299+
if (configuration == nil) {
300+
ETCoreMLLogError(localError, "Invalid model configuration");
301+
return nullptr;
302+
}
303+
281304
NSString *methodNameStr = method_name ? @(method_name) : nil;
282305
NSString *functionNameStr = function_name ? @(function_name) : nil;
283-
306+
284307
NSData *data = [NSData dataWithBytesNoCopy:const_cast<void *>(processed.data())
285308
length:processed.size()
286309
freeWhenDone:NO];
@@ -294,7 +317,7 @@ explicit BackendDelegateImpl(const Config& config) noexcept
294317
}
295318
return modelHandle;
296319
}
297-
320+
298321
bool execute(Handle* handle,
299322
std::vector<MultiArray>& args,
300323
const ModelLoggingOptions& logging_options,
@@ -309,36 +332,36 @@ bool execute(Handle* handle,
309332
if (localError != nil) {
310333
ETCoreMLLogError(localError, "Model execution failed");
311334
ec = static_cast<ErrorCode>(localError.code);
312-
}
335+
}
313336
return false;
314337
}
315-
338+
316339
return true;
317340
}
318-
341+
319342
bool is_valid_handle(Handle* handle) const noexcept override {
320343
return [model_manager_ modelWithHandle:handle] != nil;
321344
}
322-
345+
323346
bool is_available() const noexcept override {
324347
return static_cast<bool>(model_manager_.isAvailable);
325348
}
326-
349+
327350
std::pair<size_t, size_t> get_num_arguments(Handle* handle) const noexcept override {
328351
ETCoreMLModel *model = [model_manager_ modelWithHandle:handle];
329352
return {model.orderedInputNames.count, model.orderedOutputNames.count};
330353
}
331-
354+
332355
void destroy(Handle* handle) const noexcept override {
333356
[model_manager_ unloadModelWithHandle:handle];
334357
}
335-
358+
336359
bool purge_models_cache() const noexcept override {
337360
NSError *localError = nil;
338361
bool result = static_cast<bool>([model_manager_ purgeModelsCacheAndReturnError:&localError]);
339362
return result;
340363
}
341-
364+
342365
ETCoreMLModelManagerDelegate *model_manager_;
343366
Config config_;
344367
};

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,24 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
263263
specs_map.emplace(spec.key, std::move(buffer));
264264
}
265265

266+
// Check RuntimeSpec for compute_unit override.
267+
// RuntimeSpec takes precedence over CompileSpec for load-time configuration.
268+
std::string runtime_compute_unit_value;
269+
auto runtime_specs = context.runtime_specs();
270+
if (runtime_specs.size() > 0) {
271+
auto compute_unit_result = context.get_runtime_spec<const char*>("compute_unit");
272+
if (compute_unit_result.ok()) {
273+
runtime_compute_unit_value = compute_unit_result.get();
274+
ET_LOG(Debug, "%s: Using compute_unit from RuntimeSpec: %s",
275+
ETCoreMLStrings.delegateIdentifier.UTF8String,
276+
runtime_compute_unit_value.c_str());
277+
// Override the compile spec with runtime spec value
278+
std::string compute_units_key(ETCoreMLStrings.computeUnitsKeyName.UTF8String);
279+
auto buffer = Buffer(runtime_compute_unit_value.data(), runtime_compute_unit_value.size());
280+
specs_map.insert_or_assign(compute_units_key, std::move(buffer));
281+
}
282+
}
283+
266284
// This will hold the NamedDataStore data if needed, keeping it alive until scope exit
267285
std::optional<FreeableBuffer> namedDataStoreBuffer;
268286
Buffer buffer(nullptr, 0);

0 commit comments

Comments
 (0)