@@ -100,14 +100,14 @@ func (ef *EngineMetricsFetcher) FetchTypedMetric(ctx context.Context, endpoint,
100100 return nil , fmt .Errorf ("metric %s is not a raw pod metric, use FetchAllTypedMetrics for complex queries" , metricName )
101101 }
102102
103- // Get raw metric name for this engine
104- rawMetricName , exists := metricDef .EngineMetricsNameMapping [engineType ]
105- if ! exists {
103+ // Get raw metric name candidates for this engine
104+ candidates , exists := metricDef .EngineMetricsNameMapping [engineType ]
105+ if ! exists || len ( candidates ) == 0 {
106106 return nil , fmt .Errorf ("metric %s not supported for engine type %s" , metricName , engineType )
107107 }
108108
109109 url := fmt .Sprintf ("http://%s/metrics" , endpoint )
110-
110+ var lastErr error
111111 // Fetch with retry logic
112112 for attempt := 0 ; attempt <= ef .config .MaxRetries ; attempt ++ {
113113 if attempt > 0 {
@@ -130,21 +130,37 @@ func (ef *EngineMetricsFetcher) FetchTypedMetric(ctx context.Context, endpoint,
130130 continue
131131 }
132132
133- // Parse the specific metric we need
134- metricValue , err := ef .parseMetricFromFamily (allMetrics , rawMetricName , metricDef )
135- if err != nil {
136- klog .V (4 ).InfoS ("Failed to parse metric from engine endpoint" ,
137- "attempt" , attempt + 1 , "identifier" , identifier , "metric" , metricName , "error" , err )
138- continue
133+ // Try each candidate until one exists and can be parsed
134+ for _ , rawMetricName := range candidates {
135+ if _ , ok := allMetrics [rawMetricName ]; ! ok {
136+ continue // skip if not present
137+ }
138+
139+ metricValue , err := ef .parseMetricFromFamily (allMetrics , rawMetricName , metricDef )
140+ if err != nil {
141+ lastErr = err
142+ klog .V (5 ).InfoS ("Failed to parse candidate metric" , "candidate" , rawMetricName , "error" , err )
143+ continue
144+ }
145+
146+ klog .V (4 ).InfoS ("Successfully fetched typed metric from engine endpoint" ,
147+ "identifier" , identifier , "metric" , metricName , "rawMetric" , rawMetricName , "value" , metricValue , "attempt" , attempt + 1 )
148+ return metricValue , nil
139149 }
140150
141- klog .V (4 ).InfoS ("Successfully fetched typed metric from engine endpoint " ,
142- "identifier " , identifier , "metric " , metricName , "value " , metricValue , "attempt" , attempt + 1 )
143- return metricValue , nil
151+ klog .V (4 ).InfoS ("Failed to find valid metric among candidates " ,
152+ "candidates " , candidates , "identifier " , identifier , "metric " , metricName )
153+ // Continue to next retry if any
144154 }
145155
146- return nil , fmt .Errorf ("failed to fetch typed metric %s from engine endpoint %s after %d attempts" ,
147- metricName , identifier , ef .config .MaxRetries + 1 )
156+ // If we get here, none of the candidates worked
157+ errMsg := "none of the candidate metric names found or parsed successfully"
158+ if lastErr != nil {
159+ errMsg += ": " + lastErr .Error ()
160+ }
161+
162+ return nil , fmt .Errorf ("failed to fetch typed metric %s from engine endpoint %s after %d attempts: %w" ,
163+ metricName , identifier , ef .config .MaxRetries + 1 , lastErr )
148164}
149165
150166// FetchAllTypedMetrics fetches all available typed metrics from an engine endpoint
@@ -215,10 +231,26 @@ func (ef *EngineMetricsFetcher) FetchAllTypedMetrics(ctx context.Context, endpoi
215231 continue
216232 }
217233
218- // Get raw metric name for this engine
219- rawMetricName , exists := metricDef .EngineMetricsNameMapping [result .EngineType ]
220- if ! exists {
221- klog .V (5 ).InfoS ("Metric not supported for engine type" , "metric" , metricName , "engine" , result .EngineType )
234+ // Get raw metric name candidates for this engine
235+ candidates , exists := metricDef .EngineMetricsNameMapping [result .EngineType ]
236+ if ! exists || len (candidates ) == 0 {
237+ klog .V (5 ).InfoS ("No raw metric names defined for metric and engine type" ,
238+ "metric" , metricName , "engine" , result .EngineType )
239+ continue
240+ }
241+
242+ // Find the first candidate that exists in allMetrics
243+ var rawMetricName string
244+ for _ , name := range candidates {
245+ if _ , ok := allMetrics [name ]; ok {
246+ rawMetricName = name
247+ break
248+ }
249+ }
250+
251+ if rawMetricName == "" {
252+ klog .V (5 ).InfoS ("None of the candidate raw metrics found in endpoint response" ,
253+ "metric" , metricName , "engine" , result .EngineType , "candidates" , candidates )
222254 continue
223255 }
224256
0 commit comments