# Source: umaf/monitor.py (MillionthOdin16/umaf, branch main)
"""
Monitoring module for the UMAF Capability Extractor.

This module provides monitoring and logging functionality for the capability extractor.
"""

import json
import os
import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union

import numpy as np
import torch
import torch.nn.functional as F


class CapabilityExtractionMonitor:
    """
    Monitor for capability extraction.

    Tracks and logs metrics during training and evaluation. Metrics live in
    ``self.metrics`` (one list per known metric name). When a ``log_dir`` is
    given, the full metric history is rewritten to ``metrics.json`` after
    every ``log_metrics`` call, and reports/plots are saved alongside it.
    """

    def __init__(self, log_dir: Optional[str] = None):
        """
        Initialize monitor.

        Args:
            log_dir (Optional[str]): Directory to save logs. Created if it
                does not exist. When ``None`` nothing is written to disk.
        """
        # Only these keys are tracked; log_metrics() ignores any other name.
        self.metrics = {
            'epoch': [],
            'train_loss': [],
            'val_loss': [],
            'test_loss': [],
            'learning_rate': [],
            'epoch_time': [],
            'fingerprint_clustering': [],
            'capability_transfer_performance': [],
            'representational_similarity': []
        }

        self.log_dir = log_dir
        if log_dir is not None:
            os.makedirs(log_dir, exist_ok=True)

        # Reference point for the 'total_time' entry of generate_report().
        self.start_time = time.time()

    def log_metrics(self, **kwargs):
        """
        Log metrics.

        Each keyword whose name is a known metric (a key of ``self.metrics``)
        and whose value is not ``None`` is appended to that metric's history.
        Unknown names and ``None`` values are ignored.

        Args:
            **kwargs: Metrics to log
        """
        for key, value in kwargs.items():
            if key in self.metrics and value is not None:
                self.metrics[key].append(value)

        # Save metrics to file if log_dir is provided
        if self.log_dir is not None:
            self._save_metrics()

    @staticmethod
    def _json_default(value):
        """Convert array-like scalars (NumPy, tensors) to plain Python for JSON."""
        # NumPy scalars/arrays and torch tensors all expose ``tolist()``,
        # which yields built-in Python numbers or (nested) lists.
        converter = getattr(value, 'tolist', None)
        if callable(converter):
            return converter()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def _save_metrics(self):
        """Save metrics to ``metrics.json`` inside ``self.log_dir``."""
        metrics_file = os.path.join(self.log_dir, 'metrics.json')
        with open(metrics_file, 'w') as f:
            # ``default`` lets values such as np.float32 be written instead
            # of aborting the whole dump with a TypeError.
            json.dump(self.metrics, f, indent=2, default=self._json_default)

    def generate_report(self) -> Dict[str, Dict[str, float]]:
        """
        Generate a report of metrics.

        For every metric with at least one logged value the report holds its
        mean, std, min, max and last value. A ``'total_time'`` entry with the
        seconds elapsed since the monitor was created is always included.
        When ``log_dir`` is set the report is also written to ``report.json``.

        Returns:
            Dict[str, Dict[str, float]]: Report of metrics
        """
        report = {}

        for key, values in self.metrics.items():
            if len(values) > 0:
                report[key] = {
                    'mean': float(np.mean(values)),
                    'std': float(np.std(values)),
                    'min': float(np.min(values)),
                    'max': float(np.max(values)),
                    'last': float(values[-1])
                }

        # Add total training time (same shape as the other entries so that
        # consumers can treat every report value uniformly).
        total_time = time.time() - self.start_time
        report['total_time'] = {
            'mean': total_time,
            'std': 0.0,
            'min': total_time,
            'max': total_time,
            'last': total_time
        }

        # Save report to file if log_dir is provided
        if self.log_dir is not None:
            report_file = os.path.join(self.log_dir, 'report.json')
            with open(report_file, 'w') as f:
                json.dump(report, f, indent=2)

        return report

    def _x_axis(self, values):
        """Return x coordinates for *values*: logged epochs if they line up, else indices."""
        epochs = self.metrics['epoch']
        if len(epochs) == len(values):
            return epochs
        # A series can be shorter/longer than 'epoch' (None values are skipped
        # by log_metrics, and 'epoch' itself is optional). Plotting against
        # mismatched x values would make matplotlib raise, so fall back to
        # the position of each value in its own history.
        return list(range(len(values)))

    @staticmethod
    def _finish_figure(plt, fig, save_dir, filename):
        """Save *fig* into *save_dir* (then close it) or show it when no directory is given."""
        if save_dir is not None:
            fig.savefig(os.path.join(save_dir, filename))
            # Saved figures are no longer needed; closing them keeps repeated
            # calls from accumulating open figures.
            plt.close(fig)
        else:
            # Not closed here: in interactive mode show() does not block and
            # closing would dismiss the window immediately.
            plt.show()

    def _plot_series(self, plt, values, xlabel, ylabel, title, save_dir,
                     filename, x=None):
        """Draw one single-line figure for *values* and save or show it."""
        fig = plt.figure(figsize=(10, 6))
        if x is None:
            plt.plot(values)
        else:
            plt.plot(x, values)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.grid(True)
        self._finish_figure(plt, fig, save_dir, filename)

    def plot_metrics(self, save_dir: Optional[str] = None):
        """
        Plot metrics.

        One figure is produced for each metric group that has data: loss
        (train and, if present, validation), learning rate, epoch time,
        fingerprint clustering, capability transfer performance and
        representational similarity. Figures are saved as PNG files in
        ``save_dir`` when it is given, otherwise they are shown.

        Per-epoch series are plotted against the logged epochs when the
        lengths match and against their index otherwise.

        Args:
            save_dir (Optional[str]): Directory to save plots
        """
        # The whole routine is best-effort: an ImportError raised anywhere
        # below (missing matplotlib or an unavailable backend) skips plotting.
        try:
            import matplotlib.pyplot as plt

            # Create save directory if provided
            if save_dir is not None:
                os.makedirs(save_dir, exist_ok=True)

            # Plot training and validation loss
            train_loss = self.metrics['train_loss']
            if len(train_loss) > 0:
                fig = plt.figure(figsize=(10, 6))
                plt.plot(self._x_axis(train_loss), train_loss, label='Train Loss')
                val_loss = self.metrics['val_loss']
                if len(val_loss) > 0:
                    plt.plot(self._x_axis(val_loss), val_loss, label='Val Loss')
                plt.xlabel('Epoch')
                plt.ylabel('Loss')
                plt.title('Training and Validation Loss')
                plt.legend()
                plt.grid(True)
                self._finish_figure(plt, fig, save_dir, 'loss.png')

            # Series logged once per epoch: (metric, y label, title, file)
            per_epoch = [
                ('learning_rate', 'Learning Rate', 'Learning Rate Schedule',
                 'learning_rate.png'),
                ('epoch_time', 'Time (s)', 'Epoch Time', 'epoch_time.png'),
            ]
            for key, ylabel, title, filename in per_epoch:
                values = self.metrics[key]
                if len(values) > 0:
                    self._plot_series(
                        plt, values, 'Epoch', ylabel, title, save_dir,
                        filename, x=self._x_axis(values)
                    )

            # Series logged once per evaluation, plotted against their index
            per_evaluation = [
                ('fingerprint_clustering', 'Silhouette Score',
                 'Fingerprint Clustering Quality',
                 'fingerprint_clustering.png'),
                ('capability_transfer_performance',
                 'Performance Improvement (%)',
                 'Capability Transfer Performance',
                 'capability_transfer_performance.png'),
                ('representational_similarity', 'Correlation',
                 'Representational Similarity',
                 'representational_similarity.png'),
            ]
            for key, ylabel, title, filename in per_evaluation:
                values = self.metrics[key]
                if len(values) > 0:
                    self._plot_series(
                        plt, values, 'Evaluation', ylabel, title, save_dir,
                        filename
                    )

        except ImportError:
            print("matplotlib not available, skipping plots")

    @staticmethod
    def _cosine_similarity(first, second):
        """Return the cosine similarity of two fingerprint tensors as a Python number."""
        return F.cosine_similarity(
            first.unsqueeze(0),
            second.unsqueeze(0),
            dim=1
        ).item()

    def log_model_comparison(
        self,
        model_names: List[str],
        fingerprints: List[torch.Tensor],
        task_performances: List[float],
        similarity_metric: Optional[Callable] = None
    ):
        """
        Log model comparison.

        Builds the pairwise fingerprint similarity matrix, draws it as a
        heatmap, draws a scatter plot of similarity against absolute task
        performance difference for every unordered model pair, and logs the
        correlation between the two as ``representational_similarity``.
        Plots are saved into ``self.log_dir`` when it is set, otherwise shown.

        If matplotlib, seaborn or pandas cannot be imported, a message is
        printed and nothing is computed or logged.

        Args:
            model_names (List[str]): Names of models
            fingerprints (List[torch.Tensor]): Capability fingerprints
            task_performances (List[float]): Task performances
            similarity_metric (Optional[Callable]): Similarity metric taking
                two fingerprints and returning a scalar. Defaults to cosine
                similarity.

        Raises:
            ValueError: If the three input lists differ in length.
        """
        n_models = len(model_names)
        if len(fingerprints) != n_models or len(task_performances) != n_models:
            raise ValueError(
                "model_names, fingerprints and task_performances must have "
                f"the same length (got {n_models}, {len(fingerprints)}, "
                f"{len(task_performances)})"
            )

        try:
            import matplotlib.pyplot as plt
            import seaborn as sns
            import pandas as pd

            # Default to cosine similarity
            metric = similarity_metric
            if metric is None:
                metric = self._cosine_similarity

            # Compute similarity matrix. Every ordered pair is evaluated
            # because a user-supplied metric need not be symmetric.
            similarity_matrix = np.zeros((n_models, n_models))
            for i in range(n_models):
                for j in range(n_models):
                    similarity_matrix[i, j] = metric(fingerprints[i], fingerprints[j])

            # Create DataFrame
            df = pd.DataFrame(similarity_matrix, index=model_names, columns=model_names)

            # Plot heatmap
            heatmap_fig = plt.figure(figsize=(12, 10))
            sns.heatmap(df, annot=True, cmap='viridis', vmin=0, vmax=1)
            plt.title('Model Similarity Matrix')
            self._finish_figure(plt, heatmap_fig, self.log_dir, 'model_similarity.png')

            # Plot task performance vs. similarity (one point per unordered pair)
            performance_diff = []
            similarities = []
            model_pairs = []

            for i in range(n_models):
                for j in range(i + 1, n_models):
                    performance_diff.append(abs(task_performances[i] - task_performances[j]))
                    similarities.append(similarity_matrix[i, j])
                    model_pairs.append(f"{model_names[i]} vs {model_names[j]}")

            scatter_fig = plt.figure(figsize=(12, 8))
            plt.scatter(similarities, performance_diff)

            for pair, sim, diff in zip(model_pairs, similarities, performance_diff):
                plt.annotate(pair, (sim, diff))

            plt.xlabel('Fingerprint Similarity')
            plt.ylabel('Task Performance Difference')
            plt.title('Fingerprint Similarity vs. Task Performance Difference')
            plt.grid(True)
            self._finish_figure(plt, scatter_fig, self.log_dir, 'similarity_vs_performance.png')

            # Compute correlation between similarity and performance difference.
            # It is undefined with fewer than two pairs or constant data; a NaN
            # would otherwise be logged and contaminate every later report.
            correlation = float('nan')
            if len(similarities) >= 2:
                correlation = float(np.corrcoef(similarities, performance_diff)[0, 1])

            if not np.isfinite(correlation):
                print("Correlation between fingerprint similarity and task performance difference is undefined; not logged")
                return

            # Log correlation
            self.log_metrics(representational_similarity=correlation)

            print(f"Correlation between fingerprint similarity and task performance difference: {correlation:.4f}")

        except ImportError:
            print("matplotlib, seaborn, or pandas not available, skipping model comparison plots")