# Source: Graph-Reasoning-Engine/03_domains/devops/exercises/devops_exercise2.py (MEVENGUE/Graph-Reasoning-Engine, branch main)
"""
EXERCICE 2 — Décision de rollback après déploiement

Contexte :
- Un déploiement vient d'être effectué en production
- Après sa mise en ligne, plusieurs métriques évoluent de manière inhabituelle

Question : L'agent doit décider s'il est pertinent d'effectuer un rollback maintenant ou d'attendre.

Éléments observables :
- Le déploiement est terminé
- Le taux d'erreur évolue
- La latence du service évolue
- Le trafic réseau évolue
- Un mécanisme de déploiement progressif est actif
"""

import importlib
from datetime import datetime

# The project's top-level directories ("01_core_engine", "03_domains") start
# with a digit. Such names are not valid Python identifiers, so writing them in
# a `from ... import ...` statement is a SyntaxError and the file cannot even be
# parsed. Resolve the modules by their dotted string name instead and bind the
# same public names the rest of this file uses.
# NOTE(review): this assumes the repository root is on sys.path when the script
# is run — confirm against how the exercises are launched.
RollbackAgent = importlib.import_module(
    "03_domains.devops.agents.rollback_agent"
).RollbackAgent
export_devops_decision = importlib.import_module(
    "03_domains.devops.agents.export"
).export_devops_decision
export_to_json_file = importlib.import_module(
    "01_core_engine.reasoning.export"
).export_to_json_file
ReasoningStatus = importlib.import_module(
    "01_core_engine.graph.types"
).ReasoningStatus


def format_time():
    """Return the current local time as a zero-padded ``HH:MM`` string."""
    now = datetime.now()
    # A datetime format spec inside an f-string is handed to strftime().
    return f"{now:%H:%M}"


# Threshold handed to every RollbackAgent built by this exercise.
_ROLLBACK_THRESHOLD = 0.7

# Nodes whose state is listed at the start of scenario 1.
_OBSERVED_NODE_IDS = ("A", "B", "C", "D", "E", "G")

# Reason text recorded when a signal node is switched to PROVEN.
_SIGNAL_REASONS = {
    "B": "Error rate confirmed",
    "C": "Latency increased confirmed",
    "D": "Traffic spike confirmed",
    "E": "Canary failed confirmed",
}

# One entry per what-if scenario:
# (banner title, node ids to confirm, history label, heading above the result).
_WHAT_IF_SCENARIOS = (
    (
        "SCENARIO 2 : TAUX D'ERREUR AUGMENTE (SIGNAL NEGATIF ISOLE)",
        ("B",),
        "Error rate increased",
        "Apres confirmation du taux d'erreur augmente :",
    ),
    (
        "SCENARIO 3 : CANARY ECHOUE (PIVOT FORT MAIS PAS SUFFISANT SEUL)",
        ("E",),
        "Canary failed",
        "Apres confirmation du canary echoue :",
    ),
    (
        "SCENARIO 4 : ERREUR + CANARY (CONVERGENCE DE SIGNAUX)",
        ("B", "E"),
        "Error + Canary failed",
        "Apres confirmation Erreur + Canary :",
    ),
    (
        "SCENARIO 5 : CONVERGENCE FORTE (ERREUR + LATENCE + CANARY)",
        ("B", "C", "E"),
        "Strong convergence (Error + Latency + Canary)",
        "Apres confirmation Erreur + Latence + Canary :",
    ),
    (
        "SCENARIO 6 : TOUS LES SIGNAUX NEGATIFS (ROLLBACK URGENT)",
        ("B", "C", "D", "E"),
        "All negative signals",
        "Apres confirmation de tous les signaux negatifs :",
    ),
)


def _print_banner(title):
    """Print a blank line, then *title* framed by two 70-character rules."""
    print("\n" + "=" * 70)
    print(title)
    print("=" * 70)


def _print_decision(heading, result):
    """Print the status, decision, risk, recommendation and justification of *result*.

    Args:
        heading: Line printed above the report (a blank line is printed first).
        result: Mapping returned by ``agent.decide_rollback()``.
    """
    # The `or` fallbacks keep the report printable when a key exists but holds None.
    context = result.get("devops_context") or {}
    justification = result.get("justification") or ""
    print(f"\n{heading}")
    print(f"  - Statut : {result.get('status', 'unknown')}")
    print(f"  - Decision : {result.get('decision', 'No decision')}")
    print(f"  - Niveau de risque : {context.get('risk_level', 'unknown')}")
    print(f"  - Recommandation : {context.get('recommendation', '')}")
    # Only the first 100 characters are shown; the ellipsis is always appended.
    print(f"  - Justification : {justification[:100]}...")


def _confirm_signals(agent, node_ids):
    """Switch every node of *node_ids* found in the agent's graph to PROVEN."""
    for node_id in node_ids:
        node = agent.graph.get_node(node_id)
        if node:
            node.update_status(ReasoningStatus.PROVEN, _SIGNAL_REASONS[node_id])


def exercise2_rollback_decision():
    """Exercise 2: decide whether to roll back after a deployment.

    Prints the exercise statement, evaluates six scenarios with a
    ``RollbackAgent``, records every decision in a history list, exports the
    last decision together with that history to a JSON file, and prints a
    summary of the lessons learned.

    Each scenario runs on a freshly built agent. Re-using a single agent let
    the signals confirmed in one scenario leak into the next ones, so the
    "canary alone" scenario was in fact evaluated with the error-rate signal
    already confirmed and was indistinguishable from "error + canary".
    NOTE(review): this assumes every ``RollbackAgent(...)`` call builds its own
    independent graph — confirm against the agent implementation.

    Export failures are reported on stdout (with a traceback) and do not abort
    the exercise.
    """
    # Local imports: these modules are not imported at the top of this file.
    import traceback
    from pathlib import Path

    # The opening banner has no leading blank line, unlike the later ones.
    print("=" * 70)
    print("EXERCICE 2 — DECISION DE ROLLBACK APRES DEPLOIEMENT")
    print("=" * 70)

    print("\nContexte :")
    print("  - Un deploiement vient d'etre effectue en production")
    print("  - Apres sa mise en ligne, plusieurs metriques evoluent de maniere inhabituelle")

    print("\nQuestion :")
    print("  L'agent doit decider s'il est pertinent d'effectuer un rollback maintenant ou d'attendre.")

    print("\nElements observables :")
    print("  - Le deploiement est termine")
    print("  - Le taux d'erreur evolue")
    print("  - La latence du service evolue")
    print("  - Le trafic reseau evolue")
    print("  - Un mecanisme de deploiement progressif est actif")

    # One {"label", "result"} entry per evaluated scenario, in evaluation order.
    decisions_history = []

    # ------------------------------------------------------------------
    # Scenario 1: initial post-deployment state (no signal confirmed yet)
    # ------------------------------------------------------------------
    _print_banner("SCENARIO 1 : ETAT INITIAL POST-DEPLOIEMENT (METRIQUES INCERTAINES)")
    agent = RollbackAgent(threshold=_ROLLBACK_THRESHOLD)

    print("\nEtat des signaux post-deploiement :")
    for node_id in _OBSERVED_NODE_IDS:
        node = agent.graph.get_node(node_id)
        if node:
            status_icon = "[OK]" if node.status == ReasoningStatus.PROVEN else "[?]"
            print(f"  {status_icon} {node.id}: {node.content} ({node.status.value})")

    result = agent.decide_rollback()
    decisions_history.append(
        {"label": f"{format_time()} - Post-deployment initial", "result": result}
    )
    _print_decision("Decision initiale :", result)

    # ------------------------------------------------------------------
    # Scenarios 2-6: confirm a given set of signals on a fresh agent
    # ------------------------------------------------------------------
    for banner, node_ids, label, heading in _WHAT_IF_SCENARIOS:
        _print_banner(banner)
        # Fresh agent so that only this scenario's signals are confirmed.
        agent = RollbackAgent(threshold=_ROLLBACK_THRESHOLD)
        _confirm_signals(agent, node_ids)
        result = agent.decide_rollback()
        decisions_history.append({"label": f"{format_time()} - {label}", "result": result})
        _print_decision(heading, result)

    # ------------------------------------------------------------------
    # Export for visualization
    # ------------------------------------------------------------------
    _print_banner("EXPORT POUR VISUALISATION")

    try:
        # `agent` and `result` still refer to the last scenario evaluated above.
        export_data = export_devops_decision(agent.graph, result, history=decisions_history)
        json_path = Path("03_domains/devops/visualizations/devops_exercise2_data.json")

        json_path.parent.mkdir(parents=True, exist_ok=True)
        export_to_json_file(export_data, str(json_path))

        if json_path.exists():
            file_size = json_path.stat().st_size
            print(f"\n[OK] Donnees exportees vers {json_path}")
            print(f"     Taille: {file_size} octets")
            print(f"     Historique: {len(decisions_history)} decisions")
        else:
            print(f"\n[ERREUR] Le fichier {json_path} n'a pas ete cree")
    except Exception as e:
        # Best-effort export: report the failure and carry on with the summary.
        print(f"\n[ERREUR] Impossible d'exporter les donnees: {e}")
        traceback.print_exc()

    # ------------------------------------------------------------------
    # Summary of the lessons learned
    # ------------------------------------------------------------------
    _print_banner("RESUME DES APPRENTISSAGES")

    print("\nL'agent de rollback :")
    print("  [OK] Refuse le rollback sur un seul signal negatif isole")
    print("  [OK] Utilise le canary comme pivot fort mais non suffisant seul")
    print("  [OK] Passe en decision PARTIAL en cas d'instabilite globale")
    print("  [OK] Accepte le rollback uniquement lorsque les signaux convergent")
    print("  [OK] Convergence forte (3+ signaux) = rollback urgent accepte")

    print("\n" + "=" * 70)


# Run the exercise only when this file is executed as a script, not on import.
if __name__ == "__main__":
    exercise2_rollback_decision()