initial commit (forked from private repo)

This commit is contained in:
2025-04-11 11:08:28 +02:00
commit 3bdd37f46c
154 changed files with 45901 additions and 0 deletions

49
extracted_cells/cell18.py Normal file
View File

@ -0,0 +1,49 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sensitivity sweep: vary one feature at a time around the instance under
# inspection and plot how the model's predicted probability responds.
# Relies on `instance`, `instance_df`, `X_train` and `best_rf_model`, which
# are not defined in this cell.

# The two features that get varied.
less_important_feature_education = "education.num"
less_important_feature_fnlwgt = "fnlwgt"


def _sweep_values(feature, half_width, steps=50):
    """Return `steps` evenly spaced values centred on the instance's own value.

    The centre is the first entry of `instance_df[feature]`; the sweep covers
    `centre - half_width` to `centre + half_width` inclusive.
    """
    centre = instance_df[feature].values[0]
    return np.linspace(centre - half_width, centre + half_width, steps)


def _predict_along_sweep(feature, values):
    """Build one copy of `instance` per sweep value and score each copy.

    Returns a DataFrame with the columns of `X_train`, where `feature` is
    overwritten by `values` and a "prediction" column holds column index 1 of
    `best_rf_model.predict_proba`.
    """
    frame = pd.DataFrame([instance] * len(values), columns=X_train.columns)
    frame[feature] = values
    # Predict before adding the "prediction" column so the model only sees
    # the feature columns.
    frame["prediction"] = best_rf_model.predict_proba(frame)[:, 1]
    return frame


def _plot_sweep(values, frame, feature, *, curve_label, curve_color, x_label, y_limits):
    """Draw predicted probability against the swept feature values.

    A dashed red vertical line marks the instance's original value of
    `feature`; `y_limits` is applied as-is so several plots can share a scale.
    """
    plt.figure(figsize=(8, 5))
    plt.plot(values, frame["prediction"], label=curve_label, color=curve_color)
    plt.axvline(
        instance_df[feature].values[0],
        color="red",
        linestyle="dashed",
        label="Originalwert",
    )
    plt.xlabel(x_label)
    plt.ylabel("Vorhersage (0 = <=50K, 1 = >50K)")
    plt.title(f"Einfluss von {feature} auf die Vorhersage")
    plt.ylim(y_limits)
    plt.legend()
    plt.show()


# Sweep ranges: +/-10 around the education value, +/-100000 around fnlwgt.
education_range = _sweep_values(less_important_feature_education, 10)
fnlwgt_range = _sweep_values(less_important_feature_fnlwgt, 100000)

# Perturbed copies of the instance together with their predicted probabilities.
instances_education = _predict_along_sweep(less_important_feature_education, education_range)
instances_fnlwgt = _predict_along_sweep(less_important_feature_fnlwgt, fnlwgt_range)

# Common y-axis limits taken over both sweeps, so the two plots are comparable.
_prediction_columns = [instances_education["prediction"], instances_fnlwgt["prediction"]]
y_min = min(column.min() for column in _prediction_columns)
y_max = max(column.max() for column in _prediction_columns)

# Plot for 'education.num' (labelled as moderate influence).
_plot_sweep(
    education_range,
    instances_education,
    less_important_feature_education,
    curve_label="Moderater Einfluss auf die Vorhersage",
    curve_color='green',
    x_label="Bildungsniveau (education-num)",
    y_limits=[y_min, y_max],
)

# Plot for 'fnlwgt' (labelled as little influence).
_plot_sweep(
    fnlwgt_range,
    instances_fnlwgt,
    less_important_feature_fnlwgt,
    curve_label="Wenig Einfluss auf die Vorhersage",
    curve_color='orange',
    x_label="Finales Gewicht (fnlwgt)",
    y_limits=[y_min, y_max],
)