49 lines
2.6 KiB
Python
49 lines
2.6 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
# Two features whose values are swept while every other feature of the
# instance stays unchanged.
less_important_feature_education = "education.num"
less_important_feature_fnlwgt = "fnlwgt"

# Value of each swept feature in the first row of `instance_df`; each sweep
# is centred on that value.
_education_origin = instance_df[less_important_feature_education].iloc[0]
_fnlwgt_origin = instance_df[less_important_feature_fnlwgt].iloc[0]

# 50 evenly spaced values per feature: origin +/- 10 for education.num and
# origin +/- 100000 for fnlwgt.
# NOTE(review): nothing clips these ranges, so depending on the origin the
# lower end can fall below zero -- confirm that is acceptable for the model.
education_range = np.linspace(
    _education_origin - 10,
    _education_origin + 10,
    50,
)
fnlwgt_range = np.linspace(
    _fnlwgt_origin - 100000,
    _fnlwgt_origin + 100000,
    50,
)
|
|
|
|
def _predict_over_sweep(feature_name, sweep_values):
    """Return perturbed copies of ``instance`` with model predictions.

    Builds a frame holding one copy of ``instance`` per entry of
    ``sweep_values`` (columns taken from ``X_train``), overwrites the
    ``feature_name`` column with the sweep values, and stores the second
    column of ``best_rf_model.predict_proba`` in a new ``"prediction"``
    column.
    """
    repeated_rows = [instance] * len(sweep_values)
    frame = pd.DataFrame(repeated_rows, columns=X_train.columns)
    frame[feature_name] = sweep_values
    probabilities = best_rf_model.predict_proba(frame)
    frame["prediction"] = probabilities[:, 1]
    return frame


# Perturbed instances (the original notes call them "instances for LIME")
# together with the model's predicted probabilities, one frame per feature.
instances_education = _predict_over_sweep(
    less_important_feature_education, education_range
)
instances_fnlwgt = _predict_over_sweep(
    less_important_feature_fnlwgt, fnlwgt_range
)
|
|
|
|
# Shared y-axis limits: the smallest and largest predicted value across both
# sweeps, so that the two figures can use the same scale.
_prediction_columns = (
    instances_education["prediction"],
    instances_fnlwgt["prediction"],
)
y_min = min(column.min() for column in _prediction_columns)
y_max = max(column.max() for column in _prediction_columns)
|
|
|
|
# Half-height given to the y-axis when both limits coincide (flat curves).
_FLAT_CURVE_PADDING = 0.05


def _plot_feature_sweep(
    sweep_values,
    predictions,
    original_value,
    *,
    feature_name,
    curve_label,
    curve_color,
    x_label,
    y_limits,
):
    """Show one figure of the model prediction against a swept feature.

    Args:
        sweep_values: x values, i.e. the values the feature was set to.
        predictions: y values, the model output for each swept value.
        original_value: feature value of the unmodified instance; marked
            with a dashed red vertical line.
        feature_name: column name interpolated into the figure title.
        curve_label: legend entry for the prediction curve.
        curve_color: matplotlib colour of the prediction curve.
        x_label: label of the x-axis.
        y_limits: ``(lower, upper)`` y-axis limits shared by all figures.
    """
    lower, upper = y_limits
    if lower == upper:
        # Identical limits make matplotlib emit a warning and choose its own
        # expansion; pad explicitly so a flat curve renders predictably.
        lower -= _FLAT_CURVE_PADDING
        upper += _FLAT_CURVE_PADDING

    plt.figure(figsize=(8, 5))
    plt.plot(sweep_values, predictions, label=curve_label, color=curve_color)
    plt.axvline(
        original_value, color="red", linestyle="dashed", label="Originalwert"
    )
    plt.xlabel(x_label)
    plt.ylabel("Vorhersage (0 = <=50K, 1 = >50K)")
    plt.title(f"Einfluss von {feature_name} auf die Vorhersage")
    plt.ylim([lower, upper])  # same y-axis in every figure
    plt.legend()
    plt.show()


# Variation of 'education-num' (labelled as moderate influence).
_plot_feature_sweep(
    education_range,
    instances_education["prediction"],
    instance_df[less_important_feature_education].values[0],
    feature_name=less_important_feature_education,
    curve_label="Moderater Einfluss auf die Vorhersage",
    curve_color="green",
    x_label="Bildungsniveau (education-num)",
    y_limits=(y_min, y_max),
)

# Variation of 'fnlwgt' (labelled as little influence).
_plot_feature_sweep(
    fnlwgt_range,
    instances_fnlwgt["prediction"],
    instance_df[less_important_feature_fnlwgt].values[0],
    feature_name=less_important_feature_fnlwgt,
    curve_label="Wenig Einfluss auf die Vorhersage",
    curve_color="orange",
    x_label="Finales Gewicht (fnlwgt)",
    y_limits=(y_min, y_max),
)