Files
ExplainableAI/extracted_cells/cell9.py

20 lines
804 B
Python

# Convert categorical variables into numeric ones.
# Every object-dtype column of df_clean is treated as categorical.
categorical_cols = df_clean.select_dtypes(include=['object']).columns
print("Kategorische Spalten:", categorical_cols.tolist())

# Label-encode the target variable into a new 'income_encoded' column.
label_encoder = LabelEncoder()
df_clean['income_encoded'] = label_encoder.fit_transform(df_clean['income'])
print("\nLabel Encoding für 'income':")
# Show the label -> integer mapping; the index in classes_ is the assigned code.
for i, label in enumerate(label_encoder.classes_):
    print(f"{label} -> {i}")

# One-hot encode the categorical variables (except the target variable).
# 'income' is removed from the column list, so get_dummies leaves the raw
# 'income' column untouched next to 'income_encoded'.
categorical_cols = categorical_cols.drop('income')
df_encoded = pd.get_dummies(df_clean, columns=categorical_cols, drop_first=False)
print("\nNeue Spalten durch One-Hot Encoding:")
# NOTE(review): this prints the first 10 columns of df_encoded, which are not
# necessarily the newly created dummy columns — confirm this is intended.
print(df_encoded.columns[:10].tolist())
print("\nDatensatz nach Vorverarbeitung:", df_encoded.shape)