California Housing
[ ]:
import numpy as np
import pandas as pd
import green_tsetlin as gt
import green_tsetlin_core as gtc
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
[ ]:
# Load the California housing dataset through scikit-learn and split it into
# the feature matrix and the regression target.
cali_housing = fetch_california_housing()
X, y = cali_housing.data, cali_housing.target

# Wrap the features in a DataFrame with named columns and append the target
# as a trailing 'target' column.
df = pd.DataFrame(X, columns=cali_housing.feature_names).assign(target=y)

# NOTE(review): in a notebook only the last expression of a cell is rendered,
# so this summary is computed and discarded -- wrap it in display() or move it
# to its own cell if the statistics are meant to be shown.
df.describe()
df
[ ]:
# Binarize the features: cut every non-target column into 5 equal-width bins
# and one-hot encode the resulting bin index. The 'target' column is kept
# unchanged and stays the first column of the result.
one_hot_parts = []
for col in df.columns:
    if col == 'target':
        continue
    # labels=False makes pd.cut return the integer bin index rather than an
    # Interval label, so the indicator columns are named '<feature>_<index>'.
    bin_index = pd.cut(df[col], bins=5, labels=False)
    # NOTE(review): get_dummies only emits a column for bin indices that
    # actually occur, so a feature with an empty bin (likely for heavily
    # skewed columns) yields fewer than 5 indicator columns -- confirm that
    # downstream code does not assume exactly 5 per feature.
    one_hot_parts.append(pd.get_dummies(bin_index, prefix=col))

# Concatenate once instead of re-assigning df inside the loop, which would
# copy the whole (growing) frame for every feature. The raw feature columns
# are dropped so that only the target and the indicator columns remain.
df = pd.concat(
    [df.drop(columns=cali_housing.feature_names), *one_hot_parts],
    axis=1,
)
df