Through this project, we aim to use retail sales data to understand how stores could run personalized, targeted promotional campaigns based on customers' purchasing patterns in different categories.
This project would allow retail stores, particularly ones without a lot of resources dedicated to promotional material, to optimize their promotional campaigns by identifying the most effective customer profiles to target. This can save retailers significant resources and effort by avoiding a standardized, one-size-fits-all approach to promotions, which tends to be less effective and less efficient. Instead, stores can target specific customer segments based on their category preferences, past purchasing behavior, and other factors to maximize the effectiveness of their promotions.
Part 1:
Our initial problem statement focuses on predicting whether or not we should market to a customer based on the amount of money they spent in a certain category when focusing on promotional material for that category. However, we soon run into issues regarding the lack of variation in our features and the circular way the labels are defined, and have to take a different approach to tackling this overall task.
Part 2:
Our updated problem statement now focuses on a predictive model for shopper segmentation, where shoppers are grouped into distinct categories based on their preferences and purchase patterns. Eg: Technology enthusiasts, food connoisseurs, fashion-conscious individuals, and beauty enthusiasts. This segmentation would help businesses optimize marketing strategies through targeted promotional campaigns and personalized recommendations to ultimately increase customer retention while optimizing their resources.
Part 3:
Let's do some extra experimentation: is our data truly separable?
This particular dataset contains purchases by shoppers of various age groups and genders, across several shopping categories, from 10 shopping malls in Istanbul, Turkey.
Data Source: https://www.kaggle.com/datasets/mehmettahiraslan/customer-shopping-dataset
Imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import (
    StratifiedKFold,
    train_test_split,
    cross_val_score,
    KFold,
)
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
    r2_score,
    mean_squared_error,
    mean_absolute_error,
    precision_score,
    recall_score,
)
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
We read in the data source via pandas and visualize it to see the columns present and determine which ones will be useful for our purposes.
df = pd.read_csv('customer_shopping_data.csv')
df.head()
invoice_no | customer_id | gender | age | category | quantity | price | payment_method | invoice_date | shopping_mall | |
---|---|---|---|---|---|---|---|---|---|---|
0 | I138884 | C241288 | Female | 28 | Clothing | 5 | 1500.40 | Credit Card | 5/8/2022 | Kanyon |
1 | I317333 | C111565 | Male | 21 | Shoes | 3 | 1800.51 | Debit Card | 12/12/2021 | Forum Istanbul |
2 | I127801 | C266599 | Male | 20 | Clothing | 1 | 300.08 | Cash | 9/11/2021 | Metrocity |
3 | I173702 | C988172 | Female | 66 | Shoes | 5 | 3000.85 | Credit Card | 16/05/2021 | Metropol AVM |
4 | I337046 | C189076 | Female | 53 | Books | 4 | 60.60 | Cash | 24/10/2021 | Kanyon |
After looking over the data, we use domain knowledge to recognize that certain features would not have any real impact on shoppers' purchasing patterns, so we drop them. For our purposes, we will not be using the 'invoice_no', 'customer_id', 'payment_method', and 'invoice_date' attributes.
We also want to restrict this demonstration to a few main categories, so we drop the rows for the other categories: Toys, Souvenir, Books, and Shoes.
# keeping columns that are useful and dropping all else
df.drop(['invoice_no', 'customer_id', 'payment_method', 'invoice_date'], axis=1, inplace=True)
# dropping rows for categories we don't care about, like toys
df = df[~df['category'].isin(['Toys', 'Souvenir', 'Books', 'Shoes'])]
We notice that there is a price column and a quantity column, but neither is as meaningful on its own as a total price column, which will be a useful feature for our problem. We therefore calculate the new feature, total price, by multiplying the price and quantity columns. Once we add this new feature, we drop the price and quantity columns, as they no longer provide any new information.
# recalculating price column to include totals
df['total_price'] = (df['price'] * df['quantity'])
df.drop(['quantity', 'price'], axis=1, inplace=True)
df.head()
gender | age | category | shopping_mall | total_price | |
---|---|---|---|---|---|
0 | Female | 28 | Clothing | Kanyon | 7502.00 |
2 | Male | 20 | Clothing | Metrocity | 300.08 |
5 | Female | 28 | Clothing | Forum Istanbul | 7502.00 |
6 | Female | 49 | Cosmetics | Istinye Park | 40.66 |
7 | Female | 32 | Clothing | Mall of Istanbul | 1200.32 |
Finally, we also want to know how many data points there are for each category to get a better sense of the spread of our dataset.
df['category'].value_counts()
category
Clothing           34487
Cosmetics          15097
Food & Beverage    14776
Technology          4996
Name: count, dtype: int64
It looks like Clothing is the majority class, whereas Technology is the minority class.
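To see the imbalance as proportions (a small addition to the original analysis):

# Class shares (sketch): Clothing alone accounts for roughly half of all rows
print(df['category'].value_counts(normalize=True).round(3))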
One of the first things we want to visualize and compare is the total amount of money spent by females vs. males.
plt.figure(figsize=(10,6))
sns.barplot(df,x='gender',y='total_price',hue = 'gender',estimator = sum)
plt.ylabel('Total amount spent')
plt.title('Total Amount spent by males vs. females ')
plt.ticklabel_format(style='plain',axis='y')
plt.show()
It is evident that the overall total amount of money spent by females is considerably higher.
Next, we want to compare the amount of money spent per category by males vs. females.
plt.figure(figsize=(10,6))
sns.barplot(df,x='category',y='total_price',hue = 'gender',estimator = sum)
plt.title('Category Wise Shopping')
plt.show()
It looks like the total amount of money spent on clothing, cosmetics, and technology by females is considerably higher than the total spent by males in those categories. But does the same trend hold for the average amount of money spent per person by females vs. males in each category?
Calculating average amount of money spent by females vs. males in each of these categories:
plt.figure(figsize=(10,6))
ax = sns.barplot(df,x='category',y='total_price',hue = 'gender',estimator = 'mean')
ax.set(xlabel='Category',
ylabel='Average amount of money spent',
title='Category Wise Shopping by Gender')
plt.show()
# checking average price spent by gender per category
avg_price_spent_per=df.groupby(['gender','category'], as_index=False)['total_price'].mean()
avg_age_per=df.groupby(['gender','category'], as_index=False)['age'].mean()
print(avg_price_spent_per)
print("\n",avg_age_per)
   gender         category   total_price
0  Female         Clothing   3304.846775
1  Female        Cosmetics    448.376245
2  Female  Food & Beverage     57.396933
3  Female       Technology  11630.308621
4    Male         Clothing   3306.475999
5    Male        Cosmetics    452.312985
6    Male  Food & Beverage     57.637718
7    Male       Technology  11509.875931

   gender         category        age
0  Female         Clothing  43.381948
1  Female        Cosmetics  43.571665
2  Female  Food & Beverage  43.489209
3  Female       Technology  43.249581
4    Male         Clothing  43.449946
5    Male        Cosmetics  43.594160
6    Male  Food & Beverage  43.155727
7    Male       Technology  43.544417
We quickly find a couple of things: the average amount spent per category is nearly identical for males and females, and so is the average age. This lack of variation in our data will become a problem, as we will find out later. Next, we check whether spending varies across shopping malls.
plt.figure(figsize=(20,6))
ax = sns.barplot(df,x='shopping_mall',y='total_price',hue = 'gender',estimator = 'mean')
ax.set(xlabel='Shopping mall',
ylabel='Average amount of money spent',
title='Shopping Mall Wise Shopping by Gender')
plt.show()
avg_price_spent_per=df.groupby(['gender','shopping_mall'], as_index=False)['total_price'].mean()
print(avg_price_spent_per)
    gender      shopping_mall  total_price
0   Female        Cevahir AVM  2578.274630
1   Female  Emaar Square Mall  2721.922403
2   Female     Forum Istanbul  2498.865477
3   Female       Istinye Park  2601.940877
4   Female             Kanyon  2652.917047
5   Female   Mall of Istanbul  2588.465487
6   Female          Metrocity  2565.397838
7   Female       Metropol AVM  2399.639890
8   Female     Viaport Outlet  2723.107971
9   Female       Zorlu Center  2628.558746
10    Male        Cevahir AVM  2666.947484
11    Male  Emaar Square Mall  2652.309208
12    Male     Forum Istanbul  2567.935761
13    Male       Istinye Park  2516.635455
14    Male             Kanyon  2572.807329
15    Male   Mall of Istanbul  2626.392354
16    Male          Metrocity  2540.795903
17    Male       Metropol AVM  2670.127212
18    Male     Viaport Outlet  2517.975887
19    Male       Zorlu Center  2509.945500
There is very little difference between males and females within each shopping mall, either.
After visualizing and understanding our dataset, we will now create class labels for it. We want three labels, Yes, No, and Maybe, which represent whether we should market to a person based on their shopping habits, i.e., the amount of money they spent in a particular category.
The cutoffs for these labels are based on per-category percentiles of total price:
Yes - This person's total spend in a category is above the 75th percentile of total money spent in that category; therefore, we definitely want to market to these people.
Maybe - This person's total spend is between the 25th and 75th percentiles for that category; therefore, we may want to market to these people, but will give higher priority to people who have spent more money.
No - This person's total spend is at or below the 25th percentile for that category; therefore, we do not want to market to these people, and will conserve our resources instead.
df.groupby(['gender','category'], as_index=False)['total_price'].describe()
gender | category | count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Female | Clothing | 20652.0 | 3304.846775 | 2591.805146 | 300.08 | 1200.32 | 2700.72 | 4801.28 | 7502.00 |
1 | Female | Cosmetics | 9070.0 | 448.376245 | 351.210785 | 40.66 | 162.64 | 365.94 | 650.56 | 1016.50 |
2 | Female | Food & Beverage | 8804.0 | 57.396933 | 45.211737 | 5.23 | 20.92 | 47.07 | 83.68 | 130.75 |
3 | Female | Technology | 2981.0 | 11630.308621 | 9126.822685 | 1050.00 | 4200.00 | 9450.00 | 16800.00 | 26250.00 |
4 | Male | Clothing | 13835.0 | 3306.475999 | 2605.834237 | 300.08 | 1200.32 | 2700.72 | 4801.28 | 7502.00 |
5 | Male | Cosmetics | 6027.0 | 452.312985 | 352.884728 | 40.66 | 162.64 | 365.94 | 650.56 | 1016.50 |
6 | Male | Food & Beverage | 5972.0 | 57.637718 | 45.644014 | 5.23 | 20.92 | 47.07 | 83.68 | 130.75 |
7 | Male | Technology | 2015.0 | 11509.875931 | 8988.335801 | 1050.00 | 4200.00 | 9450.00 | 16800.00 | 26250.00 |
df.groupby(['gender','category'], as_index=False)['age'].describe()
gender | category | count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Female | Clothing | 20652.0 | 43.381948 | 14.979862 | 18.0 | 30.0 | 43.0 | 56.0 | 69.0 |
1 | Female | Cosmetics | 9070.0 | 43.571665 | 14.966309 | 18.0 | 31.0 | 44.0 | 56.0 | 69.0 |
2 | Female | Food & Beverage | 8804.0 | 43.489209 | 14.970409 | 18.0 | 31.0 | 43.0 | 56.0 | 69.0 |
3 | Female | Technology | 2981.0 | 43.249581 | 14.825544 | 18.0 | 30.0 | 43.0 | 56.0 | 69.0 |
4 | Male | Clothing | 13835.0 | 43.449946 | 15.025515 | 18.0 | 30.0 | 43.0 | 56.0 | 69.0 |
5 | Male | Cosmetics | 6027.0 | 43.594160 | 14.892899 | 18.0 | 31.0 | 44.0 | 57.0 | 69.0 |
6 | Male | Food & Beverage | 5972.0 | 43.155727 | 14.996070 | 18.0 | 30.0 | 43.0 | 56.0 | 69.0 |
7 | Male | Technology | 2015.0 | 43.544417 | 14.851730 | 18.0 | 31.0 | 43.0 | 56.0 | 69.0 |
Now that we have found the 25th, 50th, and 75th percentiles, we use the rules above to assign class labels.
# creating the class label
# label -> ['maybe', 'no', 'yes']
def assignNewLabels(row):
    # for technology shoppers
    if row['category'] == "Technology" and row["total_price"] > 16800:
        return "yes"
    elif row['category'] == "Technology" and row["total_price"] <= 4200:
        return "no"
    elif row['category'] == "Technology" and row["total_price"] > 4200 and row["total_price"] <= 16800:
        return "maybe"
    # for clothing shoppers
    elif row['category'] == "Clothing" and row["total_price"] > 4801.28:
        return "yes"
    elif row['category'] == "Clothing" and row["total_price"] <= 1200.32:
        return "no"
    elif row['category'] == "Clothing" and row["total_price"] > 1200.32 and row["total_price"] <= 4801.28:
        return "maybe"
    # for cosmetics shoppers
    elif row['category'] == "Cosmetics" and row["total_price"] > 650.56:
        return "yes"
    elif row['category'] == "Cosmetics" and row["total_price"] <= 162.64:
        return "no"
    elif row['category'] == "Cosmetics" and row["total_price"] > 162.64 and row["total_price"] <= 650.56:
        return "maybe"
    # for food & beverage shoppers
    elif row['category'] == "Food & Beverage" and row["total_price"] > 83.68:
        return "yes"
    elif row['category'] == "Food & Beverage" and row["total_price"] <= 20.92:
        return "no"
    elif row['category'] == "Food & Beverage" and row["total_price"] > 20.92 and row["total_price"] <= 83.68:
        return "maybe"
df['Should_We_Market'] = df.apply(assignNewLabels, axis=1)
df.head()
gender | age | category | shopping_mall | total_price | Should_We_Market | |
---|---|---|---|---|---|---|
0 | Female | 28 | Clothing | Kanyon | 7502.00 | yes |
2 | Male | 20 | Clothing | Metrocity | 300.08 | no |
5 | Female | 28 | Clothing | Forum Istanbul | 7502.00 | yes |
6 | Female | 49 | Cosmetics | Istinye Park | 40.66 | no |
7 | Female | 32 | Clothing | Mall of Istanbul | 1200.32 | no |
We now have the new class labels that our models will try to predict.
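As an aside (not part of the original pipeline), the same labels could be derived without hard-coding the cutoffs, which keeps the rule in sync with the data if the categories or prices change. A sketch:

# Sketch: compute the 25th/75th percentile cutoffs per category directly,
# instead of copying the values from the describe() table above
q = df.groupby('category')['total_price'].quantile([0.25, 0.75]).unstack()

def label_from_quantiles(row):
    low, high = q.loc[row['category'], 0.25], q.loc[row['category'], 0.75]
    if row['total_price'] > high:
        return "yes"
    if row['total_price'] <= low:
        return "no"
    return "maybe"

# equivalent to the hard-coded assignNewLabels above:
# df['Should_We_Market'] = df.apply(label_from_quantiles, axis=1)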
One Hot Encoding
We create one-hot encoded features for gender and category, since we are doing multiclass classification with categorical inputs. We opted for this over integer labels like 1, 2, 3, 4 because we do not want to impose an ordinal relationship between the categories that is not really there.
df_dummy = pd.get_dummies(df[['gender', 'category', 'age', 'total_price', 'shopping_mall']])
df_dummy.head()
age | total_price | gender_Female | gender_Male | category_Clothing | category_Cosmetics | category_Food & Beverage | category_Technology | shopping_mall_Cevahir AVM | shopping_mall_Emaar Square Mall | shopping_mall_Forum Istanbul | shopping_mall_Istinye Park | shopping_mall_Kanyon | shopping_mall_Mall of Istanbul | shopping_mall_Metrocity | shopping_mall_Metropol AVM | shopping_mall_Viaport Outlet | shopping_mall_Zorlu Center | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 28 | 7502.00 | True | False | True | False | False | False | False | False | False | False | True | False | False | False | False | False |
2 | 20 | 300.08 | False | True | True | False | False | False | False | False | False | False | False | False | True | False | False | False |
5 | 28 | 7502.00 | True | False | True | False | False | False | False | False | True | False | False | False | False | False | False | False |
6 | 49 | 40.66 | True | False | False | True | False | False | False | False | False | True | False | False | False | False | False | False |
7 | 32 | 1200.32 | True | False | True | False | False | False | False | False | False | False | False | True | False | False | False | False |
Min-max normalization
We normalize our age and price features so that they all lie within the same range, [0, 1].
# Normalizing the ages
min_age = df_dummy['age'].min()
max_age = df_dummy['age'].max()
# Apply min-max normalization to the 'age' column
df_dummy['normalized_age'] = (df_dummy['age'] - min_age) / (max_age - min_age)
df_dummy.drop(['age'], axis=1, inplace=True)
# Normalize the prices
min_price = df_dummy['total_price'].min()
max_price = df_dummy['total_price'].max()
# Apply min-max normalization to the 'price' column
df_dummy['normalized_price'] = (df_dummy['total_price'] - min_price) / (max_price - min_price)
df_dummy.drop(['total_price'], axis=1, inplace=True)
df_dummy.head()
gender_Female | gender_Male | category_Clothing | category_Cosmetics | category_Food & Beverage | category_Technology | shopping_mall_Cevahir AVM | shopping_mall_Emaar Square Mall | shopping_mall_Forum Istanbul | shopping_mall_Istinye Park | shopping_mall_Kanyon | shopping_mall_Mall of Istanbul | shopping_mall_Metrocity | shopping_mall_Metropol AVM | shopping_mall_Viaport Outlet | shopping_mall_Zorlu Center | normalized_age | normalized_price | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | True | False | True | False | False | False | False | False | False | False | True | False | False | False | False | False | 0.196078 | 0.285648 |
2 | False | True | True | False | False | False | False | False | False | False | False | False | True | False | False | False | 0.039216 | 0.011235 |
5 | True | False | True | False | False | False | False | False | True | False | False | False | False | False | False | False | 0.196078 | 0.285648 |
6 | True | False | False | True | False | False | False | False | False | True | False | False | False | False | False | False | 0.607843 | 0.001350 |
7 | True | False | True | False | False | False | False | False | False | False | False | True | False | False | False | False | 0.274510 | 0.045536 |
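For reference (an addition, not what the notebook uses), sklearn's MinMaxScaler performs the same transformation and remembers the fitted min/max, so the identical scaling can later be applied to unseen data. A sketch, where 'raw' stands in for a frame that still has the unscaled columns:

from sklearn.preprocessing import MinMaxScaler

# Sketch: equivalent min-max scaling with sklearn
scaler = MinMaxScaler()
raw = df[['age', 'total_price']]
scaled = pd.DataFrame(scaler.fit_transform(raw),
                      columns=['normalized_age', 'normalized_price'],
                      index=raw.index)

In a real deployment, the scaler would be fit on the training split only, to avoid leaking test-set statistics into the normalization.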
We are now ready to run our models.
We are working with an 80/20 train/test split for all of our models.
We start with Gaussian Naive Bayes. We chose this model as a simple, fast baseline: it models each continuous feature with a per-class Gaussian, trains almost instantly, and gives us an initial read on how learnable the labels are.
X = df_dummy
y = df.Should_We_Market
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.20, random_state=125
)
(69356, 18) (69356,)
# Build a Gaussian Classifier
model = GaussianNB()
# Model training
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="weighted")
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print()
cm = confusion_matrix(y_test, y_pred, labels=["maybe", "no", "yes"])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["maybe", "no", "yes"])
disp.plot();
# Calculate recall and precision per label
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)
labels = sorted(set(y_test))
for label, score in zip(labels, precision):
    print("Precision for Label", label, ":", score)
print()
for label, score in zip(labels, recall):
    print("Recall for Label", label, ":", score)
# For a guide to interpreting these confusion-matrix results, see https://www.v7labs.com/blog/confusion-matrix-guide
Accuracy: 0.6008506343713956
F1 Score: 0.6575598917743605

Precision for Label maybe : 0.6384685923888125
Precision for Label no : 0.5991282968261064
Precision for Label yes : 0.33629893238434166

Recall for Label maybe : 0.49910394265232977
Recall for Label no : 0.9643820831084727
Recall for Label yes : 0.06915477497255763
We can see that accuracy is low, and on closer inspection we realize this problem formulation itself is flawed, for the following reasons:
- The class label is derived directly from total_price and category via fixed percentile cutoffs, so "predicting" it from those same features is circular rather than informative.
- Because the cutoffs differ enormously across categories (a "yes" food shopper spends far less than a "no" technology shopper), the three classes overlap heavily in price once all categories are pooled, and Naive Bayes' single-Gaussian-per-class assumption cannot capture these category-conditional thresholds; hence the near-zero recall for "yes".
- The remaining features carry almost no signal: as we saw earlier, average spending and average age are nearly identical across genders and malls.
We will therefore repurpose our problem statement to classify a shopper's spending category based on spending habits, gender, and age.
We now move on to our updated problem statement: a predictive model for shopper segmentation, where shoppers are grouped into distinct categories based on their preferences and purchase patterns. We step away from the yes/maybe/no class label entirely, due to the aforementioned reasons, and pivot to something that is more practical to implement.
We now predict the category label from age, gender, shopping mall, and total price, essentially using demographic and past-purchase information to segment shoppers into categories, which can then be used for future targeted marketing campaigns, e.g., focusing on technology enthusiasts for a technology sale.
We first take a step back and create one-hot encodings with the features we want once more.
df_dummy = pd.get_dummies(df[['gender', 'age', 'total_price', 'shopping_mall']])
df_dummy.head()
age | total_price | gender_Female | gender_Male | shopping_mall_Cevahir AVM | shopping_mall_Emaar Square Mall | shopping_mall_Forum Istanbul | shopping_mall_Istinye Park | shopping_mall_Kanyon | shopping_mall_Mall of Istanbul | shopping_mall_Metrocity | shopping_mall_Metropol AVM | shopping_mall_Viaport Outlet | shopping_mall_Zorlu Center | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 28 | 7502.00 | True | False | False | False | False | False | True | False | False | False | False | False |
2 | 20 | 300.08 | False | True | False | False | False | False | False | False | True | False | False | False |
5 | 28 | 7502.00 | True | False | False | False | True | False | False | False | False | False | False | False |
6 | 49 | 40.66 | True | False | False | False | False | True | False | False | False | False | False | False |
7 | 32 | 1200.32 | True | False | False | False | False | False | False | True | False | False | False | False |
Again normalizing the age and price features to bring them into the same range, [0, 1].
# Normalizing the ages
min_age = df_dummy['age'].min()
max_age = df_dummy['age'].max()
# Apply min-max normalization to the 'age' column
df_dummy['normalized_age'] = (df_dummy['age'] - min_age) / (max_age - min_age)
# Normalize the prices
min_price = df_dummy['total_price'].min()
max_price = df_dummy['total_price'].max()
# Apply min-max normalization to the 'price' column
df_dummy['normalized_price'] = (df_dummy['total_price'] - min_price) / (max_price - min_price)
df_dummy.drop(['total_price', 'age'], axis=1, inplace=True)
df_dummy.head()
gender_Female | gender_Male | shopping_mall_Cevahir AVM | shopping_mall_Emaar Square Mall | shopping_mall_Forum Istanbul | shopping_mall_Istinye Park | shopping_mall_Kanyon | shopping_mall_Mall of Istanbul | shopping_mall_Metrocity | shopping_mall_Metropol AVM | shopping_mall_Viaport Outlet | shopping_mall_Zorlu Center | normalized_age | normalized_price | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | True | False | False | False | False | False | True | False | False | False | False | False | 0.196078 | 0.285648 |
2 | False | True | False | False | False | False | False | False | True | False | False | False | 0.039216 | 0.011235 |
5 | True | False | False | False | True | False | False | False | False | False | False | False | 0.196078 | 0.285648 |
6 | True | False | False | False | False | True | False | False | False | False | False | False | 0.607843 | 0.001350 |
7 | True | False | False | False | False | False | False | True | False | False | False | False | 0.274510 | 0.045536 |
Here we make use of sklearn's Random Forest classifier to create a feature ranking.
X = df_dummy
y = df.category
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# Create a Random Forest classifier
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
# Get feature importances
importances = rf_classifier.feature_importances_
# Sort feature importances in descending order
indices = importances.argsort()[::-1]
print("Feature ranking:")
for f in range(X.shape[1]):
    print(f"{f + 1}. Feature {X.columns[indices[f]]} ({importances[indices[f]]})")
Feature ranking:
1. Feature normalized_price (0.9838227373523575)
2. Feature normalized_age (0.013448435592251564)
3. Feature gender_Female (0.00027928701689037904)
4. Feature gender_Male (0.00026625939216013946)
5. Feature shopping_mall_Forum Istanbul (0.0002534961256342729)
6. Feature shopping_mall_Metropol AVM (0.00023439648534342907)
7. Feature shopping_mall_Viaport Outlet (0.0002244742417978541)
8. Feature shopping_mall_Mall of Istanbul (0.0002216370459257878)
9. Feature shopping_mall_Zorlu Center (0.00021877321933309464)
10. Feature shopping_mall_Cevahir AVM (0.00021492151146649798)
11. Feature shopping_mall_Emaar Square Mall (0.00021018732275164542)
12. Feature shopping_mall_Metrocity (0.00020817252413814425)
13. Feature shopping_mall_Kanyon (0.000205471583071539)
14. Feature shopping_mall_Istinye Park (0.00019175058687818515)
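Impurity-based importances like these can overstate continuous features such as price, so as a sanity check (an addition to the original analysis) we can cross-check with permutation importance on the held-out split:

from sklearn.inspection import permutation_importance

# Sketch: permutation importance shuffles one feature at a time and measures
# the resulting drop in held-out accuracy
result = permutation_importance(rf_classifier, X_test, y_test,
                                n_repeats=5, random_state=42, n_jobs=-1)
for idx in result.importances_mean.argsort()[::-1]:
    print(f"{X.columns[idx]}: {result.importances_mean[idx]:.4f}")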
According to this ranking, gender and shopping mall don't make a big difference in classification, which lines up with our earlier observations (mean spending per category is virtually the same for males and females, and mean spending is roughly constant across malls for both genders). So let's drop these features and try Naive Bayes again.
# dropping useless features according to Random Forest
df_dummy.drop(['gender_Female', 'gender_Male', 'shopping_mall_Mall of Istanbul',
'shopping_mall_Metrocity', 'shopping_mall_Forum Istanbul', 'shopping_mall_Viaport Outlet',
'shopping_mall_Emaar Square Mall', 'shopping_mall_Kanyon', 'shopping_mall_Istinye Park',
'shopping_mall_Zorlu Center', 'shopping_mall_Cevahir AVM', 'shopping_mall_Metropol AVM'], axis=1, inplace=True)
X = df_dummy
y = df.category
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.25, random_state=125
)
(69356, 2) (69356,)
We are now just working with the following features to predict the category
X
normalized_age | normalized_price | |
---|---|---|
0 | 0.196078 | 0.285648 |
2 | 0.039216 | 0.011235 |
5 | 0.196078 | 0.285648 |
6 | 0.607843 | 0.001350 |
7 | 0.274510 | 0.045536 |
... | ... | ... |
99446 | 0.274510 | 0.000000 |
99449 | 0.921569 | 0.011235 |
99453 | 0.176471 | 0.000598 |
99454 | 0.882353 | 0.000598 |
99455 | 0.745098 | 0.639928 |
69356 rows × 2 columns
Class labels - <Clothing, Cosmetics, Food & Beverage, Technology>
Features - <normalized_Age, normalized_Total_Price>
# Build a Gaussian Classifier
model = GaussianNB()
# Model training
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="weighted")
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print()
cm = confusion_matrix(y_test, y_pred, labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp.plot();
# Calculate recall and precision per label
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)
labels = sorted(set(y_test))
for label, score in zip(labels, precision):
    print("Precision for Label", label, ":", score)
print()
for label, score in zip(labels, recall):
    print("Recall for Label", label, ":", score)
# For a guide to interpreting these confusion-matrix results, see https://www.v7labs.com/blog/confusion-matrix-guide
Accuracy: 0.8121575638733491
F1 Score: 0.8137622111213028

Precision for Label Clothing : 0.9326949015376315
Precision for Label Cosmetics : 0.6076938489280705
Precision for Label Food & Beverage : 0.8206680116565792
Precision for Label Technology : 1.0

Recall for Label Clothing : 0.8017391304347826
Recall for Label Cosmetics : 0.7912861987998956
Recall for Label Food & Beverage : 1.0
Recall for Label Technology : 0.3877049180327869
Accuracy is a lot better because the task is now well-posed: as the feature ranking showed, total price carries nearly all of the signal, and each category occupies a fairly distinct price range (food tops out around 131, cosmetics around 1,017, clothing spans roughly 300 to 7,502, and technology starts at 1,050), so the classes are largely separable along that single axis. The remaining confusion sits in the regions where those ranges overlap, which is why Technology recall and Cosmetics precision lag behind.
kf = StratifiedKFold(n_splits=10, random_state=40, shuffle=True)
accscore = []
classifier2 = GaussianNB()
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    classifier2.fit(X_train, y_train.values.ravel())
    Y_pred = classifier2.predict(X_test)
    acc2 = accuracy_score(y_test, Y_pred)
    accscore.append(acc2)
avgscore = sum(accscore)/10
print("Accuracy from each fold = \n" + str(accscore))
print("\nAverage accuracy = \n" + str(avgscore))
Accuracy from each fold = 
[0.81199538638985, 0.8184832756632064, 0.8171856978085352, 0.8114186851211073, 0.8124279123414071, 0.828719723183391, 0.8152847873107426, 0.8116798846431147, 0.8108147080028839, 0.813410237923576]

Average accuracy = 
0.8151420298387814
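The same 10-fold estimate can be written more concisely with cross_val_score (already imported above); a sketch:

# Sketch: one-line equivalent of the manual fold loop above
scores = cross_val_score(GaussianNB(), X, y,
                         cv=StratifiedKFold(n_splits=10, random_state=40, shuffle=True))
print("Average accuracy =", scores.mean())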
Hmm, seems like the gender and shopping mall features really did not matter in the long run. Let's try another classification technique instead! (To learn more about stratified k-fold cross-validation, see https://www.geeksforgeeks.org/stratified-k-fold-cross-validation/)
# re-encode the features, bringing gender back in for the neural network
df_dummy = pd.get_dummies(df[['gender', 'age', 'total_price']])
# one-hot encode the category classes (shown below for inspection only; sklearn's
# MLPClassifier accepts string labels directly, so we keep y as strings)
df_y_dummy = pd.get_dummies(df[['category']])
df_y_dummy.head()
category_Clothing | category_Cosmetics | category_Food & Beverage | category_Technology | |
---|---|---|---|---|
0 | True | False | False | False |
2 | True | False | False | False |
5 | True | False | False | False |
6 | False | True | False | False |
7 | True | False | False | False |
# Normalizing the ages
min_age = df_dummy['age'].min()
max_age = df_dummy['age'].max()
# Apply min-max normalization to the 'age' column
df_dummy['normalized_age'] = (df_dummy['age'] - min_age) / (max_age - min_age)
# Normalize the prices
min_price = df_dummy['total_price'].min()
max_price = df_dummy['total_price'].max()
# Apply min-max normalization to the 'price' column
df_dummy['normalized_price'] = (df_dummy['total_price'] - min_price) / (max_price - min_price)
# drop converted columns
df_dummy.drop(['total_price', 'age'], axis=1, inplace=True)
df_dummy.head()
gender_Female | gender_Male | normalized_age | normalized_price | |
---|---|---|---|---|
0 | True | False | 0.196078 | 0.285648 |
2 | False | True | 0.039216 | 0.011235 |
5 | True | False | 0.196078 | 0.285648 |
6 | True | False | 0.607843 | 0.001350 |
7 | True | False | 0.274510 | 0.045536 |
# Split the data into features and target
X = df_dummy
y = df['category']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = MLPClassifier(hidden_layer_sizes=(16, 8), activation='relu', solver='adam', max_iter=1500, random_state=42)
model.fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
print(f'Test accuracy: {accuracy:.4f}')
print()
Test accuracy: 0.8965
As you can see, the neural network performed very well, with a test accuracy of 89.65%. Compared to the Naive Bayes classifier's cross-validated accuracy of about 81.5%, that is a substantial improvement.
To learn more about Neural Networks, I highly recommend 3b1b's video series: https://www.youtube.com/watch?v=aircAruvnKk
y_pred = model.predict(X_test)
cm2 = confusion_matrix(y_test, y_pred, labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp2 = ConfusionMatrixDisplay(confusion_matrix=cm2, display_labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp2.plot();
# Calculate recall and precision per label
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)
labels = sorted(set(y_test))
for label, score in zip(labels, precision):
    print("Precision for Label", label, ":", score)
print()
for label, score in zip(labels, recall):
    print("Recall for Label", label, ":", score)
Precision for Label Clothing : 0.8938223938223938
Precision for Label Cosmetics : 1.0
Precision for Label Food & Beverage : 0.8261240751280592
Precision for Label Technology : 1.0

Recall for Label Clothing : 1.0
Recall for Label Cosmetics : 0.5886618325642716
Recall for Label Food & Beverage : 1.0
Recall for Label Technology : 0.8101010101010101
Further improvements could come from more systematic hyperparameter tuning (hidden layer sizes, learning rate, L2 regularization strength). Batch normalization is also worth exploring, though it would require a deep-learning framework such as PyTorch, since sklearn's MLPClassifier does not support it.
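A minimal tuning sketch using GridSearchCV; the grid values here are illustrative assumptions, not tuned choices:

from sklearn.model_selection import GridSearchCV

# Sketch: a small, illustrative hyperparameter grid for the MLP above
param_grid = {
    'hidden_layer_sizes': [(16, 8), (32, 16), (64, 32)],
    'alpha': [1e-4, 1e-3],             # L2 penalty strength
    'learning_rate_init': [1e-3, 1e-2],
}
search = GridSearchCV(
    MLPClassifier(activation='relu', solver='adam', max_iter=1500, random_state=42),
    param_grid, cv=3, scoring='accuracy', n_jobs=-1,
)
search.fit(X_train, y_train)
print("Best parameters:", search.best_params_)
print("Best cross-validated accuracy:", search.best_score_)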
Next, let's try K-Nearest Neighbors, which classifies each shopper by a majority vote of the most similar shoppers in the training set.
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=50, metric='minkowski', p=2)
classifier.fit(X_train, y_train)
Y_pred = classifier.predict(X_test)
acc = accuracy_score(y_test, Y_pred)
print("Accuracy: " + str(acc))
f1 = f1_score(y_test, Y_pred, average="weighted")
print("F1 score: " + str(f1))
print()
cm = confusion_matrix(y_test, Y_pred, labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Clothing", "Cosmetics", "Food & Beverage", "Technology"])
disp.plot();
# Calculate recall and precision per label
precision = precision_score(y_test, Y_pred, average=None)
recall = recall_score(y_test, Y_pred, average=None)
labels = sorted(set(y_test))
for label, score in zip(labels, precision):
    print("Precision for Label", label, ":", score)
print()
for label, score in zip(labels, recall):
    print("Recall for Label", label, ":", score)
Accuracy: 0.8768021914648212
F1 score: 0.8843731485812386

Precision for Label Clothing : 0.9081992938407218
Precision for Label Cosmetics : 0.8407120036513007
Precision for Label Food & Beverage : 0.8038163387000596
Precision for Label Technology : 1.0

Recall for Label Clothing : 1.0
Recall for Label Cosmetics : 0.6071193144363876
Recall for Label Food & Beverage : 0.9286944540130899
Recall for Label Technology : 0.6868686868686869
# KNN - 10-fold stratified cross-validation
kf = StratifiedKFold(n_splits=10, random_state=40, shuffle=True)
accscore = []
classifier = KNeighborsClassifier(n_neighbors=50, metric='minkowski', p=2)
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    accscore.append(accuracy_score(y_test, y_pred))
avgscore = sum(accscore) / len(accscore)
print("Accuracy from each fold using KNN with 50 neighbors = " + str(accscore))
print("Average accuracy using KNN with 50 neighbors = " + str(avgscore))
print()
Accuracy from each fold using KNN with 50 neighbors = [0.901239907727797, 0.9057093425605537, 0.9028258362168397, 0.9047001153402537, 0.9042675893886967, 0.9068627450980392, 0.9058399423215573, 0.9088680605623648, 0.8986301369863013, 0.9029560201874549]
Average accuracy using KNN with 50 neighbors = 0.9041899696389859
# Create a dictionary to map class labels to colors
class_colors = {'Clothing': 'red', 'Cosmetics': 'blue', 'Technology': 'green', 'Food & Beverage': 'purple'}
# Plotting the scatter plot
plt.figure(figsize=(8, 6))
for category, color in class_colors.items():
    class_data = df[df['category'] == category]
    plt.scatter(class_data['age'], class_data['total_price'], color=color, label=category)
plt.xlabel('Age')
plt.ylabel('Total Price')
plt.title('Scatter Plot of Data Points')
plt.legend()
plt.show()

The categories form largely distinct horizontal bands by total price, with overlap where the price ranges meet (for example, Technology overlaps high-end Clothing), which is consistent with the confusion between those classes we saw above. So the data is mostly, but not perfectly, separable on price alone.
Accuracy vs. K
# Split the data into features and target
X = df_dummy
y = df['category']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
acc = []
# Will take some time
for i in range(1, 40):
    neigh = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    yhat = neigh.predict(X_test)
    acc.append(accuracy_score(y_test, yhat))
plt.figure(figsize=(10,6))
plt.plot(range(1, 40), acc, color='blue', linestyle='dashed',
         marker='o', markerfacecolor='red', markersize=10)
plt.title('Accuracy vs. K Value')
plt.xlabel('K')
plt.ylabel('Accuracy')
# acc[0] corresponds to K=1, so shift the list index by one
print("Maximum accuracy:", max(acc), "at K =", acc.index(max(acc)) + 1)
Maximum accuracy: 1.0 at K = 1
A perfect score at K = 1 is a red flag rather than a triumph: with only a few coarse features, many shoppers share identical feature values, so exact duplicates of a test point often exist in the training set, and the single nearest neighbor simply reads off their label. The performance of these models could be improved by engineering richer, less redundant features (for example purchase frequency or recency, if such data were available), tuning hyperparameters more systematically, and accounting for the class imbalance we observed earlier.
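One quick diagnostic (an addition, not in the original notebook) that supports the duplicate-row explanation:

# Sketch: fraction of feature rows that are exact duplicates of an earlier row;
# duplicates shared between train and test let 1-NN score perfectly
dup_frac = X.duplicated().mean()
print(f"Fraction of duplicated feature rows: {dup_frac:.2%}")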