# Code for Python Email Spam Classifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
# Load the labelled messages: column 'v1' holds the label, 'v2' the text.
# NOTE(review): if this is the public SMS Spam Collection CSV it may not be
# UTF-8 encoded, in which case read_csv needs an explicit encoding
# (e.g. encoding='latin-1') -- confirm against the actual file.
data = pd.read_csv('_data/spam.csv')
print(data['v1'])

# Pre-processing: discard every column whose name contains 'Unnamed'.
columnsdrop = list(filter(lambda col: 'Unnamed' in col, data.columns))
data.drop(columns=columnsdrop, inplace=True)

# Encode the labels as integers: ham -> 1, spam -> 0. Any label that is not
# in the mapping becomes NaN and is removed by the dropna() below.
label_codes = {'ham': 1, 'spam': 0}
data['v1'] = data['v1'].map(label_codes)

# Remove rows that contain any missing value.
data = data.dropna()
print(data)

# Bag-of-words vectorizer; it is fitted later, on the training split only.
cv = CountVectorizer()
# Hold out 30% of the messages for evaluation. The split is seeded so the
# reported scores are reproducible between runs, and stratified on the label
# so both partitions keep the same ham/spam ratio.
x_train, x_test, y_train, y_test = train_test_split(
    data['v2'],
    data['v1'],
    test_size=0.3,
    random_state=42,
    stratify=data['v1'],
)

# Bag of words: the vocabulary is learned from the training messages only,
# so nothing from the held-out messages leaks into the features.
x_train = cv.fit_transform(x_train)

from sklearn.linear_model import LogisticRegression

# NOTE: despite its name, MNB is a logistic-regression classifier, not a
# multinomial naive Bayes model. The name is kept because the code further
# down refers to it.
MNB = LogisticRegression(C=1.0, solver='lbfgs', max_iter=100)
MNB.fit(x_train, y_train)
from sklearn.metrics import accuracy_score

print(type(x_test))

# Vectorise the held-out messages with the vocabulary fitted on the training
# data (transform, not fit_transform). LogisticRegression accepts sparse
# matrices directly, so the features are not densified with toarray().
predicted = MNB.predict(cv.transform(x_test))
p2 = MNB.predict(x_train)
print(predicted)

# Score with accuracy (fraction of correctly classified messages). R^2 is a
# regression metric and is not meaningful for class labels.
print(accuracy_score(y_test, predicted))  # held-out accuracy
print(accuracy_score(y_train, p2))        # training accuracy
print(y_test)
print()
# Classify a single hand-written message using the already-fitted vectorizer
# and model (labels were encoded earlier as 1 = ham, 0 = spam).
text = ["click on my link thing.com"]
text_features = cv.transform(text)
testingvalue = MNB.predict(text_features)
print(testingvalue)

# Dump the fitted linear model: one coefficient per vocabulary feature,
# plus the intercept term.
coefficients, intercept = MNB.coef_, MNB.intercept_
print(coefficients, intercept)