Python implementation
import numpy as np

def weightInitialization(n_features):
    # Initialize the weight vector and bias to zeros
    w = np.zeros((1, n_features))
    b = 0
    return w, b

def sigmoid_activation(result):
    # Sigmoid function: maps any real value into (0, 1)
    final_result = 1 / (1 + np.exp(-result))
    return final_result

def model_optimize(w, b, X, Y):
    m = X.shape[0]

    # Prediction
    final_result = sigmoid_activation(np.dot(w, X.T) + b)
    Y_T = Y.T
    # Cross-entropy cost
    cost = (-1/m) * (np.sum((Y_T * np.log(final_result)) + ((1 - Y_T) * (np.log(1 - final_result)))))

    # Gradient calculation
    dw = (1/m) * (np.dot(X.T, (final_result - Y.T).T))
    db = (1/m) * (np.sum(final_result - Y.T))

    grads = {"dw": dw, "db": db}
    return grads, cost

def model_predict(w, b, X, Y, learning_rate, no_iterations):
    costs = []
    for i in range(no_iterations):
        grads, cost = model_optimize(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        # Weight update (gradient descent step)
        w = w - (learning_rate * (dw.T))
        b = b - (learning_rate * db)
        # Record the cost every 100 iterations
        if (i % 100 == 0):
            costs.append(cost)
            # print("Cost after %i iteration is %f" % (i, cost))

    # Final parameters
    coeff = {"w": w, "b": b}
    gradient = {"dw": dw, "db": db}
    return coeff, gradient, costs

def predict(final_pred, m):
    # Threshold the predicted probabilities at 0.5
    y_pred = np.zeros((1, m))
    for i in range(final_pred.shape[1]):
        if final_pred[0][i] > 0.5:
            y_pred[0][i] = 1
    return y_pred
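A minimal end-to-end sketch of how these functions fit together, on synthetic data (the two Gaussian blobs, the learning rate, and the iteration count are illustrative assumptions, not values from the original experiment):

import numpy as np

# Synthetic, linearly separable data (assumption: two Gaussian blobs)
rng = np.random.RandomState(0)
X_train = np.vstack([rng.randn(50, 2) + 2, rng.randn(50, 2) - 2])
y_train = np.vstack([np.ones((50, 1)), np.zeros((50, 1))])   # shape (m, 1)

w, b = weightInitialization(X_train.shape[1])
coeff, gradient, costs = model_predict(w, b, X_train, y_train,
                                       learning_rate=0.01, no_iterations=1000)
w, b = coeff["w"], coeff["b"]

# Probabilities on the training set, thresholded at 0.5 by predict()
final_pred = sigmoid_activation(np.dot(w, X_train.T) + b)
y_pred = predict(final_pred, X_train.shape[0])
print("Train accuracy:", 100 * np.mean(y_pred == y_train.T))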
Figure 13: The cost decreasing with the number of iterations.
The model's training and test accuracy is 100%.
This program is intended for binary logistic regression. For data with more than two classes, softmax regression should be used, as sketched below.
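A minimal softmax (multinomial) regression sketch for the multi-class case; this is an illustration under stated assumptions (X is (m, n), Y_onehot holds one-hot labels with K classes), not part of the original program:

import numpy as np

def softmax_activation(scores):
    # Row-wise softmax; subtracting the row maximum avoids overflow in exp
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

def softmax_step(W, b, X, Y_onehot, learning_rate):
    # One gradient-descent step; X is (m, n), W is (n, K), Y_onehot is (m, K)
    m = X.shape[0]
    probs = softmax_activation(np.dot(X, W) + b)      # (m, K) class probabilities
    cost = -np.sum(Y_onehot * np.log(probs)) / m      # multi-class cross-entropy
    dW = np.dot(X.T, probs - Y_onehot) / m            # gradient w.r.t. W
    db = np.sum(probs - Y_onehot, axis=0) / m         # gradient w.r.t. b
    W = W - learning_rate * dW
    b = b - learning_rate * db
    return W, b, cost

Repeated in a loop, softmax_step plays the same role as model_optimize plus the weight update above; with K = 2 it is equivalent (up to an over-parameterization) to the binary model.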
Appendix 1. Binary logistic regression
import csv
import numpy as np
import matplotlib.pyplot as plt

def loadCSV(filename):
    '''
    function to load dataset
    '''
    with open(filename, "r") as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return np.array(dataset)

def normalize(X):
    '''
    function to normalize feature matrix, X
    '''
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    rng = maxs - mins
    norm_X = 1 - ((maxs - X) / rng)
    return norm_X

def logistic_func(beta, X):
    '''
    logistic(sigmoid) function
    '''
    return 1.0 / (1 + np.exp(-np.dot(X, beta.T)))

def log_gradient(beta, X, y):
    '''
    logistic gradient function
    '''
    first_calc = logistic_func(beta, X) - y.reshape(X.shape[0], -1)
    final_calc = np.dot(first_calc.T, X)
    return final_calc

def cost_func(beta, X, y):
    '''
    cost function, J
    '''
    log_func_v = logistic_func(beta, X)
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)

def grad_desc(X, y, beta, lr=.01, converge_change=.001):
    '''
    gradient descent function
    '''
    cost = cost_func(beta, X, y)
    change_cost = 1
    num_iter = 1
    while change_cost > converge_change:
        old_cost = cost
        beta = beta - (lr * log_gradient(beta, X, y))
        cost = cost_func(beta, X, y)
        change_cost = old_cost - cost
        num_iter += 1
    return beta, num_iter

def pred_values(beta, X):
    '''
    function to predict labels
    '''
    pred_prob = logistic_func(beta, X)
    pred_value = np.where(pred_prob >= .5, 1, 0)
    return np.squeeze(pred_value)

def plot_reg(X, y, beta):
    '''
    function to plot decision boundary
    '''
    # labelled observations
    x_0 = X[np.where(y == 0.0)]
    x_1 = X[np.where(y == 1.0)]

    # plotting points with diff color for diff label
    plt.scatter([x_0[:, 1]], [x_0[:, 2]], c='b', label='y = 0')
    plt.scatter([x_1[:, 1]], [x_1[:, 2]], c='r', label='y = 1')

    # plotting decision boundary
    x1 = np.arange(0, 1, 0.1)
    x2 = -(beta[0, 0] + beta[0, 1] * x1) / beta[0, 2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

if __name__ == "__main__":
    # load the dataset
    dataset = loadCSV('dataset1.csv')

    # normalizing feature matrix
    X = normalize(dataset[:, :-1])

    # stacking columns with all ones in feature matrix
    X = np.hstack((np.matrix(np.ones(X.shape[0])).T, X))

    # response vector
    y = dataset[:, -1]

    # initial beta values
    beta = np.matrix(np.zeros(X.shape[1]))

    # beta values after running gradient descent
    beta, num_iter = grad_desc(X, y, beta)

    # estimated beta values and number of iterations
    print("Estimated regression coefficients:", beta)
    print("No. of iterations:", num_iter)

    # predicted labels
    y_pred = pred_values(beta, X)

    # number of correctly predicted labels
    print("Correctly predicted labels:", np.sum(y == y_pred))

    # plotting regression line
    plot_reg(X, y, beta)
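The script expects dataset1.csv to contain numeric columns with the 0/1 class label in the last column. If the file is not at hand, a synthetic stand-in with two features can be generated as follows (an assumption for illustration; the original dataset is not reproduced here):

import numpy as np

# Two random features in [0, 1); label 1 when the features sum to more than 1
rng = np.random.RandomState(0)
features = rng.rand(100, 2)
labels = (features[:, 0] + features[:, 1] > 1).astype(float).reshape(-1, 1)
np.savetxt('dataset1.csv', np.hstack([features, labels]), delimiter=',')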
Appendix 2. Multinomial logistic regression
from sklearn import datasets, linear_model, metrics
# load the digit dataset
digits = datasets.load_digits()
# defining feature matrix(X) and response vector(y)
X = digits.data
y = digits.target
# splitting X and y into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
# create logistic regression object
reg = linear_model.LogisticRegression()
# train the model using the training sets
reg.fit(X_train, y_train)
# making predictions on the testing set
y_pred = reg.predict(X_test)
# comparing actual response values (y_test) with predicted response values (y_pred)
print("Logistic Regression model accuracy(in %):",
metrics.accuracy_score(y_test, y_pred)*100)
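By default LogisticRegression chooses a multi-class strategy automatically; the multinomial (softmax) formulation can also be requested explicitly. A sketch, assuming a scikit-learn version in which the multi_class parameter is still accepted (it is deprecated in recent releases), with max_iter raised so the lbfgs solver converges on the digits data:

# Explicitly multinomial (softmax) formulation; lbfgs supports it
reg_multi = linear_model.LogisticRegression(multi_class='multinomial',
                                            solver='lbfgs', max_iter=5000)
reg_multi.fit(X_train, y_train)
print("Multinomial model accuracy(in %):",
      metrics.accuracy_score(y_test, reg_multi.predict(X_test)) * 100)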
Conclusion
Logistic regression is a powerful tool, particularly in epidemiological studies: it allows several explanatory variables to be analyzed simultaneously while reducing the influence of confounding factors. However, researchers should give careful thought to model building rather than simply feeding raw data into software and reporting the output. Some of the harder model-building decisions depend entirely on the researcher's experience in the field.
Some points about logistic regression:
There is no linear relationship between the dependent and the independent variables, but there is a linear relationship between the logit of the response and the explanatory variables (in symbols, see after this list).
The independent variables may even be power terms or other nonlinear transformations of the original independent variables.
The dependent variable need not be normally distributed, but it is usually assumed to follow a distribution from the exponential family (e.g. binomial, Poisson, multinomial, normal, ...); binary logistic regression assumes a binomial distribution of the response.
Homogeneity of variance does not need to be satisfied.
The errors must be independent, but need not be normally distributed.
It uses maximum likelihood estimation (MLE) rather than ordinary least squares (OLS) to estimate the parameters, and therefore relies on large-sample approximations.
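In symbols, writing $p = P(y = 1 \mid x)$, the linearity in the first point holds on the logit scale, and MLE picks the coefficients that maximize the log-likelihood:

\[
\operatorname{logit}(p) \;=\; \ln\frac{p}{1-p} \;=\; \beta_0 + \beta_1 x_1 + \dots + \beta_k x_k,
\qquad
\hat\beta \;=\; \arg\max_{\beta} \sum_{i=1}^{m}\Big[y_i \ln p_i + (1 - y_i)\ln(1 - p_i)\Big].
\]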