# Load the breast-cancer dataset and keep only its first two feature
# columns, standardised to zero mean / unit variance.
data = load_breast_cancer()
X = scale(data['data'][:, :2])
y = data['target']

# Fit logistic regression (no intercept term) to find the optimal
# weights for the two selected dimensions.
lr = LogisticRegression(fit_intercept=False)
lr.fit(X, y)

# Pull out the fitted parameters W1 and W2, one per feature dimension.
theta1, theta2 = lr.coef_[0]
print(theta1, theta2)
# 已知 W1 和 W2 的情况下,传进来数据的 X,返回数据的 y_predict
def p_theta_function(features, w1, w2):
    """Sigmoid prediction for a single sample.

    Given the two feature values of one sample and the weights W1 and W2,
    compute z = w1*x1 + w2*x2 and return sigmoid(z), i.e. the predicted
    probability P(y = 1 | features).

    Parameters
    ----------
    features : sequence of two floats
        One sample's feature values (a single row of X, not a column).
    w1, w2 : float
        Weights for the first and second feature respectively.

    Returns
    -------
    float in (0, 1).
    """
    # BUG FIX: the body lines were unindented in the original (a paste
    # artefact), which is a SyntaxError in Python; re-indented here.
    z = w1 * features[0] + w2 * features[1]
    return 1 / (1 + np.exp(-z))
def loss_function(samples_features, samples_labels, w1, w2):
    """Total binary cross-entropy loss of a dataset under weights (w1, w2).

    Iterates the dataset sample by sample, computes each sample's
    predicted probability via ``p_theta_function`` and accumulates the
    per-sample log loss.

    Parameters
    ----------
    samples_features : iterable of per-sample feature pairs (rows of X)
    samples_labels : iterable of 0/1 labels, aligned with the features
    w1, w2 : float
        Candidate weights to evaluate.

    Returns
    -------
    float : the summed cross-entropy loss over all samples.
    """
    # BUG FIX: the body lines were unindented in the original (a paste
    # artefact), which is a SyntaxError in Python; re-indented here.
    result = 0
    # Walk the samples in lockstep with their labels; each `features`
    # is ONE row of the dataset, so indexing it with [0]/[1] is correct.
    for features, label in zip(samples_features, samples_labels):
        # Predicted probability for this single sample.
        p_result = p_theta_function(features, w1, w2)
        # Cross-entropy term: -y*log(p) - (1-y)*log(1-p).
        loss_result = -1 * label * np.log(p_result) - (1 - label) * np.log(1 - p_result)
        result += loss_result
    return result
# Sweep each weight across a +/-0.6 window around its fitted optimum,
# holding the other weight fixed, and record the dataset loss at each
# point — a 1-D slice of the loss landscape per parameter.
theta1_space = np.linspace(theta1 - 0.6, theta1 + 0.6, 50)
theta2_space = np.linspace(theta2 - 0.6, theta2 + 0.6, 50)
result1_ = np.array([loss_function(X, y, i, theta2) for i in theta1_space])
result2_ = np.array([loss_function(X, y, theta1, i) for i in theta2_space])
# BUG FIX: the curves were computed but never plotted, so plt.show()
# displayed an empty window. Plot both loss slices before showing.
plt.plot(theta1_space, result1_, label='loss vs w1 (w2 fixed)')
plt.plot(theta2_space, result2_, label='loss vs w2 (w1 fixed)')
plt.legend()
plt.show()
# Review note — the line `z = w1 * features[0] + w2 * features[1]` prompted
# the question: "features is ultimately X; w1 corresponds to X's first column
# and w2 to its second — shouldn't this be features[:, 0] instead of
# features[0]?"  Answer: no. loss_function iterates X row by row via
# zip(samples_features, samples_labels), so inside p_theta_function
# `features` is a SINGLE sample (one row), and features[0] / features[1]
# are that sample's two feature values, not whole columns.