# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
from sklearn.metrics import pairwise_distances_argmin
# Apply seaborn's default theme so the matplotlib figures drawn below use its styling.
sns.set()
def find_clusters(X, n_clusters, rseed=2):
    """Cluster the rows of ``X`` with a basic k-means iteration.

    ``n_clusters`` distinct rows of ``X`` are picked at random as the initial
    centers.  The loop then alternates between assigning every row to its
    nearest center and moving each center to the mean of its assigned rows,
    and stops as soon as the centers no longer change.

    Args:
        X: 2-D array with one sample per row.
        n_clusters: number of clusters to find (``1 <= n_clusters <= len(X)``).
        rseed: seed for the ``RandomState`` that picks the initial centers.

    Returns:
        ``(centers, labels)``: the final center coordinates, one row per
        cluster, and the index of the assigned center for every row of ``X``.

    Raises:
        ValueError: if ``n_clusters`` is not between 1 and the number of rows.
    """
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            'n_clusters must be between 1 and %d, got %r' % (n_samples, n_clusters)
        )

    rng = np.random.RandomState(rseed)
    # Draw the permutation exactly once.  Calling rng.permutation() again just
    # for the debug print would advance the generator, so the printed indices
    # would not be the ones actually used as initial centers.
    start_idx = rng.permutation(n_samples)[:n_clusters]
    print('rng.permutation(X.shape[0])[:n_clusters]:\n', start_idx)
    centers = X[start_idx]  # coordinates of the randomly chosen starting rows
    print('centers:\n', centers)

    while True:
        # Label every sample with the index of its nearest center.
        labels = pairwise_distances_argmin(X, centers)
        # Move each center to the mean of its members (mean(0) averages over
        # rows).  A cluster that lost all members keeps its previous center
        # instead of becoming NaN.
        new_centers = np.array([
            X[labels == k].mean(0) if np.any(labels == k) else centers[k]
            for k in range(n_clusters)
        ])
        if np.all(centers == new_centers):
            break  # converged: assignments can no longer change
        centers = new_centers
    return centers, labels
# Generate 300 samples scattered around four blob centers.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=0.60,
    random_state=0,
)
# Cluster them with the hand-written k-means and colour each point by its label.
centers, labels = find_clusters(X, n_clusters=4)
plt.scatter(X[:, 0], X[:, 1], s=50, c=labels, cmap='viridis')
plt.show()
矩阵坐标的指定
# -*- coding: utf-8 -*-
import numpy as np
# Fancy-indexing demo: select one element per (row, column) index pair.
data = np.arange(1, 10).reshape(3, 3)  # [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
i = [0, 1, 2]  # row indices, given as a plain Python list
j = np.array([2, 1, 0])  # column indices, given as a NumPy array
# The two index sequences are paired element-wise, so this reads
# data[0, 2], data[1, 1] and data[2, 0].  As shown here, a plain list and a
# NumPy array both work as index sequences.
print('data[i, j]:\n', data[i, j])
高斯基函数的正则化
# -*- coding: UTF-8 -*-
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin # 估算器,转换器
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
class GaussianFeatures(BaseEstimator, TransformerMixin):
    """Transformer that expands input into N evenly spaced Gaussian basis features.

    ``fit`` places ``N`` Gaussian centers evenly between the minimum and the
    maximum of the training data; ``transform`` evaluates every Gaussian at
    every sample.

    Args:
        N: number of Gaussian centers.  Must be at least 2, because the
            spacing between the first two centers defines the width.
        width_factor: the Gaussian width is this factor times the spacing
            between neighbouring centers.
    """

    def __init__(self, N, width_factor=2.0):
        self.N = N
        self.width_factor = width_factor

    @staticmethod  # needs no instance state
    def _gauss_basis(x, y, width, axis=None):
        """Return ``exp(-0.5 * sum(((x - y) / width) ** 2, axis))``.

        The intermediate values are printed for inspection.  The result is
        computed once and reused for both the shape print and the return.
        """
        arg = (x - y) / width
        basis = np.exp(-0.5 * np.sum(arg ** 2, axis))
        print('x:\n', x)
        print('y:\n', y)
        print('width:\n', width)
        print('arg = (x - y) / width:\n', arg)
        print('np.exp(-0.5 * np.sum(arg ** 2, axis)).shape: ', basis.shape)
        return basis

    def fit(self, X, y=None):
        """Learn the Gaussian centers and the shared width from ``X``.

        Returns:
            ``self``, so the call can be chained.
        """
        # N centers spread evenly over the range of the data.
        self.centers_ = np.linspace(X.min(), X.max(), self.N)
        # Width = spacing between neighbouring centers * width factor.
        self.width_ = self.width_factor * (self.centers_[1] - self.centers_[0])
        return self

    def transform(self, X):
        """Evaluate every Gaussian at every sample of the 2-D array ``X``.

        ``X`` gets a trailing axis so it broadcasts against the centers; the
        squared distances are then summed over axis 1.
        """
        # Evaluate once.  Calling _gauss_basis a second time only to print the
        # shape doubled both the computation and the debug output.
        features = self._gauss_basis(X[:, :, np.newaxis], self.centers_,
                                     self.width_, axis=1)
        print('transform.shape: ', features.shape)
        return features
# Reproducible noisy sine sample shared by the plots below.
rng = np.random.RandomState(seed=1)
x = rng.rand(50) * 10  # 50 random inputs in [0, 10)
y = 0.1 * rng.randn(50) + np.sin(x)  # targets: sin(x) plus Gaussian noise
xfit = np.linspace(start=0, stop=10, num=1000)  # dense grid used for prediction
# Disabled walkthrough, kept as a bare string literal so it is never executed:
# it fits a GaussianFeatures(20) + LinearRegression pipeline on (x, y) and
# plots the prediction over xfit.
'''
# 预定义模型
gauss_model = make_pipeline(GaussianFeatures(20),
LinearRegression())
print('=========================================================')
gauss_model.fit(x[:, np.newaxis], y) # 代入转置后的x矩阵进行学习
print('---------------------------------------------------------')
yfit = gauss_model.predict(xfit[:, np.newaxis]) # 预测结果,得到y值
print('=========================================================')
print('yfit.shape:', yfit.shape)
plt.scatter(x, y) # 学习数据
plt.plot(xfit, yfit) # 预测效果曲线
plt.xlim(0, 10)
'''
def basis_plot(model, title=None):
    """Fit ``model`` on the module-level sample and plot fit and coefficients.

    The top panel shows the training points ``(x, y)`` and the model's
    prediction over ``xfit``.  The bottom panel shows the regression
    coefficient of every Gaussian basis function against its center.

    Args:
        model: a two-step pipeline whose first step exposes ``centers_`` and
            whose second step exposes ``coef_`` after fitting.
        title: optional title for the top panel.
    """
    _, axes = plt.subplots(2, sharex=True)
    model.fit(x[:, np.newaxis], y)

    axes[0].scatter(x, y)
    axes[0].plot(xfit, model.predict(xfit[:, np.newaxis]))
    axes[0].set(xlabel='x', ylabel='y', ylim=(-1.5, 1.5))
    if title:
        axes[0].set_title(title)

    features = model.steps[0][1]   # first pipeline step: the basis transformer
    regressor = model.steps[1][1]  # second pipeline step: the linear model
    axes[1].plot(features.centers_, regressor.coef_)
    # Print the attributes the labels name; previously the estimator objects
    # themselves were printed under these labels.
    print('model.steps[0][1].centers_,: \n', features.centers_)
    print('model.steps[1][1].coef_: \n', regressor.coef_)
    axes[1].set(xlabel='basis location',
                ylabel='coefficient',
                xlim=(0, 10))
# Alternatives to compare (swap one in for the Lasso pipeline below):
#   model = make_pipeline(GaussianFeatures(30), LinearRegression())
#   basis_plot(model)
#   model = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.1))  # ridge regularisation
#   basis_plot(model, title='Ridge Regression')
model = make_pipeline(
    GaussianFeatures(N=30),
    Lasso(alpha=0.001),
)
basis_plot(model=model, title='Lasso Regression')
plt.show()
Lasso(L1 正则化)更倾向于把部分系数压缩为 0(得到稀疏模型);系数的绝对值越大,通常说明模型越可能过拟合。
高斯基函数拟合正弦曲线
# -*- coding: UTF-8 -*-
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin # 估算器,转换器
class GaussianFeatures(BaseEstimator, TransformerMixin):
    """Gaussian basis expansion with uniformly spaced centers.

    After ``fit`` the instance holds ``centers_`` (``N`` points spread evenly
    across the range of the training data) and ``width_`` (``width_factor``
    times the distance between the first two centers).  ``transform`` then
    evaluates each Gaussian for each sample.
    """

    def __init__(self, N, width_factor=2.0):
        self.N = N
        self.width_factor = width_factor

    @staticmethod  # usable without an instance
    def _gauss_basis(x, y, width, axis=None):
        """Evaluate ``exp(-0.5 * sum(((x - y) / width) ** 2, axis))``, printing each step."""
        scaled = (x - y) / width
        squared_sum = (scaled ** 2).sum(axis=axis)  # summed over the requested axis
        values = np.exp(-0.5 * squared_sum)
        print('x:\n', x)
        print('y:\n', y)
        print('width:\n', width)
        print('arg = (x - y) / width:\n', scaled)
        print('np.exp(-0.5 * np.sum(arg ** 2, axis)).shape: ', values.shape)
        return values

    def fit(self, X, y=None):
        """Derive the centers and the common width from ``X``; return ``self``."""
        lower, upper = X.min(), X.max()
        # Evenly spaced centers covering the data range.
        self.centers_ = np.linspace(lower, upper, self.N)
        spacing = self.centers_[1] - self.centers_[0]
        self.width_ = self.width_factor * spacing
        return self

    def transform(self, X):
        """Map the 2-D array ``X`` to its Gaussian basis features."""
        # A trailing axis lets the samples broadcast against the centers.
        expanded = X[:, :, None]
        return self._gauss_basis(expanded, self.centers_, self.width_, axis=1)
# Model: 20 Gaussian basis features feeding an ordinary linear regression.
gauss_model = make_pipeline(GaussianFeatures(N=20), LinearRegression())

# Reproducible noisy sine sample.
rng = np.random.RandomState(seed=1)
x = rng.rand(50) * 10  # 50 random inputs
y = 0.1 * rng.randn(50) + np.sin(x)  # targets
xfit = np.linspace(start=0, stop=10, num=1000)  # inputs used for prediction

print('=========================================================')
gauss_model.fit(x.reshape(-1, 1), y)  # the estimator expects a column of samples
print('---------------------------------------------------------')
yfit = gauss_model.predict(xfit.reshape(-1, 1))  # predicted y for every xfit
print('=========================================================')
print('yfit.shape:', yfit.shape)

plt.scatter(x, y)  # training data
plt.plot(xfit, yfit)  # fitted curve
plt.xlim(0, 10)
plt.show()
高斯基底函数参照:
https://gihyo.jp/assets/images/dev/serial/01/machine-learning/0009/002.png
装饰器的运用示例
# -*- coding: UTF-8 -*-
def log(func):
    """Decorator that prints a call's arguments before delegating to ``func``.

    For every call it prints ``func`` itself, the positional arguments both
    as a tuple and unpacked, and the keyword arguments both as a dict and as
    unpacked keys.  It then calls ``func`` with the same arguments.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper that returns whatever ``func`` returns.
    """
    import functools  # local import keeps this snippet self-contained

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*arg, **kw):
        print('Start %s: ' % func)
        print('arg: ', arg)
        print('*arg: ', *arg)
        print('kw: ', kw)
        print('*kw: ', *kw)  # unpacking a dict yields its keys
        return func(*arg, **kw)
    return wrapper
@log  # trace every call through the log decorator
def func_a(*arg, **kw):
    """Print a separator line and a marker that func_a is running."""
    print('------------------', 'ongoing func_a', sep='\n')
def func_b(*arg, **kw):
    """Print a separator line and a marker that func_b is running (undecorated)."""
    print('------------------', 'ongoing func_b', sep='\n')
# Same call for both: func_a is traced by @log first, func_b runs undecorated.
for demo in (func_a, func_b):
    demo(1, 2, 3, 5, 6, 7, a=1, b=2)
