In [107]: # zadatak 1 %matplotlib inline import matplotlib.pyplot as plt import numpy as np In [108]: # Napraviti rucno skup od 10 elemenata. # Svaki element ima dva obeležja, npr. koordinate (x1 i x2) X = np.array([[-1,-8],[-1,-3],[-2,-4],[-9,-6],[-7,-1],[2,5],[4,5],[6,1],[3,7],[7,3]]) In [109]: # Neka 5 elemenata pripada klasi 1, a drugih 5 klasi 2 # Vektor Y oznacava pripadnost klasi. Y = np.array([1,1,1,1,1,2,2,2,2,2]) In [110]: # Generisati 2D Gausovsku rapodelu. # Napraviti 3 linearno separabilna klastera, tako da svaki ima po # 400 elemenata za trening i po 100 elemenata za test. mean=(0,0) cov=([1,-0.5],[-0.5,1]) x1,y1=np.random.multivariate_normal(mean, cov, 400).T x1_test,y1_test=np.random.multivariate_normal(mean, cov, 100).T In [111]: mean=(3,3) cov=([1,-0.5],[-0.5,1]) x2,y2=np.random.multivariate_normal(mean, cov, 400).T x2_test,y2_test=np.random.multivariate_normal(mean, cov, 100).T In [112]: mean=(6,6) cov=([1,-0.5],[-0.5,1]) x3,y3=np.random.multivariate_normal(mean, cov, 400).T x3_test,y3_test=np.random.multivariate_normal(mean, cov, 100).T In [113]: plt.scatter(x1,y1) plt.scatter(x1_test,y1_test, c='g') plt.scatter(x2,y2, c='c') plt.scatter(x2_test,y2_test, c='r') plt.scatter(x3,y3, c='y') plt.scatter(x3_test,y3_test, c='m') Out[113]: <matplotlib.collections.pathcollection at 0xd110400>
# Same as before, but with 10-20% overlap: identity covariance (no negative
# correlation) makes the clouds round and lets neighbouring clusters touch.
cov = [[1, 0], [0, 1]]
train_sets, test_sets = [], []
for mean in [(0, 0), (3, 3), (6, 6)]:
    # Train draw before test draw for each mean — same RNG order as the
    # original three copy-pasted cells.
    train_sets.append(np.random.multivariate_normal(mean, cov, 400).T)
    test_sets.append(np.random.multivariate_normal(mean, cov, 100).T)

(x1, y1), (x2, y2), (x3, y3) = train_sets
(x1_test, y1_test), (x2_test, y2_test), (x3_test, y3_test) = test_sets

# Training points in default/cyan/yellow, test points in green/red/magenta.
plt.scatter(x1, y1)
plt.scatter(x1_test, y1_test, c='g')
plt.scatter(x2, y2, c='c')
plt.scatter(x2_test, y2_test, c='r')
plt.scatter(x3, y3, c='y')
plt.scatter(x3_test, y3_test, c='m')
# Zadatak 2
# Create LDA classifiers that separate the three data sets from Zadatak 1.
# Check classifier accuracy using the test samples.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# BUG FIX: the class name was lower-cased by text extraction
# (lineardiscriminantanalysis) which raises NameError on a fresh kernel.
clf = LinearDiscriminantAnalysis()

# Stack the three training clusters into one (1200, 2) design matrix:
# rows 0-399 are cluster 1, 400-799 cluster 2, 800-1199 cluster 3.
X1 = np.hstack([x1, x2, x3])
Y1 = np.hstack([y1, y2, y3])
XX = np.vstack([X1, Y1]).T
# BUG FIX: was print(xx) — lower-cased name, NameError.
print(XX)

# Same layout for the 3 x 100 test points -> (300, 2).
X_test = np.hstack([x1_test, x2_test, x3_test])
Y_test = np.hstack([y1_test, y2_test, y3_test])
XX_test = np.vstack([X_test, Y_test]).T
XX_test.shape  # (300, 2)

# BUG FIX: was xx_test (lower-cased) in all three scatters — NameError.
plt.scatter(XX_test[0:100, 0], XX_test[0:100, 1])
plt.scatter(XX_test[100:200, 0], XX_test[100:200, 1], c='m')
plt.scatter(XX_test[200:, 0], XX_test[200:, 1], c='k')
# Class labels for the 1200 training points. The clusters were stacked in
# order, 400 points each, so the label vector must mark THREE classes.
# BUG FIX: the original set only y[600:] = 2, i.e. two classes for three
# clusters — points 400-599 (cluster 2) kept label 1 and points 600-799
# were split across two labels. Label the 400-point blocks 1 / 2 / 3.
y = np.ones(1200)
y[400:800] = 2
y[800:] = 3
y.shape  # (1200,)

# BUG FIX: was clf.fit(xx, y) / clf.predict(xx_test) — lower-cased names
# that were never defined (NameError on a fresh kernel).
clf.fit(XX, y)

y_predicted = clf.predict(XX_test)
plt.stem(y_predicted)

# Zadatak 3
from sklearn import datasets
import sklearn as sk
from sklearn.cluster import KMeans

# Zadatak 3: load Iris and build a random 120/30 train/test index split.
iris = datasets.load_iris()
X = iris.data[:, :2]   # keep only the first two features
Y = iris.target

# Shuffle all 150 indices in place, then cut off the first 120 for training
# and leave the remaining 30 for testing.
ind_all = np.arange(0, 150)
np.random.shuffle(ind_all)
ind_training = ind_all[:120]
ind_test = ind_all[120:]

# Visualise the split: training indices point up, test indices point down.
plt.stem(ind_training, np.ones(len(ind_training)))
plt.stem(ind_test, -np.ones(len(ind_test)), 'r')

# Zadatak 4 - apply k-means to the data sets from Zadatak 1.
# (KMeans is already imported at the top of this section.)
# Cluster the 300 test points into two groups.
# BUG FIX: kmeans / xx_test were lower-cased by text extraction and are
# undefined names — must be KMeans / XX_test.
km = KMeans(n_clusters=2).fit(XX_test)
km.cluster_centers_

# Boolean membership mask for cluster 1 (shown in the original output).
km.labels_ == 1

plt.scatter(XX_test[km.labels_ == 0, 0], XX_test[km.labels_ == 0, 1])
plt.scatter(XX_test[km.labels_ == 1, 0], XX_test[km.labels_ == 1, 1], c='r')
# NOTE: with n_clusters=2 there is no label 2, so this scatter draws
# nothing — kept only to mirror the k=3 cell below.
plt.scatter(XX_test[km.labels_ == 2, 0], XX_test[km.labels_ == 2, 1], c='g')
# Repeat the clustering with k=3 (matches the number of generated clusters).
# BUG FIX: kmeans / xx_test restored to proper case (KMeans / XX_test).
km = KMeans(n_clusters=3).fit(XX_test)
plt.scatter(XX_test[km.labels_ == 0, 0], XX_test[km.labels_ == 0, 1])
plt.scatter(XX_test[km.labels_ == 1, 0], XX_test[km.labels_ == 1, 1], c='r')
plt.scatter(XX_test[km.labels_ == 2, 0], XX_test[km.labels_ == 2, 1], c='g')

# And with k=4, to see the data over-segmented.
km = KMeans(n_clusters=4).fit(XX_test)
plt.scatter(XX_test[km.labels_ == 0, 0], XX_test[km.labels_ == 0, 1])
plt.scatter(XX_test[km.labels_ == 1, 0], XX_test[km.labels_ == 1, 1], c='r')
# NOTE: only labels 0-2 are plotted here — points in cluster 3 are not shown.
plt.scatter(XX_test[km.labels_ == 2, 0], XX_test[km.labels_ == 2, 1], c='g')
# k-NN algorithm for Zadatak 1
from sklearn.neighbors import KNeighborsClassifier

# BUG FIX: the class name was lower-cased by text extraction
# (kneighborsclassifier) — NameError on a fresh kernel. Likewise the
# lower-cased xx / xx_test must be the XX / XX_test matrices built earlier.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(XX, y)

# Predicted class for each of the 300 test points.
knn.predict(XX_test)
# OK