728x90
반응형
In [150]:
# Scatter N random integer points over a 2-D plane (each coordinate in 1..100).
from random import randint

# Layout of the generated data (illustrative values):
#   data[i]     -> (x, y) tuple of point i, e.g. 0: (1, 1), 1: (2, 2)
#   category[i] -> class label of point i,  e.g. 0: 0,      1: 1
N = 20
data = [(randint(1, 100), randint(1, 100)) for _ in range(N)]
# A point is labelled 1 when x + y is strictly greater than 100, otherwise 0.
category = [1 if sum(point) > 100 else 0 for point in data]
In [151]:
# Display the class labels generated above (notebook cell output).
category
Out[151]:
In [177]:
from math import sqrt
def euclidean(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Coordinates are paired positionally, so the function works for points of
    any dimension. If one point has more coordinates than the other, the
    surplus coordinates are ignored.

    Args:
        x: Sequence of numeric coordinates.
        y: Sequence of numeric coordinates.

    Returns:
        The distance as a float.
    """
    return sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))
In [178]:
import matplotlib.pyplot as plt
# Split the points into per-axis coordinate lists for plotting.
x = [point[0] for point in data]
y = [point[1] for point in data]

sample = (50, 50)  # query point to classify
# (index, distance to the sample) for every point, kept in index order so that
# similarity[i] always describes data[i].
similarity = [(i, euclidean(point, sample)) for i, point in enumerate(data)]

plt.rcParams["figure.figsize"] = (5, 5)  # square figure
colorMap = ["b", "r"]  # class 0 -> blue edge, class 1 -> red edge
# Hollow markers; the edge colour encodes each point's class label.
plt.scatter(x, y, facecolors="none",
            edgecolors=[colorMap[label] for label in category])
# The sample itself is drawn with a black edge.
plt.scatter(sample[0], sample[1], edgecolors="k")
plt.show()
In [179]:
K = 3  # number of neighbours that get a vote
# A smaller distance means a closer point, so sort ascending by the score and
# keep only the indices of the first K entries.
candidateList = [
    idx for idx, _ in sorted(similarity, key=lambda pair: pair[1])[:K]
]
In [181]:
# Display (rank, (index, score)) for every point, sorted ascending by score.
list(enumerate(sorted(similarity, key = lambda x:x[1])))
Out[181]:
In [155]:
# Indices of the points closest to the sample
candidateList
Out[155]:
In [156]:
# Tally the votes: candidateCategory[label] counts how many of the selected
# neighbours carry that label. The tally is sized from the largest label rather
# than from the number of distinct labels present, so every label stays a valid
# index even when the data happens to contain a single class only.
candidateCategory = [0] * (max(category, default=-1) + 1)
for idx in candidateList:
    candidateCategory[category[idx]] += 1
In [157]:
# Example output from one run: label 0 -> 2 votes, label 1 -> 1 vote
candidateCategory
Out[157]:
In [159]:
# Redraw the scatter plot: the selected neighbours are filled in green, every
# point keeps its class-coloured edge and is annotated with
# "(index :(x,y)=score)".
colorMap = ["b", "r"]  # class 0 -> blue, class 1 -> red
for i in range(N):
    fill = "g" if i in candidateList else "none"
    plt.scatter(x[i], y[i], facecolor=fill, edgecolors=colorMap[category[i]])
    # similarity is stored in index order, so similarity[i][1] is point i's score.
    plt.text(x[i] + 2, y[i] + 1,
             "({0} :({1},{2})={3:.2f})".format(i, x[i], y[i], similarity[i][1]))
# Fill the sample with the colour of the label that collected the most votes;
# on a tie list.index() returns the first maximum, i.e. the lowest label.
plt.scatter(sample[0], sample[1],
            facecolors=colorMap[candidateCategory.index(max(candidateCategory))],
            edgecolors="k")
plt.show()
In [182]:
# cosine similarity
def cosine(x, y):
    """Return the cosine similarity between vectors ``x`` and ``y``.

    The result is the inner product divided by the product of the two vector
    lengths: 1 for vectors pointing the same way, 0 for orthogonal vectors and
    -1 for opposite vectors.

    Args:
        x: Sequence of numeric components.
        y: Sequence of numeric components, at least as long as ``x``.

    Returns:
        The cosine of the angle between the two vectors as a float.

    Raises:
        ZeroDivisionError: If either vector has zero length.
        IndexError: If ``y`` has fewer components than ``x``.
    """
    innerProduct = 0
    for i, component in enumerate(x):
        innerProduct += component * y[i]
    # Take the norms over every component so they cover the same dimensions as
    # the inner product instead of only the first two coordinates.
    xLength = sqrt(sum(a * a for a in x))
    yLength = sqrt(sum(b * b for b in y))
    return innerProduct / (xLength * yLength)
In [183]:
import matplotlib.pyplot as plt
# Split the points into per-axis coordinate lists for plotting.
x = [point[0] for point in data]
y = [point[1] for point in data]

sample = (50, 50)  # query point to classify
# (index, cosine similarity to the sample) for every point, kept in index
# order so that similarity[i] always describes data[i].
similarity = [(i, cosine(point, sample)) for i, point in enumerate(data)]

plt.rcParams["figure.figsize"] = (5, 5)  # square figure
colorMap = ["b", "r"]  # class 0 -> blue edge, class 1 -> red edge
# Hollow markers; the edge colour encodes each point's class label.
plt.scatter(x, y, facecolors="none",
            edgecolors=[colorMap[label] for label in category])
# The sample itself is drawn with a black edge.
plt.scatter(sample[0], sample[1], edgecolors="k")
plt.show()
In [174]:
# Unlike a distance, a larger cosine similarity means a better match, so sort
# in descending order before keeping the indices of the first K entries.
K = 3  # number of neighbours that get a vote
candidateList = [
    idx
    for idx, _ in sorted(similarity, key=lambda pair: pair[1], reverse=True)[:K]
]
In [175]:
# Tally the votes: candidateCategory[label] counts how many of the selected
# neighbours carry that label. The tally is sized from the largest label rather
# than from the number of distinct labels present, so every label stays a valid
# index even when the data happens to contain a single class only.
candidateCategory = [0] * (max(category, default=-1) + 1)
for idx in candidateList:
    candidateCategory[category[idx]] += 1
In [176]:
# With cosine similarity the filled (selected) points are the ones lying
# closest to the straight line through the origin and the sample.
# A cosine of 1 is the best possible score.
colorMap = ["b", "r"]  # class 0 -> blue, class 1 -> red
for i in range(N):
    fill = "g" if i in candidateList else "none"
    plt.scatter(x[i], y[i], facecolor=fill, edgecolors=colorMap[category[i]])
    # similarity is stored in index order, so similarity[i][1] is point i's score.
    plt.text(x[i] + 2, y[i] + 1,
             "({0} :({1},{2})={3:.2f})".format(i, x[i], y[i], similarity[i][1]))
# Fill the sample with the colour of the label that collected the most votes;
# on a tie list.index() returns the first maximum, i.e. the lowest label.
plt.scatter(sample[0], sample[1],
            facecolors=colorMap[candidateCategory.index(max(candidateCategory))],
            edgecolors="k")
plt.show()
In [ ]:
In [ ]:
728x90
반응형