Contents

1
2 Saving / loading the model
3 Removing words not in the vocabulary
4 Comparing sentences
5 References


1 #

# -*- coding: utf-8 -*-
"""
Created on Thu Feb  7 23:20:11 2019

@author: jhlee
"""

from gensim.models import Word2Vec
import pyodbc
import sys

# Load the training corpus: the 10,000 most frequent distinct chat messages
# for one date/hour, each split into a list of space-separated tokens.
sql = """
    select top 10000
        msg
    ,   count(*) cnt
    from dbo.sentences
    where 1=1
    and date_key = '20190207'
    and hh = 14
    group by
        msg
    order by cnt desc
"""
# NOTE: to make the date configurable, bind it as a query parameter
# (`date_key = ?` plus `cursor.execute(sql, value)`) instead of splicing
# sys.argv into the SQL text with str.replace.

# Connection settings are hard-coded in this example.
conn = pyodbc.connect(driver='{SQL Server}', host='192.168.0.1', database='gamelog', user='id', password='pw')
try:
    cursor = conn.cursor()
    try:
        cursor.execute(sql)
        # Each row is (msg, cnt); only the message text is used.  A NULL msg
        # group has nothing to tokenize, so it is skipped instead of crashing
        # on None.split().
        sentences = [row[0].split(" ") for row in cursor if row[0] is not None]
    finally:
        cursor.close()
finally:
    # Release the database connection even when the query fails.
    conn.close()

# Train word vectors on the tokenized messages; every element of `sentences`
# is one message already split into a list of tokens.
model = Word2Vec(
    sentences,
    size=100,
    min_count=50,
    batch_words=10,
)
# replace=True keeps only the normalized vectors (see the gensim 3.x docs for
# init_sims); the model is used for similarity queries only from here on.
model.init_sims(replace=True)
# The result is not stored, so it is only visible in an interactive session.
model.wv.most_similar("覲")

2 Saving / loading the model #

# Save the trained model to disk, then load it back from the same path.
model_path = "gold_dealer_model"
model.save(model_path)
model = Word2Vec.load(model_path)

3 Removing words not in the vocabulary #

# Tokenize each sentence on single spaces and keep only the tokens present in
# the model's vocabulary.  s1 and s2 must already be defined as strings; after
# these lines they are lists of tokens.
s1 = [word for word in s1.split(" ") if word in model.wv.vocab]
s2 = [word for word in s2.split(" ") if word in model.wv.vocab]

4 Comparing sentences #

# Word Mover's Distance between two example sentences.
s1 = 'the first sentence'
s2 = 'the second text'

# wmdistance expects each document as a list of tokens.  Passing the raw
# strings would make it iterate over single characters, so split into words
# first (same single-space tokenization used for the training sentences).
# The result is not stored, so it is only visible in an interactive session.
model.wv.wmdistance(s1.split(" "), s2.split(" "))



5 References #