# -*- coding: utf-8 -*-
"""
Created on Thu Feb 7 23:20:11 2019
@author: jhlee
"""
from gensim.models import Word2Vec
import pyodbc
import sys
# Query text: the 10,000 most frequent distinct messages (with their
# occurrence count) logged in dbo.sentences for date_key '20190207', hour 14.
sql = """
select top 10000
msg
, count(*) cnt
from dbo.sentences
where 1=1
and date_key = '20190207'
and hh = 14
group by
msg
order by cnt desc
"""
# NOTE(review): the date is hard-coded in the query. An earlier, disabled
# variant substituted sys.argv[1] for a "@date_key" placeholder by string
# replacement; if that is revived, prefer a bound query parameter over
# building the SQL text from command-line input.

# NOTE(review): the connection settings (including credentials) are literal
# values in the source -- consider loading them from configuration.
conn = pyodbc.connect(
    driver='{SQL Server}',
    host='192.168.0.1',
    database='gamelog',
    user='id',
    password='pw',
)
cursor = conn.cursor()
cursor.execute(sql)
# Build the training corpus: one token list per returned message, obtained
# by splitting the message text on single spaces.
sentences = []
try:
    # A pyodbc cursor can be iterated directly; it yields rows until the
    # result set is exhausted, so no manual fetchone() bookkeeping is needed.
    for row in cursor:
        msg = row[0]  # first selected column: msg
        if msg is None:
            # A NULL msg (possible as its own GROUP BY bucket) has no text
            # to tokenize and would otherwise raise on .split().
            continue
        sentences.append(msg.split(" "))
finally:
    # Release database resources even if reading rows fails. The rest of
    # the visible script only works with `sentences`, so the connection is
    # closed here as well instead of being left open.
    cursor.close()
    conn.close()
# Sample of sentences[0] from an earlier debug print: ["''", '', '覲']

# Train word embeddings on the tokenized messages.
# NOTE(review): `size` is the gensim 3.x keyword name (gensim 4 renamed it
# to `vector_size`) -- confirm against the installed gensim version.
model = Word2Vec(
    sentences=sentences,
    min_count=50,
    size=100,
    batch_words=10,
)

# Normalize the trained vectors in place (replace=True is passed so gensim
# keeps only the normalized vectors, per its init_sims documentation).
model.init_sims(replace=True)

# Look up the words closest to the query token.
# NOTE(review): the result is neither stored nor printed, so it is only
# visible when this line is evaluated in an interactive console.
model.wv.most_similar("覲")