# The vocabulary file yields byte strings; decode each entry so that wordsList
# is a plain Python list of str (list methods such as .index() then work on it).
wordsList = [
    rawWord.decode('UTF-8')
    for rawWord in np.load('training_data/wordsList.npy').tolist()
]
# Word-vector array loaded from disk.
wordVectors = np.load('training_data/wordVectors.npy')
The positive and negative input file paths have been loaded into the variables `positiveFiles` and `negativeFiles`.
with tf.device('/gpu:0'):
    # NOTE(review): everything inside this scope is NumPy / plain Python, so
    # the device scope presumably has no effect here; kept as in the original.

    # Row i of `ids` holds the word indices of input file i, zero-padded on
    # the right up to maxSeqLength.
    ids = np.zeros((numFiles, maxSeqLength), dtype='int32')

    # Map every word to the index of its FIRST occurrence in wordsList (the
    # same answer wordsList.index(word) gives), so each lookup is a dict
    # access instead of a linear scan over the whole vocabulary.
    wordToIndex = {}
    for position, knownWord in enumerate(wordsList):
        wordToIndex.setdefault(knownWord, position)

    fileCounter = 0
    # Positive files fill the first rows of `ids`, negative files the rows
    # after them; the batch helpers rely on that ordering.
    for fileGroup in (positiveFiles, negativeFiles):
        for reviewFile in fileGroup:
            with open(reviewFile, "r") as f:
                # NOTE(review): assumes the text of each file sits on its
                # first line -- confirm against the data set.
                line = f.readline()
            cleanedLine = cleanSentences(line)
            # Anything beyond maxSeqLength words is dropped.
            split = cleanedLine.split()[:maxSeqLength]
            for indexCounter, word in enumerate(split):
                # 399999 is the index used for unknown words.
                ids[fileCounter][indexCounter] = wordToIndex.get(word, 399999)
            fileCounter = fileCounter + 1

# Persist the index matrix so it does not have to be rebuilt on every run.
np.save('idsMatrix', ids)
batchSize = 24
Training and testing methods
def getTrainBatch():
    """Build one training batch that alternates between the two row ranges.

    Reads the module-level ``batchSize``, ``maxSeqLength`` and ``ids``.
    Even batch positions draw a row number with ``randint(1, 11499)``, odd
    positions with ``randint(13499, 24999)``.
    NOTE(review): this assumes the positive files occupy the first rows of
    ``ids`` and the negative files the later rows -- confirm the split points.

    Returns:
        A tuple ``(arr, labels)``: ``arr`` is a ``batchSize x maxSeqLength``
        array of word indices and ``labels`` is a list of ``batchSize``
        one-hot pairs, ``[1, 0]`` for rows drawn from the first range and
        ``[0, 1]`` for rows drawn from the second.
    """
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        if i % 2 == 0:
            num = randint(1, 11499)
            labels.append([1, 0])
        else:
            num = randint(13499, 24999)
            labels.append([0, 1])
        # num is 1-based, ids is 0-based.
        arr[i] = ids[num - 1]
    return arr, labels
def getTestBatch():
    """Build one evaluation batch from the held-out row range.

    Reads the module-level ``batchSize``, ``maxSeqLength`` and ``ids``.
    Every batch position draws a row number with ``randint(11499, 13499)``.
    NOTE(review): row numbers up to 12499 are treated as the first class,
    which assumes the positive files occupy the first rows of ``ids`` --
    confirm the split point.

    Returns:
        A tuple ``(arr, labels)``: ``arr`` is a ``batchSize x maxSeqLength``
        array of word indices and ``labels`` is a list of ``batchSize``
        one-hot pairs, ``[1, 0]`` when the row number is ``<= 12499`` and
        ``[0, 1]`` otherwise.
    """
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        num = randint(11499, 13499)
        if num <= 12499:
            labels.append([1, 0])
        else:
            labels.append([0, 1])
        # num is 1-based, ids is 0-based.
        arr[i] = ids[num - 1]
    return arr, labels
# Model definition: embedding lookup -> LSTM with dropout -> linear layer that
# produces one logit per class.
with tf.device('/gpu:0'):
    batchSize = 24
    lstmUnits = 64
    numClasses = 2
    iterations = 100000

    # One-hot labels and word-index inputs for a single, fixed-size batch.
    labels = tf.placeholder(tf.float32, [batchSize, numClasses])
    input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])

    # Look up the word vector of every word index. The zero-filled tf.Variable
    # that used to be assigned to `data` first was overwritten immediately,
    # so it has been removed.
    data = tf.nn.embedding_lookup(wordVectors, input_data)

    lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
    value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

with tf.device('/gpu:0'):
    weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
    bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))

    # Swap the first two axes so the last time step can be picked with gather.
    value = tf.transpose(value, [1, 0, 2])
    last = tf.gather(value, int(value.get_shape()[0]) - 1)
    # Raw logits; softmax is applied inside the loss below.
    prediction = (tf.matmul(last, weight) + bias)

    correctPred = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))

    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
    optimizer = tf.train.AdamOptimizer().minimize(loss)

sess = tf.InteractiveSession()
saver = tf.train.Saver()
# Variables must be initialised before the first sess.run on the optimizer;
# otherwise TensorFlow reports an uninitialised-value error.
sess.run(tf.global_variables_initializer())
# Training: one optimizer step per randomly drawn training batch.
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

# Evaluation: measure accuracy on a few held-out batches. The value returned
# by sess.run used to be thrown away; it is now reported per batch.
iterations = 10
for i in range(iterations):
    nextBatch, nextBatchLabels = getTestBatch()
    batchAccuracy = sess.run(accuracy, {input_data: nextBatch, labels: nextBatchLabels})
    print("Accuracy for this batch:", batchAccuracy * 100)
Here I am trying to predict the output, in the form of 1 or 0, for a given sentence. After loading the model from the checkpoint as shown below, how am I supposed to test whether a sentence is positive (1) or negative (0)?
# Rebuild the graph from the saved meta file, then load the most recent
# checkpoint found in the models directory into the current session.
new_saver = tf.train.import_meta_graph('models/pretrained....')
new_saver.restore(
    sess,
    save_path=tf.train.latest_checkpoint(checkpoint_dir='models/./'),
)
Please help.
Give the input and output tensors explicit names, then retrieve those tensors from the restored graph to run the prediction. Below are the few changes required, plus the additional code needed to get prediction working:
# Name the input placeholder so it can be looked up again after restoring.
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength], name='inputs')

prediction = (tf.matmul(last, weight) + bias)
# Apply tf.nn.softmax to these logits if probabilities are wanted at
# prediction time, but keep feeding the raw logits to the loss.
# tf.identity only attaches a stable name to the output tensor.
prediction = tf.identity(prediction, name='prediction')

with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

# Write the trained variables (and the graph's meta file) under the prefix
# 'model' once training has finished.
saver.save(sess, 'model')
Code for restoring; use the relative or absolute path to `model.meta` and to the `model` checkpoint prefix:
# Recreate the saved graph in the default graph.
new_saver = tf.train.import_meta_graph('/path/to/model.meta')

with tf.Session() as sess:
    # Restore into the SAME session that runs the prediction; restoring into
    # a different session would leave this one with uninitialised variables.
    new_saver.restore(sess, '/path/to/model')

    g = tf.get_default_graph()
    # Fetch the tensors by the names they were given when the graph was built.
    inputs = g.get_tensor_by_name('inputs:0')
    prediction = g.get_tensor_by_name('prediction:0')

    # your_inputs must match the placeholder shape used at training time.
    # The result holds one row of class scores per input row; taking the
    # argmax of each row gives the predicted class index.
    # NOTE(review): which index means "positive" depends on the one-hot
    # encoding of the training labels -- confirm before mapping to 1/0.
    prediction_ = sess.run(prediction, {inputs: your_inputs})
