load_model.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Ask for the lyrics file the model was trained on; the tokenizer must be
# rebuilt from the same corpus so the word indices match the saved model.
lyrics_file = input('Enter the name of the lyrics file: ')

# Load the dataset
with open(f'./Lyrics/{lyrics_file}.txt', encoding="utf8") as f:
    data = f.read()

# Lowercase the text and split it into individual lines
corpus = data.lower().split("\n")

# Preview the result
print(corpus)
# Initialize the Tokenizer class and generate the word index dictionary
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
# Define the total words. Add 1 to account for index `0`, which is reserved for padding.
total_words = len(tokenizer.word_index) + 1
print(f'word index dictionary: {tokenizer.word_index}')
print(f'total words: {total_words}')
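
# Illustrative sanity check (not part of the original flow): show the integer
# sequence the tokenizer produces for the first corpus line. The exact ids
# depend entirely on the lyrics file supplied above.
print(f'sample token list for "{corpus[0]}": {tokenizer.texts_to_sequences([corpus[0]])[0]}')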
# Initialize the sequences list
input_sequences = []

# Loop over every line
for line in corpus:
    # Tokenize the current line
    token_list = tokenizer.texts_to_sequences([line])[0]

    # Loop over the tokenized line to generate the subphrases (n-grams)
    for i in range(1, len(token_list)):
        # Generate the subphrase: the first i+1 tokens of the line
        n_gram_sequence = token_list[:i+1]

        # Append the subphrase to the sequences list
        input_sequences.append(n_gram_sequence)

# Get the length of the longest sequence
max_sequence_len = max([len(x) for x in input_sequences])
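# For intuition (hypothetical numbers): a 4-token line such as [4, 2, 66, 8] is
# expanded by the loop above into the subphrases [4, 2], [4, 2, 66] and
# [4, 2, 66, 8]; the longest subphrase across all lines sets max_sequence_len.
print(f'max sequence length: {max_sequence_len}')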
model_name = input('Enter the name of the model to load: ')
model = tf.keras.models.load_model(f'Models/{model_name}')
model.summary()
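
# Note: tf.keras.models.load_model accepts either a SavedModel directory or an
# HDF5 (.h5) file, so `Models/{model_name}` is assumed to match whatever format
# the model was originally saved in (the training script is not shown here).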
# Define seed text
seed_text = input('Enter a seed phrase to start lyrics generation from: ')
# Define total words to predict
next_words = 500
# Loop until desired length is reached
for _ in range(next_words):
    # Convert the seed text to a token sequence
    token_list = tokenizer.texts_to_sequences([seed_text])[0]

    # Pad the sequence so it matches the input length the model expects
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

    # Feed to the model and get the probabilities for each index
    # (verbose=0 suppresses the per-call progress bar inside the loop)
    probabilities = model.predict(token_list, verbose=0)

    # Get the index with the highest probability
    predicted = np.argmax(probabilities, axis=-1)[0]

    # Ignore index 0 because it is just the padding token
    if predicted != 0:
        # Look up the word associated with the index
        output_word = tokenizer.index_word[predicted]

        # Append it to the seed text
        seed_text += " " + output_word

# Print the result
print(seed_text)
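
# Example session (all names below are hypothetical and depend on your local files):
#   $ python load_model.py
#   Enter the name of the lyrics file: beatles
#   Enter the name of the model to load: beatles_lstm
#   Enter a seed phrase to start lyrics generation from: i remember when
# The script then prints the seed phrase extended by up to 500 predicted words.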