add python code

2025-05-01 10:03:09 +02:00
parent 4c1207b57e
commit de6cc17ce7
2 changed files with 249 additions and 0 deletions

bengio.py (new file, 248 lines)

@@ -0,0 +1,248 @@
import sys
import random
import numpy as np

class NeuralProbabilisticLanguageModel:
    def __init__(self, vocab_size, embedding_dim=100, context_size=3, hidden_dim=50, learning_rate=0.01):
        """
        Initialize Bengio's Neural Probabilistic Language Model
        Args:
            vocab_size: Size of the vocabulary
            embedding_dim: Dimension of word embeddings
            context_size: Number of previous words to consider
            hidden_dim: Dimension of hidden layer
            learning_rate: Learning rate for optimization
        """
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.context_size = context_size
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        # Initialize model parameters
        # C: Word embedding matrix
        self.C = np.random.randn(vocab_size, embedding_dim) * 0.1
        # Parameters for hidden layer
        self.H = np.random.randn(context_size * embedding_dim, hidden_dim) * 0.1
        self.b_h = np.zeros(hidden_dim)
        # Parameters for output layer
        self.U = np.random.randn(hidden_dim, vocab_size) * 0.1
        self.W = np.random.randn(context_size * embedding_dim, vocab_size) * 0.1
        self.b_o = np.zeros(vocab_size)
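    # The forward pass below implements the scoring function of Bengio et al. (2003):
    # with x the concatenation of the context embeddings C[w_1], ..., C[w_n],
    #     y = b_o + x.W + tanh(b_h + x.H).U
    # followed by a softmax over the vocabulary. W provides the optional direct
    # (shortcut) connections from the input to the output layer.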
    def forward(self, context_words):
        """
        Forward pass of the model
        Args:
            context_words: List of word indices for the context
        Returns:
            Probability distribution over next words
        """
        # Lookup embeddings for context words
        embeddings = self.C[context_words]
        x = embeddings.flatten()  # Concatenate embeddings
        # Compute hidden layer activation
        h = np.tanh(np.dot(x, self.H) + self.b_h)
        # Compute output layer
        # Direct connections from input to output (shortcut connections)
        y = np.dot(h, self.U) + np.dot(x, self.W) + self.b_o
        # Apply softmax to get probabilities
        exp_y = np.exp(y - np.max(y))  # Subtract max for numerical stability
        probabilities = exp_y / np.sum(exp_y)
        return probabilities, h, x
    def compute_loss(self, probabilities, target_word):
        """
        Compute cross-entropy loss
        Args:
            probabilities: Predicted probability distribution
            target_word: Index of the target word
        Returns:
            Cross-entropy loss
        """
        return -np.log(probabilities[target_word])
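    # For the softmax + cross-entropy combination used above, the gradient of the
    # loss with respect to the pre-softmax scores y simplifies to
    #     dL/dy = probabilities - one_hot(target_word),
    # which is why backward() starts from a copy of the probabilities and
    # subtracts 1 at the target index.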
    def backward(self, context_words, target_word, probabilities, h, x):
        """
        Backward pass for parameter updates
        Args:
            context_words: List of word indices for the context
            target_word: Index of the target word
            probabilities: Output probabilities from forward pass
            h: Hidden layer activation
            x: Input vector (concatenated embeddings)
        """
        # Gradient for output layer
        d_y = probabilities.copy()
        d_y[target_word] -= 1
        # Gradients for parameters
        d_U = np.outer(h, d_y)
        d_W = np.outer(x, d_y)
        d_b_o = d_y
        # Gradient for hidden layer
        d_h = np.dot(d_y, self.U.T)
        d_h_input = d_h * (1 - h**2)  # Derivative of tanh
        d_H = np.outer(x, d_h_input)
        d_b_h = d_h_input
        # Gradient for embeddings
        d_x = np.dot(d_h_input, self.H.T) + np.dot(d_y, self.W.T)
        d_C = np.zeros_like(self.C)
        # Update embeddings for context words
        for i, word_idx in enumerate(context_words):
            start = i * self.embedding_dim
            end = (i + 1) * self.embedding_dim
            d_C[word_idx] += d_x[start:end]
        # Update parameters
        self.U -= self.learning_rate * d_U
        self.W -= self.learning_rate * d_W
        self.b_o -= self.learning_rate * d_b_o
        self.H -= self.learning_rate * d_H
        self.b_h -= self.learning_rate * d_b_h
        self.C -= self.learning_rate * d_C
    def train_step(self, context_words, target_word):
        """
        Perform one training step
        Args:
            context_words: List of word indices for the context
            target_word: Index of the target word
        Returns:
            Loss for this example
        """
        probabilities, h, x = self.forward(context_words)
        loss = self.compute_loss(probabilities, target_word)
        self.backward(context_words, target_word, probabilities, h, x)
        return loss
    def train(self, data, n_epochs=5):
        """
        Train the model on a dataset
        Args:
            data: List of (context_words, target_word) tuples
            n_epochs: Number of training epochs
        Returns:
            List of average losses per epoch
        """
        losses = []
        for epoch in range(n_epochs):
            epoch_loss = 0
            for context_words, target_word in data:
                epoch_loss += self.train_step(context_words, target_word)
            avg_loss = epoch_loss / len(data)
            losses.append(avg_loss)
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {avg_loss:.4f}")
        return losses
    def predict_next_word(self, context_words, temperature=1.0):
        """
        Predict the next word given a context with temperature sampling
        Args:
            context_words: List of word indices for the context
            temperature: Controls randomness (higher = more random, lower = more deterministic)
                temperature=0 is equivalent to argmax (greedy)
                temperature=1.0 keeps the original distribution
        Returns:
            Index of the sampled next word
        """
        probabilities, _, _ = self.forward(context_words)
        if temperature == 0:
            # Greedy sampling (argmax)
            return np.argmax(probabilities)
        # Apply temperature scaling
        scaled_logits = np.log(probabilities) / temperature
        # Re-normalize to get a valid probability distribution
        exp_scaled = np.exp(scaled_logits - np.max(scaled_logits))  # Subtract max for numerical stability
        scaled_probs = exp_scaled / np.sum(exp_scaled)
        # Sample from the scaled distribution
        return np.random.choice(len(scaled_probs), p=scaled_probs)
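    # Temperature intuition: values below 1 sharpen the distribution toward the
    # most likely words, values above 1 flatten it toward uniform sampling, and
    # as temperature approaches 0 sampling approaches the greedy argmax branch.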
# Example usage
def preprocess_text(text, vocab, context_size):
    """Convert text to training examples"""
    words = text.split()
    word_to_idx = {word: idx for idx, word in enumerate(vocab)}
    # Create training examples
    examples = []
    for i in range(len(words) - context_size):
        context = [word_to_idx[words[i+j]] for j in range(context_size)]
        target = word_to_idx[words[i+context_size]]
        examples.append((context, target))
    return examples
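# For example, with context_size=2 the text "the cat sat on" yields the pairs
# (["the", "cat"] -> "sat") and (["cat", "sat"] -> "on"), encoded as vocabulary indices.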
# Small example
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("synopsis: python3 bengio.py <file> <n_ctx> <n_epochs>")
        sys.exit(1)
    with open(sys.argv[1], "r") as buf:
        text = buf.read()
    if len(sys.argv) > 2:
        n_ctx = int(sys.argv[2])
    else:
        n_ctx = 2
    if len(sys.argv) > 3:
        n_epochs = int(sys.argv[3])
    else:
        n_epochs = 10
    n_predict = 100
    words = text.split()
    vocab = sorted(set(words))
    vocab.append(" ")
    examples = preprocess_text(text, vocab, n_ctx)
    model = NeuralProbabilisticLanguageModel(
        vocab_size=len(vocab),
        embedding_dim=10,
        context_size=n_ctx,
        hidden_dim=8
    )
    losses = model.train(examples, n_epochs=n_epochs)
    # Test model prediction
    pred = [vocab.index(w) for w in words[:n_ctx]]
    for i in range(n_predict):
        context = pred[-n_ctx:]
        predicted_idx = model.predict_next_word(context)
        pred.append(predicted_idx)
    output = [vocab[i] for i in pred]
    print(" ".join(output))

requirements.txt (new file, 1 line)

@@ -0,0 +1 @@
numpy