Added new project #6
base: main
New file — a command-line Hangman game:

@@ -0,0 +1,113 @@

```python
import random


def choose_word():
    # Pick a random word from the built-in list and normalize it to uppercase.
    words = ["python", "hangman", "computer", "programming", "algorithm", "variable", "function", "loop", "condition", "string"]
    return random.choice(words).upper()


def display_hangman(tries):
    # stages[0] is the fully drawn figure (no tries left) and stages[6] is the
    # empty gallows, so indexing by the remaining tries works directly.
    stages = [
        """
           --------
           |      |
           |      O
           |     \\|/
           |      |
           |     / \\
           -
        """,
        """
           --------
           |      |
           |      O
           |     \\|/
           |      |
           |     /
           -
        """,
        """
           --------
           |      |
           |      O
           |     \\|/
           |      |
           |
           -
        """,
        """
           --------
           |      |
           |      O
           |     \\|
           |      |
           |
           -
        """,
        """
           --------
           |      |
           |      O
           |      |
           |      |
           |
           -
        """,
        """
           --------
           |      |
           |      O
           |
           |
           |
           -
        """,
        """
           --------
           |      |
           |
           |
           |
           |
           -
        """
    ]
    return stages[tries]


def play_hangman():
    word = choose_word()
    word_letters = set(word)   # letters still to be guessed
    guessed_letters = set()    # every letter guessed so far
    tries = 6

    print("Welcome to Hangman!")
    print(display_hangman(tries))
    print("_ " * len(word))

    while tries > 0 and word_letters:
        # Re-prompt until the input is a single alphabetic character.
        while True:
            guess = input("Guess a letter: ").strip().upper()
            if len(guess) == 1 and guess.isalpha():
                break
            print("Invalid input. Please enter a single alphabetic character.")

        if guess in guessed_letters:
            print("You already guessed that letter.")
        elif guess in word_letters:
            word_letters.remove(guess)
            guessed_letters.add(guess)
            print("Good guess!")
        else:
            tries -= 1
            guessed_letters.add(guess)
            print("Wrong guess!")

        print(display_hangman(tries))
        word_display = [letter if letter in guessed_letters else "_" for letter in word]
        print(" ".join(word_display))

    if not word_letters:
        print(f"Congratulations! You guessed the word: {word}")
    else:
        print(f"Sorry, you ran out of tries. The word was: {word}")


if __name__ == "__main__":
    play_hangman()
```
New file — the Q-learning agent (imported by the entry point as `agent`):

@@ -0,0 +1,112 @@

```python
import numpy as np
import pickle
import os


class QLearningAgent:
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.alpha = alpha      # Learning rate
        self.gamma = gamma      # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.q_table = {}
        self.load_q_table()

    def get_state_key(self, state):
        # States arrive as flat arrays; tuples are hashable dictionary keys.
        return tuple(state)

    def get_q_value(self, state, action):
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)  # one Q-value per board cell
        return self.q_table[state_key][action]

    def set_q_value(self, state, action, value):
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        self.q_table[state_key][action] = value

    def choose_action(self, state, available_actions):
        # Epsilon-greedy: explore with probability epsilon, otherwise pick
        # uniformly among the highest-valued available actions.
        if np.random.random() < self.epsilon:
            return np.random.choice(available_actions)
        else:
            q_values = [self.get_q_value(state, action) for action in available_actions]
            max_q = max(q_values)
            best_actions = [action for action, q in zip(available_actions, q_values) if q == max_q]
            return np.random.choice(best_actions)

    def learn(self, state, action, reward, next_state, next_available_actions, done):
        current_q = self.get_q_value(state, action)

        if done:
            target = reward
        else:
            next_q_values = [self.get_q_value(next_state, next_action) for next_action in next_available_actions]
            max_next_q = max(next_q_values) if next_q_values else 0
            target = reward + self.gamma * max_next_q

        new_q = current_q + self.alpha * (target - current_q)
        self.set_q_value(state, action, new_q)

    def save_q_table(self):
        with open('q_table.pkl', 'wb') as f:
            pickle.dump(self.q_table, f)
        print("Q-table saved.")

    def load_q_table(self):
        if os.path.exists('q_table.pkl'):
            with open('q_table.pkl', 'rb') as f:
                self.q_table = pickle.load(f)
            print("Q-table loaded.")
        else:
            print("No saved Q-table found. Starting fresh.")

    def train(self, game, episodes=10000):
        print("Training the agent...")
        for episode in range(episodes):
            state = game.reset()
            done = False

            while not done:
                available_actions = game.get_available_actions()
                action = self.choose_action(state, available_actions)

                next_state, reward, done = game.make_move(action)
                next_available_actions = game.get_available_actions() if not done else []

                self.learn(state, action, reward, next_state, next_available_actions, done)

                state = next_state

            if (episode + 1) % 1000 == 0:
                print(f"Episode {episode + 1}/{episodes} completed.")

        self.save_q_table()
        print("Training completed!")

    def play(self, game):
        state = game.reset()
        done = False

        while not done:
            available_actions = game.get_available_actions()
            action = self.choose_action(state, available_actions)
            print(f"Agent plays at position {action}")

            next_state, reward, done = game.make_move(action)
            state = next_state
            game.print_board()

            if not done:
                print("\nYour turn:")
                human_action = game.get_human_move()
                next_state, reward, done = game.make_move(human_action)
                state = next_state
                game.print_board()

        game.print_board()
        if game.winner == 1:
            print("Agent wins!")
        elif game.winner == -1:
            print("You win!")
        else:
            print("It's a draw!")
```
New file — the Tic-Tac-Toe environment (imported as `game`); the diff view cuts it off partway through `make_move`:

@@ -0,0 +1,71 @@

```python
import numpy as np


class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1  # 1 for X, -1 for O
        self.winner = None
        self.game_over = False

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1
        self.winner = None
        self.game_over = False
        return self.get_state()

    def get_state(self):
        return self.board.flatten()

    def get_available_actions(self):
        # Empty cells, as flat indices 0-8.
        return [i for i in range(9) if self.board[i//3, i%3] == 0]

    def make_move(self, action):
        if self.game_over:
            return self.get_state(), 0, True

        row, col = action // 3, action % 3
        if self.board[row, col] != 0:
            return self.get_state(), -10, True  # Invalid move penalty

        self.board[row, col] = self.current_player

        if self.check_winner():
            self.winner = self.current_player
            self.game_over = True
            return self.get_state(), 1, True
        # ... (the remaining lines of this 71-line file are not shown in the diff view)
```

A review suggestion on the winning return value proposes crediting the player who actually won instead of returning a flat `1`:

```diff
-            return self.get_state(), 1, True
+            return self.get_state(), self.current_player, True
```
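The truncated portion of the `game` module must define the methods the agent and entry point call on it: `check_winner`, `print_board`, `get_human_move`, plus the draw/turn-switching tail of `make_move`. A minimal sketch of that shape, reconstructed from those call sites — an assumed implementation, not the author's actual code:

```python
# Hypothetical continuation of the TicTacToe class above (NOT from the PR).

    # Assumed tail of make_move, after the win check:
    #     if not self.get_available_actions():
    #         self.game_over = True
    #         return self.get_state(), 0, True    # board full -> draw
    #     self.current_player *= -1               # switch turns
    #     return self.get_state(), 0, False       # game continues

    def check_winner(self):
        # A row, column, or diagonal summing to +3 or -3 is three-in-a-row.
        for i in range(3):
            if abs(self.board[i, :].sum()) == 3 or abs(self.board[:, i].sum()) == 3:
                return True
        return abs(np.trace(self.board)) == 3 or abs(np.trace(np.fliplr(self.board))) == 3

    def print_board(self):
        symbols = {1: "X", -1: "O", 0: " "}
        for row in self.board:
            print("|".join(symbols[v] for v in row))

    def get_human_move(self):
        # Prompt until the human picks an empty cell, indexed 0-8.
        while True:
            try:
                move = int(input("Enter your move (0-8): "))
            except ValueError:
                print("Please enter a number from 0 to 8.")
                continue
            if move in self.get_available_actions():
                return move
            print("That square is taken or out of range.")
```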
New file — the entry point with the training/play menu; the diff view also cuts this one off:

@@ -0,0 +1,39 @@

```python
from game import TicTacToe
from agent import QLearningAgent


def main():
    game = TicTacToe()
    agent = QLearningAgent()

    print("Reinforcement Learning Tic-Tac-Toe Agent")
    print("The agent uses Q-learning to play Tic-Tac-Toe against you.")
    print()

    while True:
        print("Menu:")
        print("1. Train the agent")
        print("2. Play against the agent")
        print("3. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            episodes = int(input("Enter number of training episodes (default 10000): ") or "10000")
            # ... (the rest of this 39-line file is not shown in the diff view)
```

A review suggestion replaces the bare `int(...)` cast, which crashes with a `ValueError` on non-numeric input, with a validation loop:

```diff
-            episodes = int(input("Enter number of training episodes (default 10000): ") or "10000")
+            while True:
+                episodes_input = input("Enter number of training episodes (default 10000): ")
+                if not episodes_input:
+                    episodes_input = "10000"
+                try:
+                    episodes = int(episodes_input)
+                    break
+                except ValueError:
+                    print("Invalid number. Please enter a valid integer.")
```
New file — pinned dependency:

@@ -0,0 +1 @@

```
numpy==1.24.3
```
The training loop has the agent make all moves in the game, essentially playing against itself by making both X and O moves. However, the agent only learns from its own perspective without considering that in a real game, the opponent's moves alternate with its own. This training approach may not produce an effective agent. Consider implementing proper self-play where two agents alternate, or having the agent play against a random opponent.
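A minimal sketch of the second option the reviewer mentions — training against a random opponent — assuming the `make_move` semantics shown above (the returned reward credits the player who just moved). The function `train_vs_random` and its internals are illustrative, not part of this PR:

```python
import numpy as np

def train_vs_random(agent, game, episodes=10000):
    """Illustrative sketch: the agent always plays X; a random opponent plays O.

    Q-updates cover only the agent's own moves; the opponent's reply is folded
    into the environment transition before learning.
    """
    for episode in range(episodes):
        state = game.reset()
        done = False
        while not done:
            # Agent (X) moves.
            action = agent.choose_action(state, game.get_available_actions())
            next_state, reward, done = game.make_move(action)

            # Random opponent (O) replies unless the game already ended.
            if not done:
                opp_action = np.random.choice(game.get_available_actions())
                next_state, _, done = game.make_move(opp_action)
                if done and game.winner == -1:
                    reward = -1  # the opponent's win is a loss for the agent

            next_actions = game.get_available_actions() if not done else []
            agent.learn(state, action, reward, next_state, next_actions, done)
            state = next_state

    agent.save_q_table()
```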