Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions Easy/26_Hangman.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import random

def choose_word():
    """Pick a random secret word from the built-in pool and return it uppercased."""
    word_pool = (
        "python", "hangman", "computer", "programming", "algorithm",
        "variable", "function", "loop", "condition", "string",
    )
    return random.choice(word_pool).upper()

def display_hangman(tries):
    """Return the ASCII-art gallows picture for the given number of remaining tries.

    ``tries`` counts remaining wrong guesses, from 6 (empty gallows, no
    body drawn) down to 0 (complete figure, game lost). ``stages[i]`` is
    the picture shown when ``i`` tries remain, so each wrong guess draws
    one more body part.
    """
    stages = [
        # 0 remaining tries: full figure (head, both arms, torso, both legs).
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |   / \\
        -
        """,
        # 1 remaining try: one leg still missing.
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |   /
        -
        """,
        # 2 remaining tries: no legs.
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |
        -
        """,
        # 3 remaining tries: one arm and torso.
        """
        --------
        |    |
        |    O
        |   \\|
        |    |
        |
        -
        """,
        # 4 remaining tries: head and torso only.
        """
        --------
        |    |
        |    O
        |    |
        |    |
        |
        -
        """,
        # 5 remaining tries: head only.
        """
        --------
        |    |
        |    O
        |
        |
        |
        -
        """,
        # 6 remaining tries: empty gallows.
        """
        --------
        |    |
        |
        |
        |
        |
        -
        """
    ]
    return stages[tries]

def play_hangman():
    """Run one interactive round of Hangman on the console.

    Picks a secret word, then repeatedly prompts for single-letter
    guesses until the word is fully revealed or the player has made
    six wrong guesses.
    """
    word = choose_word()
    remaining_letters = set(word)   # letters of the word not yet guessed
    guessed_letters = set()         # every letter the player has tried
    tries = 6

    print("Welcome to Hangman!")
    print(display_hangman(tries))
    print("_ " * len(word))

    while tries > 0 and remaining_letters:
        # Re-prompt until exactly one alphabetic character is entered.
        guess = input("Guess a letter: ").strip().upper()
        while not (len(guess) == 1 and guess.isalpha()):
            print("Invalid input. Please enter a single alphabetic character.")
            guess = input("Guess a letter: ").strip().upper()

        if guess in guessed_letters:
            print("You already guessed that letter.")
        elif guess in remaining_letters:
            remaining_letters.discard(guess)
            guessed_letters.add(guess)
            print("Good guess!")
        else:
            guessed_letters.add(guess)
            tries -= 1
            print("Wrong guess!")

        print(display_hangman(tries))
        print(" ".join(c if c in guessed_letters else "_" for c in word))

    if remaining_letters:
        print(f"Sorry, you ran out of tries. The word was: {word}")
    else:
        print(f"Congratulations! You guessed the word: {word}")

# Run the game only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    play_hangman()
112 changes: 112 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import numpy as np
import pickle
import os

class QLearningAgent:
    """Tabular Q-learning agent for a 9-action board game (Tic-Tac-Toe).

    Q-values live in a dict keyed by the flattened board state (converted
    to a tuple so it is hashable); each value is a length-9 numpy array
    holding one Q-value per board position.
    """

    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1, q_table_file='q_table.pkl'):
        """Create an agent and load any previously saved Q-table.

        alpha:        learning rate for the Q update.
        gamma:        discount factor for future rewards.
        epsilon:      probability of choosing a random (exploratory) action.
        q_table_file: path used to persist/restore the Q-table
                      (was hard-coded to 'q_table.pkl'; the default keeps
                      the old behavior).
        """
        self.alpha = alpha      # Learning rate
        self.gamma = gamma      # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.q_table_file = q_table_file
        self.q_table = {}
        self.load_q_table()

    def get_state_key(self, state):
        """Convert a (possibly numpy) state vector into a hashable dict key."""
        return tuple(state)

    def get_q_value(self, state, action):
        """Return Q(state, action), lazily initializing unseen states to zeros."""
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        return self.q_table[state_key][action]

    def set_q_value(self, state, action, value):
        """Set Q(state, action), lazily initializing unseen states to zeros."""
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        self.q_table[state_key][action] = value

    def choose_action(self, state, available_actions):
        """Epsilon-greedy selection restricted to `available_actions`.

        With probability epsilon a random legal action is returned;
        otherwise a highest-valued action, with ties broken at random.
        `available_actions` must be non-empty.
        """
        if np.random.random() < self.epsilon:
            return np.random.choice(available_actions)
        q_values = [self.get_q_value(state, action) for action in available_actions]
        max_q = max(q_values)
        best_actions = [action for action, q in zip(available_actions, q_values) if q == max_q]
        return np.random.choice(best_actions)

    def learn(self, state, action, reward, next_state, next_available_actions, done):
        """One Q-learning update: Q <- Q + alpha * (target - Q).

        The target is `reward` for terminal transitions, otherwise
        reward + gamma * max over next legal actions of Q(next_state, a).
        """
        current_q = self.get_q_value(state, action)

        if done:
            target = reward
        else:
            next_q_values = [self.get_q_value(next_state, a) for a in next_available_actions]
            max_next_q = max(next_q_values) if next_q_values else 0
            target = reward + self.gamma * max_next_q

        self.set_q_value(state, action, current_q + self.alpha * (target - current_q))

    def save_q_table(self):
        """Persist the Q-table to `self.q_table_file` with pickle."""
        with open(self.q_table_file, 'wb') as f:
            pickle.dump(self.q_table, f)
        print("Q-table saved.")

    def load_q_table(self):
        """Load a previously saved Q-table if the file exists.

        NOTE: pickle.load can execute arbitrary code from the file —
        only load Q-table files produced by this program.
        """
        if os.path.exists(self.q_table_file):
            with open(self.q_table_file, 'rb') as f:
                self.q_table = pickle.load(f)
            print("Q-table loaded.")
        else:
            print("No saved Q-table found. Starting fresh.")

    def train(self, game, episodes=10000):
        """Train by playing `episodes` full games, then save the Q-table.

        NOTE(review): the agent makes every move of the game (both X and
        O) and learns from each, i.e. self-play through a single shared
        policy rather than training against an independent opponent —
        confirm this matches the intended training regime.
        """
        print("Training the agent...")
        for episode in range(episodes):
            state = game.reset()
            done = False

            while not done:
                available_actions = game.get_available_actions()
                action = self.choose_action(state, available_actions)

                next_state, reward, done = game.make_move(action)
                next_available_actions = game.get_available_actions() if not done else []

                self.learn(state, action, reward, next_state, next_available_actions, done)

                state = next_state

            if (episode + 1) % 1000 == 0:
                print(f"Episode {episode + 1}/{episodes} completed.")

        self.save_q_table()
        print("Training completed!")

    def play(self, game):
        """Play one interactive game: the agent moves first, the human second."""
        state = game.reset()
        done = False

        while not done:
            available_actions = game.get_available_actions()
            action = self.choose_action(state, available_actions)
            print(f"Agent plays at position {action}")

            next_state, reward, done = game.make_move(action)
            state = next_state
            game.print_board()

            if not done:
                print("\nYour turn:")
                human_action = game.get_human_move()
                next_state, reward, done = game.make_move(human_action)
                state = next_state
                game.print_board()

        game.print_board()
        if game.winner == 1:
            print("Agent wins!")
        elif game.winner == -1:
            print("You win!")
        else:
            print("It's a draw!")
71 changes: 71 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/game.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import numpy as np

class TicTacToe:
    """3x3 Tic-Tac-Toe environment with a gym-like (state, reward, done) API.

    Board cells: 0 empty, 1 X, -1 O. Actions are flat indices 0-8 in
    row-major order. Rewards: +1 for the move that wins, 0.5 for the move
    that fills the board to a draw, -10 for an illegal move, 0 otherwise.

    NOTE(review): the winning move earns +1 regardless of which player
    made it, so during training the opponent's winning move is also
    rewarded positively — confirm this matches the intended learning
    setup before relying on the trained policy.
    """

    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1  # 1 for X, -1 for O
        self.winner = None       # 1, -1, or None (draw / game in progress)
        self.game_over = False

    def reset(self):
        """Clear the board, restore X to move, and return the initial state."""
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1
        self.winner = None
        self.game_over = False
        return self.get_state()

    def get_state(self):
        """Return the board as a flat length-9 numpy array."""
        return self.board.flatten()

    def get_available_actions(self):
        """Return the flat indices of all empty cells."""
        return [i for i in range(9) if self.board[i // 3, i % 3] == 0]

    def make_move(self, action):
        """Apply `action` for the current player; return (state, reward, done).

        An illegal move (occupied cell) ends the game with a -10 penalty.
        On a non-terminal legal move the turn passes to the other player.
        """
        if self.game_over:
            return self.get_state(), 0, True

        row, col = action // 3, action % 3
        if self.board[row, col] != 0:
            # Bug fix: the original returned done=True here without setting
            # game_over, leaving the object claiming the game was still in
            # progress. Keep the internal flag consistent with the return.
            self.game_over = True
            return self.get_state(), -10, True  # Invalid move penalty

        self.board[row, col] = self.current_player

        if self.check_winner():
            self.winner = self.current_player
            self.game_over = True
            return self.get_state(), 1, True
        elif len(self.get_available_actions()) == 0:
            self.game_over = True
            return self.get_state(), 0.5, True  # Draw
        else:
            self.current_player = -self.current_player
            return self.get_state(), 0, False

    def check_winner(self):
        """Return True if any row, column, or diagonal is three-in-a-row."""
        # |sum| == 3 means three identical non-zero marks on the line.
        for i in range(3):
            if abs(sum(self.board[i, :])) == 3 or abs(sum(self.board[:, i])) == 3:
                return True
        if abs(self.board[0, 0] + self.board[1, 1] + self.board[2, 2]) == 3:
            return True
        if abs(self.board[0, 2] + self.board[1, 1] + self.board[2, 0]) == 3:
            return True
        return False

    def print_board(self):
        """Pretty-print the board using X/O symbols with row separators."""
        symbols = {0: ' ', 1: 'X', -1: 'O'}
        for i in range(3):
            print(' | '.join(symbols[self.board[i, j]] for j in range(3)))
            if i < 2:
                print('---------')

    def get_human_move(self):
        """Prompt until the human enters a currently legal move index (0-8)."""
        while True:
            try:
                move = int(input("Enter your move (0-8): "))
                if move in self.get_available_actions():
                    return move
                else:
                    print("Invalid move. Try again.")
            except ValueError:
                print("Please enter a number between 0-8.")
39 changes: 39 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from game import TicTacToe
from agent import QLearningAgent

def main():
    """Entry point: menu loop to train the Q-learning agent or play against it."""
    game = TicTacToe()
    agent = QLearningAgent()

    print("Reinforcement Learning Tic-Tac-Toe Agent")
    print("The agent uses Q-learning to play Tic-Tac-Toe against you.")
    print()

    while True:
        print("Menu:")
        print("1. Train the agent")
        print("2. Play against the agent")
        print("3. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            agent.train(game, _prompt_episodes())
        elif choice == "2":
            print("You are O, Agent is X. You go second.")
            print("Board positions:")
            print("0 | 1 | 2")
            print("---------")
            print("3 | 4 | 5")
            print("---------")
            print("6 | 7 | 8")
            print()
            agent.play(game)
        elif choice == "3":
            break
        else:
            print("Invalid choice. Please try again.")


def _prompt_episodes(default=10000):
    """Ask for an episode count; empty input yields `default`.

    Bug fix: the original `int(input(...) or "10000")` raised ValueError
    and crashed on non-numeric input such as "abc" — re-prompt instead.
    """
    while True:
        raw = input("Enter number of training episodes (default 10000): ").strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print("Invalid number. Please enter a valid integer.")

# Start the menu loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
numpy==1.24.3
Loading