Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions Easy/26_Hangman.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import random

def choose_word():
    """Pick a random secret word from the built-in pool and return it uppercased."""
    word_pool = (
        "python", "hangman", "computer", "programming", "algorithm",
        "variable", "function", "loop", "condition", "string",
    )
    return random.choice(word_pool).upper()

def display_hangman(tries):
    """Return the ASCII-art gallows picture for the given number of remaining tries.

    ``tries`` counts remaining wrong guesses, from 6 (empty gallows, no
    body drawn) down to 0 (complete figure, game lost). ``stages[i]`` is
    the picture shown when ``i`` tries remain, so each wrong guess draws
    one more body part.
    """
    stages = [
        # 0 remaining tries: full figure (head, both arms, torso, both legs).
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |   / \\
        -
        """,
        # 1 remaining try: one leg still missing.
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |   /
        -
        """,
        # 2 remaining tries: no legs.
        """
        --------
        |    |
        |    O
        |   \\|/
        |    |
        |
        -
        """,
        # 3 remaining tries: one arm and torso.
        """
        --------
        |    |
        |    O
        |   \\|
        |    |
        |
        -
        """,
        # 4 remaining tries: head and torso only.
        """
        --------
        |    |
        |    O
        |    |
        |    |
        |
        -
        """,
        # 5 remaining tries: head only.
        """
        --------
        |    |
        |    O
        |
        |
        |
        -
        """,
        # 6 remaining tries: empty gallows.
        """
        --------
        |    |
        |
        |
        |
        |
        -
        """
    ]
    return stages[tries]

def play_hangman():
    """Run one interactive round of Hangman on the console.

    Picks a secret word, then repeatedly prompts for single-letter
    guesses until the word is fully revealed or the player has made
    six wrong guesses.
    """
    word = choose_word()
    remaining_letters = set(word)   # letters of the word not yet guessed
    guessed_letters = set()         # every letter the player has tried
    tries = 6

    print("Welcome to Hangman!")
    print(display_hangman(tries))
    print("_ " * len(word))

    while tries > 0 and remaining_letters:
        # Re-prompt until exactly one alphabetic character is entered.
        guess = input("Guess a letter: ").strip().upper()
        while not (len(guess) == 1 and guess.isalpha()):
            print("Invalid input. Please enter a single alphabetic character.")
            guess = input("Guess a letter: ").strip().upper()

        if guess in guessed_letters:
            print("You already guessed that letter.")
        elif guess in remaining_letters:
            remaining_letters.discard(guess)
            guessed_letters.add(guess)
            print("Good guess!")
        else:
            guessed_letters.add(guess)
            tries -= 1
            print("Wrong guess!")

        print(display_hangman(tries))
        print(" ".join(c if c in guessed_letters else "_" for c in word))

    if remaining_letters:
        print(f"Sorry, you ran out of tries. The word was: {word}")
    else:
        print(f"Congratulations! You guessed the word: {word}")

# Run the game only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    play_hangman()
112 changes: 112 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import numpy as np
import pickle
import os

class QLearningAgent:
    """Tabular Q-learning agent for a 9-action board game (Tic-Tac-Toe).

    Q-values live in a dict keyed by the flattened board state (converted
    to a tuple so it is hashable); each value is a length-9 numpy array
    holding one Q-value per board position.
    """

    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1, q_table_file='q_table.pkl'):
        """Create an agent and load any previously saved Q-table.

        alpha:        learning rate for the Q update.
        gamma:        discount factor for future rewards.
        epsilon:      probability of choosing a random (exploratory) action.
        q_table_file: path used to persist/restore the Q-table
                      (was hard-coded to 'q_table.pkl'; the default keeps
                      the old behavior).
        """
        self.alpha = alpha      # Learning rate
        self.gamma = gamma      # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.q_table_file = q_table_file
        self.q_table = {}
        self.load_q_table()

    def get_state_key(self, state):
        """Convert a (possibly numpy) state vector into a hashable dict key."""
        return tuple(state)

    def get_q_value(self, state, action):
        """Return Q(state, action), lazily initializing unseen states to zeros."""
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        return self.q_table[state_key][action]

    def set_q_value(self, state, action, value):
        """Set Q(state, action), lazily initializing unseen states to zeros."""
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        self.q_table[state_key][action] = value

    def choose_action(self, state, available_actions):
        """Epsilon-greedy selection restricted to `available_actions`.

        With probability epsilon a random legal action is returned;
        otherwise a highest-valued action, with ties broken at random.
        `available_actions` must be non-empty.
        """
        if np.random.random() < self.epsilon:
            return np.random.choice(available_actions)
        q_values = [self.get_q_value(state, action) for action in available_actions]
        max_q = max(q_values)
        best_actions = [action for action, q in zip(available_actions, q_values) if q == max_q]
        return np.random.choice(best_actions)

    def learn(self, state, action, reward, next_state, next_available_actions, done):
        """One Q-learning update: Q <- Q + alpha * (target - Q).

        The target is `reward` for terminal transitions, otherwise
        reward + gamma * max over next legal actions of Q(next_state, a).
        """
        current_q = self.get_q_value(state, action)

        if done:
            target = reward
        else:
            next_q_values = [self.get_q_value(next_state, a) for a in next_available_actions]
            max_next_q = max(next_q_values) if next_q_values else 0
            target = reward + self.gamma * max_next_q

        self.set_q_value(state, action, current_q + self.alpha * (target - current_q))

    def save_q_table(self):
        """Persist the Q-table to `self.q_table_file` with pickle."""
        with open(self.q_table_file, 'wb') as f:
            pickle.dump(self.q_table, f)
        print("Q-table saved.")

    def load_q_table(self):
        """Load a previously saved Q-table if the file exists.

        NOTE: pickle.load can execute arbitrary code from the file —
        only load Q-table files produced by this program.
        """
        if os.path.exists(self.q_table_file):
            with open(self.q_table_file, 'rb') as f:
                self.q_table = pickle.load(f)
            print("Q-table loaded.")
        else:
            print("No saved Q-table found. Starting fresh.")

    def train(self, game, episodes=10000):
        """Train by playing `episodes` full games, then save the Q-table.

        NOTE(review): the agent makes every move of the game (both X and
        O) and learns from each, i.e. self-play through a single shared
        policy rather than training against an independent opponent —
        confirm this matches the intended training regime.
        """
        print("Training the agent...")
        for episode in range(episodes):
            state = game.reset()
            done = False

            while not done:
                available_actions = game.get_available_actions()
                action = self.choose_action(state, available_actions)

                next_state, reward, done = game.make_move(action)
                next_available_actions = game.get_available_actions() if not done else []

                self.learn(state, action, reward, next_state, next_available_actions, done)

                state = next_state

            if (episode + 1) % 1000 == 0:
                print(f"Episode {episode + 1}/{episodes} completed.")

        self.save_q_table()
        print("Training completed!")

    def play(self, game):
        """Play one interactive game: the agent moves first, the human second."""
        state = game.reset()
        done = False

        while not done:
            available_actions = game.get_available_actions()
            action = self.choose_action(state, available_actions)
            print(f"Agent plays at position {action}")

            next_state, reward, done = game.make_move(action)
            state = next_state
            game.print_board()

            if not done:
                print("\nYour turn:")
                human_action = game.get_human_move()
                next_state, reward, done = game.make_move(human_action)
                state = next_state
                game.print_board()

        game.print_board()
        if game.winner == 1:
            print("Agent wins!")
        elif game.winner == -1:
            print("You win!")
        else:
            print("It's a draw!")
71 changes: 71 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/game.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import numpy as np

class TicTacToe:
    """3x3 Tic-Tac-Toe environment with a gym-like (state, reward, done) API.

    Board cells: 0 empty, 1 X, -1 O. Actions are flat indices 0-8 in
    row-major order. Rewards: +1 for the move that wins, 0.5 for the move
    that fills the board to a draw, -10 for an illegal move, 0 otherwise.

    NOTE(review): the winning move earns +1 regardless of which player
    made it, so during training the opponent's winning move is also
    rewarded positively — confirm this matches the intended learning
    setup before relying on the trained policy.
    """

    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1  # 1 for X, -1 for O
        self.winner = None       # 1, -1, or None (draw / game in progress)
        self.game_over = False

    def reset(self):
        """Clear the board, restore X to move, and return the initial state."""
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1
        self.winner = None
        self.game_over = False
        return self.get_state()

    def get_state(self):
        """Return the board as a flat length-9 numpy array."""
        return self.board.flatten()

    def get_available_actions(self):
        """Return the flat indices of all empty cells."""
        return [i for i in range(9) if self.board[i // 3, i % 3] == 0]

    def make_move(self, action):
        """Apply `action` for the current player; return (state, reward, done).

        An illegal move (occupied cell) ends the game with a -10 penalty.
        On a non-terminal legal move the turn passes to the other player.
        """
        if self.game_over:
            return self.get_state(), 0, True

        row, col = action // 3, action % 3
        if self.board[row, col] != 0:
            # Bug fix: the original returned done=True here without setting
            # game_over, leaving the object claiming the game was still in
            # progress. Keep the internal flag consistent with the return.
            self.game_over = True
            return self.get_state(), -10, True  # Invalid move penalty

        self.board[row, col] = self.current_player

        if self.check_winner():
            self.winner = self.current_player
            self.game_over = True
            return self.get_state(), 1, True
        elif len(self.get_available_actions()) == 0:
            self.game_over = True
            return self.get_state(), 0.5, True  # Draw
        else:
            self.current_player = -self.current_player
            return self.get_state(), 0, False

    def check_winner(self):
        """Return True if any row, column, or diagonal is three-in-a-row."""
        # |sum| == 3 means three identical non-zero marks on the line.
        for i in range(3):
            if abs(sum(self.board[i, :])) == 3 or abs(sum(self.board[:, i])) == 3:
                return True
        if abs(self.board[0, 0] + self.board[1, 1] + self.board[2, 2]) == 3:
            return True
        if abs(self.board[0, 2] + self.board[1, 1] + self.board[2, 0]) == 3:
            return True
        return False

    def print_board(self):
        """Pretty-print the board using X/O symbols with row separators."""
        symbols = {0: ' ', 1: 'X', -1: 'O'}
        for i in range(3):
            print(' | '.join(symbols[self.board[i, j]] for j in range(3)))
            if i < 2:
                print('---------')

    def get_human_move(self):
        """Prompt until the human enters a currently legal move index (0-8)."""
        while True:
            try:
                move = int(input("Enter your move (0-8): "))
                if move in self.get_available_actions():
                    return move
                else:
                    print("Invalid move. Try again.")
            except ValueError:
                print("Please enter a number between 0-8.")
39 changes: 39 additions & 0 deletions Expert/26_Reinforcement_Learning_Game_Agent/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from game import TicTacToe
from agent import QLearningAgent

def main():
    """Entry point: menu loop to train the Q-learning agent or play against it."""
    game = TicTacToe()
    agent = QLearningAgent()

    print("Reinforcement Learning Tic-Tac-Toe Agent")
    print("The agent uses Q-learning to play Tic-Tac-Toe against you.")
    print()

    while True:
        print("Menu:")
        print("1. Train the agent")
        print("2. Play against the agent")
        print("3. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            agent.train(game, _prompt_episodes())
        elif choice == "2":
            print("You are O, Agent is X. You go second.")
            print("Board positions:")
            print("0 | 1 | 2")
            print("---------")
            print("3 | 4 | 5")
            print("---------")
            print("6 | 7 | 8")
            print()
            agent.play(game)
        elif choice == "3":
            break
        else:
            print("Invalid choice. Please try again.")


def _prompt_episodes(default=10000):
    """Ask for an episode count; empty input yields `default`.

    Bug fix: the original `int(input(...) or "10000")` raised ValueError
    and crashed on non-numeric input such as "abc" — re-prompt instead.
    """
    while True:
        raw = input("Enter number of training episodes (default 10000): ").strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print("Invalid number. Please enter a valid integer.")

# Start the menu loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
numpy==1.24.3
Loading