Skip to content
This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit d877c5a

Browse files
Merge pull request #512 from nik-6174/main
Create main.py
2 parents 4b7a2db + 878bceb commit d877c5a

1 file changed

Lines changed: 100 additions & 0 deletions

File tree

projects/Word_Predictor/main.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""
2+
This is a word-prediction algorithm that uses your own WhatsApp chats to show you predictions.
3+
It takes into account the words that follow a given word, and their frequencies, in the texts you have exchanged with a WhatsApp contact.
4+
5+
Requirements:
6+
1 ) You should have pandas installed -- pip install pandas
7+
2 ) You should have WhatsApp chats downloaded on your PC as Chats.txt
8+
9+
## To download the whatsapp chats:
10+
1) Open whatsapp on your phone.
11+
2) Pick a contact of your choice, and open their chat
12+
3) Click on the 3 dots on the upper-right side of the chat
13+
4) Go to More -> Export Chat (The chats will then be downloaded)
14+
5) Copy it to your PC, and replace Chats.txt in the code with the name of the downloaded txt file (and its directory, if required)
15+
16+
"""
17+
18+
19+
import pandas as pd
20+
21+
class WhatsAppChat:
    """Word statistics built from an exported WhatsApp chat text file.

    Attributes set by ``__init__``:
        titles: values of column 2 of the file as parsed by ``read_chat_file``.
        word_freq_dict: ``{word: count}``, ordered from most to least frequent.
        next_words_dict: ``{word: {following_word: count}}``.
    """

    def __init__(self, filename):
        """Read *filename* and build the word and next-word frequency tables."""
        self.titles = self.read_chat_file(filename)
        self.word_freq_dict = self.count_word_frequency()
        self.next_words_dict = self.find_next_words()

    def read_chat_file(self, filename):
        """Read a WhatsApp chat text file and return a list of titles.

        The file is parsed with ``pandas.read_fwf`` (fixed-width columns, no
        header row) and the values of the column labelled ``2`` are returned.
        Cells that pandas considers missing may come back as NaN; they are
        kept here and skipped later when the titles are split into words.
        """
        chat_df = pd.read_fwf(filename, header=None)
        return list(chat_df[2])

    @staticmethod
    def _split_words(title):
        """Return the whitespace-separated words of *title* ([] if missing)."""
        # A missing cell (None / float NaN) must not be stringified: str(nan)
        # is "nan", which would otherwise be counted as a real chat word.
        is_nan = isinstance(title, float) and title != title
        if title is None or is_nan:
            return []
        return str(title).split()

    def count_word_frequency(self):
        """Count how often each word occurs across all titles.

        Words of a single character are ignored. Returns a dict mapping
        word -> count, ordered by descending count; words with equal counts
        keep the order in which they were first seen.
        """
        counts = {}
        for title in self.titles:
            for word in self._split_words(title):
                if len(word) > 1:
                    counts[word] = counts.get(word, 0) + 1
        # sorted() is stable, so ties stay in first-seen order.
        return dict(sorted(counts.items(),
                           key=lambda item: item[1],
                           reverse=True))

    def find_next_words(self):
        """Count, for every word, the words that directly follow it.

        Returns a dict mapping word -> {following_word: count}. Only pairs
        inside the same title are counted; the last word of a title has no
        follower.
        """
        next_words_dict = {}
        for title in self.titles:
            words = self._split_words(title)
            # zip(words, words[1:]) yields each adjacent (current, next) pair.
            for current_word, next_word in zip(words, words[1:]):
                followers = next_words_dict.setdefault(current_word, {})
                followers[next_word] = followers.get(next_word, 0) + 1
        return next_words_dict
70+
71+
72+
class NextWordPredictor:
    """Suggests likely next words using a chat's next-word counts."""

    def __init__(self, chat, num_predictions=1):
        """Store the chat object and how many predictions to return."""
        self.num_predictions = num_predictions
        self.chat = chat

    def predict_next_word(self, current_word):
        """Return the most frequent followers of *current_word*.

        Looks *current_word* up in ``self.chat.next_words_dict`` and returns
        up to ``num_predictions`` following words, most frequent first.
        Returns None when the word has no entry in that dictionary.
        """
        table = self.chat.next_words_dict
        if current_word not in table:
            # Unknown word: there is nothing to base a prediction on.
            return None

        followers = table[current_word]
        # Rank the candidate words by their count, highest first; the sort is
        # stable, so equally frequent candidates keep their stored order.
        ranked = sorted(followers, key=followers.get, reverse=True)
        return ranked[:self.num_predictions]
92+
93+
# n is the number of candidate next words to report for given_word.
n = 3
given_word = 'good'

# Load Chats.txt into a WhatsAppChat and wrap it in a NextWordPredictor.
chat = WhatsAppChat('Chats.txt')
predictor = NextWordPredictor(chat, num_predictions=n)

# Print the top n next-word predictions for given_word.
print(predictor.predict_next_word(given_word))

0 commit comments

Comments
 (0)