|
| 1 | +""" |
| 2 | +This is a word-prediction algorithm that uses your own WhatsApp chats to show you predictions. |
| 3 | +It takes into account the words that follow a given word, and their frequencies, in the texts you have exchanged with a WhatsApp contact. |
| 4 | +
|
| 5 | +Requirements: |
| 6 | +1 ) You should have pandas installed -- pip install pandas |
| 7 | +2 ) You should have your WhatsApp chats downloaded on your PC as Chats.txt |
| 8 | +
|
| 9 | +## To download the whatsapp chats: |
| 10 | +1) Open WhatsApp on your phone. |
| 11 | +2) Pick a contact of your choice, and open their chat. |
| 12 | +3) Tap the 3 dots on the upper-right side of the chat. |
| 13 | +4) Go to More -> Export Chat (the chat will then be downloaded). |
| 14 | +5) Copy it to your PC, and replace Chats.txt in the code with the name of the downloaded txt file (and its directory, if required). |
| 15 | +
|
| 16 | +""" |
| 17 | + |
| 18 | + |
| 19 | +import pandas as pd |
| 20 | + |
class WhatsAppChat:
    """Word statistics built from an exported WhatsApp chat text file.

    Attributes:
        titles: Values of column 2 of the parsed chat file, one per row.
        word_freq_dict: Maps each word longer than one character to its
            number of occurrences, ordered from most to least frequent.
        next_words_dict: Maps each word to a dict of the words seen
            immediately after it and how often each one followed.
    """

    def __init__(self, filename):
        """Read *filename* and build the frequency and next-word tables."""
        self.titles = self.read_chat_file(filename)
        self.word_freq_dict = self.count_word_frequency()
        self.next_words_dict = self.find_next_words()

    def read_chat_file(self, filename):
        """Parse the chat file as fixed-width text and return column 2 as a list.

        Raises:
            KeyError: If the parsed file has fewer than three columns.
        """
        chat_df = pd.read_fwf(filename, header=None)
        # NOTE(review): column 2 is presumably where the message text lands
        # after pandas infers the column widths -- confirm against a real
        # export, since the inferred layout depends on the file contents.
        return list(chat_df[2])

    def _title_words(self):
        """Yield the list of whitespace-separated words of each usable title."""
        for title in self.titles:
            # pandas represents empty cells as NaN; str(NaN) would otherwise
            # be counted as the literal word "nan".
            if pd.isna(title):
                continue
            yield str(title).split()

    def count_word_frequency(self):
        """Count how often each word occurs across all titles.

        Single-character words are ignored.

        Returns:
            A dict mapping word -> count, ordered by descending count; words
            with equal counts keep their first-seen order.
        """
        from collections import Counter

        counts = Counter(
            word
            for words in self._title_words()
            for word in words
            if len(word) > 1
        )
        # most_common() sorts stably by descending count, so ties stay in
        # insertion (first-seen) order.
        return dict(counts.most_common())

    def find_next_words(self):
        """Count, for every word, which words immediately follow it.

        Only adjacent words within the same title are paired; the last word
        of a title has no follower. Unlike count_word_frequency, words of
        any length are included.

        Returns:
            A dict mapping word -> {following word: count}.
        """
        from collections import Counter, defaultdict

        followers = defaultdict(Counter)
        for words in self._title_words():
            # split() already discards all whitespace, so the words need no
            # further newline stripping.
            for current_word, next_word in zip(words, words[1:]):
                followers[current_word][next_word] += 1
        # Hand back plain dicts so that lookups of unknown words by callers
        # never create entries as a side effect.
        return {word: dict(counts) for word, counts in followers.items()}
| 70 | + |
| 71 | + |
class NextWordPredictor:
    """Suggests likely next words using a chat's next-word counts.

    Attributes:
        chat: Object exposing ``next_words_dict``, a mapping of
            word -> {following word: count}.
        num_predictions: Maximum number of suggestions to return per query.
    """

    def __init__(self, chat, num_predictions=1):
        """Store the chat statistics source and the suggestion limit."""
        self.chat = chat
        self.num_predictions = num_predictions

    def predict_next_word(self, current_word):
        """Return the most frequent followers of *current_word*.

        Returns:
            A list of at most ``num_predictions`` words, most frequent
            first (ties keep the order in which they were recorded), or
            None when *current_word* has no recorded followers. A zero or
            negative ``num_predictions`` yields an empty list.
        """
        follower_counts = self.chat.next_words_dict.get(current_word)
        if follower_counts is None:
            return None
        # Clamp the limit: a negative slice bound would count from the end
        # and return almost every candidate instead of none.
        limit = max(self.num_predictions, 0)
        ranked = sorted(follower_counts, key=follower_counts.get, reverse=True)
        return ranked[:limit]
| 92 | + |
# Build the word statistics from the exported chat file Chats.txt.
chat = WhatsAppChat("Chats.txt")

# n is how many next-word suggestions to request; given_word is the word
# whose followers should be predicted.
n = 3
given_word = "good"
predictor = NextWordPredictor(chat, num_predictions=n)

# Print up to n predicted next words for given_word (None if it was never
# followed by another word in the chat).
print(predictor.predict_next_word(given_word))
0 commit comments