From e7adc80bc4f678634bcbb0d4c798c77b759d25bf Mon Sep 17 00:00:00 2001 From: Atridad Lahiji Date: Mon, 22 Dec 2025 23:36:58 -0700 Subject: [PATCH] Updated deps + cleaned up mchain --- Dockerfile | 2 +- command/markov.go | 1160 +++++++++++++-------------------------------- go.mod | 2 +- 3 files changed, 322 insertions(+), 842 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9d8a960..d34562b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build stage -FROM golang:1.24.3 AS build +FROM golang:1.25.5 AS build WORKDIR /app diff --git a/command/markov.go b/command/markov.go index b8fb341..c7843c0 100644 --- a/command/markov.go +++ b/command/markov.go @@ -13,25 +13,23 @@ import ( "github.com/bwmarrin/discordgo" ) -// Cache for Markov chains to avoid rebuilding for the same channel/message count +// MarkovData holds the Markov chain data for different n-gram sizes +type MarkovData struct { + // n-gram size -> prefix -> list of suffixes + Chains map[int]map[string][]string +} + +// MarkovCache caches chains to avoid rebuilding type MarkovCache struct { - chains map[string]map[string][]string - twoGrams map[string]map[string]map[string][]string - threeGrams map[string]map[string]map[string]map[string][]string - fourGrams map[string]map[string]map[string]map[string]map[string][]string - fiveGrams map[string]map[string]map[string]map[string]map[string]map[string][]string - hashes map[string]string - mu sync.RWMutex + data map[string]*MarkovData + hashes map[string]string + mu sync.RWMutex } var ( markovCache = &MarkovCache{ - chains: make(map[string]map[string][]string), - twoGrams: make(map[string]map[string]map[string][]string), - threeGrams: make(map[string]map[string]map[string]map[string][]string), - fourGrams: make(map[string]map[string]map[string]map[string]map[string][]string), - fiveGrams: make(map[string]map[string]map[string]map[string]map[string]map[string][]string), - hashes: make(map[string]string), + data: make(map[string]*MarkovData), + hashes: make(map[string]string), } // Regex for cleaning text urlRegex = regexp.MustCompile(`https?://[^\s]+`) @@ -42,22 +40,22 @@ var ( func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) { channelID := i.ChannelID - numMessages := lib.AppConfig.MarkovDefaultMessages // Default value from config + numMessages := lib.AppConfig.MarkovDefaultMessages if len(i.ApplicationCommandData().Options) > 0 { if i.ApplicationCommandData().Options[0].Name == "messages" { numMessages = int(i.ApplicationCommandData().Options[0].IntValue()) if numMessages <= 0 { numMessages = lib.AppConfig.MarkovDefaultMessages } else if numMessages > lib.AppConfig.MarkovMaxMessages { - numMessages = lib.AppConfig.MarkovMaxMessages // Limit from config + numMessages = lib.AppConfig.MarkovMaxMessages } } } - // Check cache first + // Check cache cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages) - if chain := getCachedChain(cacheKey); chain != nil { - newMessage := generateMessage(chain) + if data := getCachedChain(cacheKey); data != nil { + newMessage := generateAdvancedMessage(data) if newMessage != "" { return newMessage, nil } @@ -69,16 +67,16 @@ func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string return "", err } - // Build the Markov chain from the fetched messages - chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain := buildMarkovChain(allMessages) + // Build chain + data := buildMarkovChain(allMessages) - // Cache the chain - setCachedChain(cacheKey, chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, allMessages) + // Cache chain + setCachedChain(cacheKey, data, allMessages) - // Generate a new message using the improved Markov chain - newMessage := generateAdvancedMessage(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain) + // Generate message + newMessage := generateAdvancedMessage(data) - // Check if the generated message is empty and provide a fallback message + // Fallback if empty if newMessage == "" { newMessage = "I couldn't generate a message. The channel might be empty or contain no usable text." } @@ -86,78 +84,31 @@ func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string return newMessage, nil } -func getCachedChain(cacheKey string) map[string][]string { +func getCachedChain(cacheKey string) *MarkovData { markovCache.mu.RLock() defer markovCache.mu.RUnlock() - - if chain, exists := markovCache.chains[cacheKey]; exists { - return chain + + if data, exists := markovCache.data[cacheKey]; exists { + return data } return nil } -func getCachedTwoGramChain(cacheKey string) map[string]map[string][]string { - markovCache.mu.RLock() - defer markovCache.mu.RUnlock() - - if twoGram, exists := markovCache.twoGrams[cacheKey]; exists { - return twoGram - } - return nil -} - -func getCachedThreeGramChain(cacheKey string) map[string]map[string]map[string][]string { - markovCache.mu.RLock() - defer markovCache.mu.RUnlock() - - if threeGram, exists := markovCache.threeGrams[cacheKey]; exists { - return threeGram - } - return nil -} - -func getCachedFourGramChain(cacheKey string) map[string]map[string]map[string]map[string][]string { - markovCache.mu.RLock() - defer markovCache.mu.RUnlock() - - if fourGram, exists := markovCache.fourGrams[cacheKey]; exists { - return fourGram - } - return nil -} - -func getCachedFiveGramChain(cacheKey string) map[string]map[string]map[string]map[string]map[string][]string { - markovCache.mu.RLock() - defer markovCache.mu.RUnlock() - - if fiveGram, exists := markovCache.fiveGrams[cacheKey]; exists { - return fiveGram - } - return nil -} - -func setCachedChain(cacheKey string, chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, messages []*discordgo.Message) { +func setCachedChain(cacheKey string, data *MarkovData, messages []*discordgo.Message) { hash := hashMessages(messages) - + markovCache.mu.Lock() defer markovCache.mu.Unlock() - - if len(chain) > 10 { - markovCache.chains[cacheKey] = chain - markovCache.twoGrams[cacheKey] = twoGramChain - markovCache.threeGrams[cacheKey] = threeGramChain - markovCache.fourGrams[cacheKey] = fourGramChain - markovCache.fiveGrams[cacheKey] = fiveGramChain + + // Only cache if we have some data + if len(data.Chains[1]) > 10 { + markovCache.data[cacheKey] = data markovCache.hashes[cacheKey] = hash - + // Simple FIFO cache cleanup - if len(markovCache.chains) > lib.AppConfig.MarkovCacheSize { - for k := range markovCache.chains { - delete(markovCache.chains, k) - delete(markovCache.twoGrams, k) - delete(markovCache.threeGrams, k) - delete(markovCache.fourGrams, k) - delete(markovCache.fiveGrams, k) + if len(markovCache.data) > lib.AppConfig.MarkovCacheSize { + for k := range markovCache.data { + delete(markovCache.data, k) delete(markovCache.hashes, k) break } @@ -178,6 +129,9 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]* var allMessages []*discordgo.Message var lastMessageID string + // Pre-allocate + allMessages = make([]*discordgo.Message, 0, numMessages) + for len(allMessages) < numMessages { batchSize := 100 if numMessages-len(allMessages) < 100 { @@ -190,10 +144,10 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]* } if len(batch) == 0 { - break // No more messages to fetch + break } - // Filter out bot messages and empty messages during fetch + // Filter messages for _, msg := range batch { if !msg.Author.Bot && len(strings.TrimSpace(msg.Content)) > 0 { allMessages = append(allMessages, msg) @@ -203,35 +157,29 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]* lastMessageID = batch[len(batch)-1].ID if len(batch) < 100 { - break // Less than 100 messages returned, we've reached the end + break } } return allMessages, nil } -// cleanText removes URLs, mentions, emojis, and normalizes text +// cleanText normalizes text func cleanText(text string) string { - // Remove URLs text = urlRegex.ReplaceAllString(text, "") - // Remove mentions text = mentionRegex.ReplaceAllString(text, "") - // Remove custom emojis text = emojiRegex.ReplaceAllString(text, "") - // Normalize whitespace text = strings.Join(strings.Fields(text), " ") return strings.TrimSpace(text) } -// buildMarkovChain creates an improved Markov chain from a list of messages -func buildMarkovChain(messages []*discordgo.Message) (map[string][]string, map[string]map[string][]string, map[string]map[string]map[string][]string, map[string]map[string]map[string]map[string][]string, map[string]map[string]map[string]map[string]map[string][]string) { - chain := make(map[string][]string) - twoGramChain := make(map[string]map[string][]string) - threeGramChain := make(map[string]map[string]map[string][]string) - fourGramChain := make(map[string]map[string]map[string]map[string][]string) - fiveGramChain := make(map[string]map[string]map[string]map[string]map[string][]string) - - // Count total words for memory estimation +// buildMarkovChain creates a Markov chain from messages +func buildMarkovChain(messages []*discordgo.Message) *MarkovData { + data := &MarkovData{ + Chains: make(map[int]map[string][]string), + } + + // Count words totalWords := 0 for _, msg := range messages { cleanedContent := cleanText(msg.Content) @@ -240,255 +188,103 @@ func buildMarkovChain(messages []*discordgo.Message) (map[string][]string, map[s totalWords += len(words) } } - - // Estimate memory usage and adjust max n-gram level + + // Adjust n-gram level based on memory maxNGram := lib.AppConfig.MarkovMaxNGram estimatedMemoryMB := estimateMemoryUsage(totalWords, maxNGram) if estimatedMemoryMB > lib.AppConfig.MarkovMemoryLimit { - // Reduce n-gram level to stay within memory limits for maxNGram > 2 && estimateMemoryUsage(totalWords, maxNGram) > lib.AppConfig.MarkovMemoryLimit { maxNGram-- } } - + + // Init maps + for i := 1; i <= maxNGram; i++ { + data.Chains[i] = make(map[string][]string) + } + for _, msg := range messages { cleanedContent := cleanText(msg.Content) if len(cleanedContent) < 3 { continue } - + words := strings.Fields(cleanedContent) if len(words) < 2 { continue } - - // Build 1-gram chain - for i := 0; i < len(words)-1; i++ { - currentWord := strings.ToLower(words[i]) - nextWord := words[i+1] - - if len(currentWord) < 2 || strings.ContainsAny(currentWord, "!@#$%^&*()[]{}") { + + // Build chains + for n := 1; n <= maxNGram; n++ { + if len(words) <= n { continue } - - chain[currentWord] = append(chain[currentWord], nextWord) - } - - // Build 2-gram chain - if maxNGram >= 2 { - for i := 0; i < len(words)-2; i++ { - word1 := strings.ToLower(words[i]) - word2 := strings.ToLower(words[i+1]) - nextWord := words[i+2] - - if len(word1) < 2 || len(word2) < 2 || - strings.ContainsAny(word1, "!@#$%^&*()[]{}") || - strings.ContainsAny(word2, "!@#$%^&*()[]{}") { + + for i := 0; i < len(words)-n; i++ { + // Validate sequence + validSequence := true + for j := 0; j < n; j++ { + word := words[i+j] + if len(word) < 2 || strings.ContainsAny(word, "!@#$%^&*()[]{}") { + validSequence = false + break + } + } + if !validSequence { continue } - - if twoGramChain[word1] == nil { - twoGramChain[word1] = make(map[string][]string) + + // Build prefix + var prefixBuilder strings.Builder + for j := 0; j < n; j++ { + if j > 0 { + prefixBuilder.WriteString(" ") + } + prefixBuilder.WriteString(strings.ToLower(words[i+j])) } - twoGramChain[word1][word2] = append(twoGramChain[word1][word2], nextWord) - } - } - - // Build 3-gram chain - if maxNGram >= 3 { - for i := 0; i < len(words)-3; i++ { - word1 := strings.ToLower(words[i]) - word2 := strings.ToLower(words[i+1]) - word3 := strings.ToLower(words[i+2]) - nextWord := words[i+3] - - if len(word1) < 2 || len(word2) < 2 || len(word3) < 2 || - strings.ContainsAny(word1, "!@#$%^&*()[]{}") || - strings.ContainsAny(word2, "!@#$%^&*()[]{}") || - strings.ContainsAny(word3, "!@#$%^&*()[]{}") { - continue - } - - if threeGramChain[word1] == nil { - threeGramChain[word1] = make(map[string]map[string][]string) - } - if threeGramChain[word1][word2] == nil { - threeGramChain[word1][word2] = make(map[string][]string) - } - threeGramChain[word1][word2][word3] = append(threeGramChain[word1][word2][word3], nextWord) - } - } - - // Build 4-gram chain - if maxNGram >= 4 { - for i := 0; i < len(words)-4; i++ { - word1 := strings.ToLower(words[i]) - word2 := strings.ToLower(words[i+1]) - word3 := strings.ToLower(words[i+2]) - word4 := strings.ToLower(words[i+3]) - nextWord := words[i+4] - - if len(word1) < 2 || len(word2) < 2 || len(word3) < 2 || len(word4) < 2 || - strings.ContainsAny(word1, "!@#$%^&*()[]{}") || - strings.ContainsAny(word2, "!@#$%^&*()[]{}") || - strings.ContainsAny(word3, "!@#$%^&*()[]{}") || - strings.ContainsAny(word4, "!@#$%^&*()[]{}") { - continue - } - - if fourGramChain[word1] == nil { - fourGramChain[word1] = make(map[string]map[string]map[string][]string) - } - if fourGramChain[word1][word2] == nil { - fourGramChain[word1][word2] = make(map[string]map[string][]string) - } - if fourGramChain[word1][word2][word3] == nil { - fourGramChain[word1][word2][word3] = make(map[string][]string) - } - fourGramChain[word1][word2][word3][word4] = append(fourGramChain[word1][word2][word3][word4], nextWord) - } - } - - // Build 5-gram chain for maximum coherence - if maxNGram >= 5 { - for i := 0; i < len(words)-5; i++ { - word1 := strings.ToLower(words[i]) - word2 := strings.ToLower(words[i+1]) - word3 := strings.ToLower(words[i+2]) - word4 := strings.ToLower(words[i+3]) - word5 := strings.ToLower(words[i+4]) - nextWord := words[i+5] - - if len(word1) < 2 || len(word2) < 2 || len(word3) < 2 || len(word4) < 2 || len(word5) < 2 || - strings.ContainsAny(word1, "!@#$%^&*()[]{}") || - strings.ContainsAny(word2, "!@#$%^&*()[]{}") || - strings.ContainsAny(word3, "!@#$%^&*()[]{}") || - strings.ContainsAny(word4, "!@#$%^&*()[]{}") || - strings.ContainsAny(word5, "!@#$%^&*()[]{}") { - continue - } - - if fiveGramChain[word1] == nil { - fiveGramChain[word1] = make(map[string]map[string]map[string]map[string][]string) - } - if fiveGramChain[word1][word2] == nil { - fiveGramChain[word1][word2] = make(map[string]map[string]map[string][]string) - } - if fiveGramChain[word1][word2][word3] == nil { - fiveGramChain[word1][word2][word3] = make(map[string]map[string][]string) - } - if fiveGramChain[word1][word2][word3][word4] == nil { - fiveGramChain[word1][word2][word3][word4] = make(map[string][]string) - } - fiveGramChain[word1][word2][word3][word4][word5] = append(fiveGramChain[word1][word2][word3][word4][word5], nextWord) + prefix := prefixBuilder.String() + + nextWord := words[i+n] + data.Chains[n][prefix] = append(data.Chains[n][prefix], nextWord) } } } - - return chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain + + return data } -// estimateMemoryUsage estimates memory usage in MB for given word count and n-gram level +// estimateMemoryUsage estimates memory usage in MB func estimateMemoryUsage(wordCount int, maxNGram int) int { - // Rough estimates based on typical Discord channel patterns - baseMB := wordCount / 1000 // ~1MB per 1000 words for 1-gram - + baseMB := wordCount / 2000 + switch maxNGram { case 2: - return baseMB * 5 + return baseMB * 3 case 3: - return baseMB * 15 + return baseMB * 8 case 4: - return baseMB * 35 + return baseMB * 15 case 5: - return baseMB * 75 + return baseMB * 25 case 6: - return baseMB * 150 + return baseMB * 40 default: return baseMB } } -// generateMessage creates a new message using the Markov chain with improved logic -func generateMessage(chain map[string][]string) string { - if len(chain) == 0 { - return "" - } - - words := []string{} - var currentWord string - - // Start with a random word that has good follow-ups - attempts := 0 - for word, nextWords := range chain { - if len(nextWords) >= 2 && len(word) > 2 { // Prefer words with multiple options - currentWord = word - break - } - attempts++ - if attempts > 50 { // Fallback to any word - currentWord = word - break - } - } - - if currentWord == "" { - return "" - } - - // Generate between 5 and 25 words - maxWords := 5 + rand.Intn(20) - for i := 0; i < maxWords; i++ { - // Add current word (capitalize first word) - if i == 0 { - words = append(words, strings.Title(currentWord)) - } else { - words = append(words, currentWord) - } - - if nextWords, ok := chain[strings.ToLower(currentWord)]; ok && len(nextWords) > 0 { - // Randomly select the next word from the possible follow-ups - currentWord = nextWords[rand.Intn(len(nextWords))] - } else { - // Try to find a new starting point - found := false - for word, nextWords := range chain { - if len(nextWords) > 0 && len(word) > 2 { - currentWord = word - found = true - break - } - } - if !found { - break - } - } - } - - result := strings.Join(words, " ") - - // Add punctuation if missing - if len(result) > 0 && !strings.ContainsAny(result[len(result)-1:], ".!?") { - // Randomly add punctuation - punctuation := []string{".", "!", "?"} - result += punctuation[rand.Intn(len(punctuation))] - } - - return result -} - func init() { - // Seed random number generator + // Seed RNG rand.Seed(time.Now().UnixNano()) } -// MarkovQuestionCommand generates a markov chain answer to a question based on channel contents +// MarkovQuestionCommand generates an answer func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) { channelID := i.ChannelID - + var question string var numMessages int = lib.AppConfig.MarkovDefaultMessages - + for _, option := range i.ApplicationCommandData().Options { switch option.Name { case "question": @@ -502,71 +298,44 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) } } } - + if question == "" { return "Please provide a question!", nil } - + cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages) - var chain map[string][]string - var twoGramChain map[string]map[string][]string - var threeGramChain map[string]map[string]map[string][]string - var fourGramChain map[string]map[string]map[string]map[string][]string - var fiveGramChain map[string]map[string]map[string]map[string]map[string][]string - - if cachedChain := getCachedChain(cacheKey); cachedChain != nil { - chain = cachedChain - twoGramChain = getCachedTwoGramChain(cacheKey) - threeGramChain = getCachedThreeGramChain(cacheKey) - fourGramChain = getCachedFourGramChain(cacheKey) - fiveGramChain = getCachedFiveGramChain(cacheKey) + var data *MarkovData + + if cachedData := getCachedChain(cacheKey); cachedData != nil { + data = cachedData } else { allMessages, err := fetchMessages(s, channelID, numMessages) if err != nil { return "", err } - - chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain = buildMarkovChain(allMessages) - setCachedChain(cacheKey, chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, allMessages) + + data = buildMarkovChain(allMessages) + setCachedChain(cacheKey, data, allMessages) } - - answer := generateAdvancedQuestionAnswer(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, question) - + + answer := generateAdvancedQuestionAnswer(data, question) + if answer == "" { answer = "I couldn't generate an answer to that question. The channel might not have enough relevant content." } - + return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil } -// generateQuestionAnswer generates a markov chain response that attempts to answer the given question -func generateQuestionAnswer(chain map[string][]string, twoGramChain map[string]map[string][]string, question string) string { - if len(chain) == 0 { - return "" - } - - // Clean and analyze the question to find relevant starting words - cleanedQuestion := cleanText(question) - questionWords := strings.Fields(strings.ToLower(cleanedQuestion)) - - // Categorize the question type for better response generation - questionType := categorizeQuestion(cleanedQuestion) - - // Find potential starting words with weighted scoring - startingCandidates := findBestStartingWords(chain, questionWords, questionType) - - if len(startingCandidates) == 0 { - return "" - } - - // Generate response using the best starting candidate - return generateCoherentResponse(chain, twoGramChain, startingCandidates, questionType) +// generateQuestionAnswer generates answer +func generateQuestionAnswer(data *MarkovData, question string) string { + return generateAdvancedQuestionAnswer(data, question) } -// categorizeQuestion determines the type of question for better response generation +// categorizeQuestion determines question type func categorizeQuestion(question string) string { question = strings.ToLower(question) - + if strings.Contains(question, "what") { return "what" } else if strings.Contains(question, "how") { @@ -584,53 +353,53 @@ func categorizeQuestion(question string) string { } else if strings.Contains(question, "is") || strings.Contains(question, "are") || strings.Contains(question, "do") || strings.Contains(question, "does") { return "yesno" } - + return "general" } -// WordCandidate represents a potential starting word with its relevance score +// WordCandidate holds word score type WordCandidate struct { Word string Score int } -// findBestStartingWords finds and scores potential starting words based on question relevance -func findBestStartingWords(chain map[string][]string, questionWords []string, questionType string) []WordCandidate { +// findBestStartingWords scores starting words +func findBestStartingWords(data *MarkovData, questionWords []string, questionType string) []WordCandidate { candidates := make(map[string]int) - - // Score words from the question that exist in our chain + chain := data.Chains[1] + + // Score question words for _, word := range questionWords { if len(word) > 2 && !isStopWord(word) { if nextWords, exists := chain[word]; exists && len(nextWords) > 0 { - candidates[word] += 10 // High score for direct question words + candidates[word] += 10 } } } - - // Add contextually relevant words based on question type + + // Add context words contextWords := getContextualWords(questionType) for _, word := range contextWords { if nextWords, exists := chain[word]; exists && len(nextWords) > 0 { - candidates[word] += 5 // Medium score for contextual words + candidates[word] += 5 } } - - // Add high-frequency words as fallback + + // Add fallback words for word, nextWords := range chain { if len(nextWords) >= 3 && len(word) > 2 && !isStopWord(word) { if _, exists := candidates[word]; !exists { - candidates[word] = len(nextWords) / 2 // Score based on frequency + candidates[word] = len(nextWords) / 2 } } } - - // Convert to sorted slice + + // Sort candidates var result []WordCandidate for word, score := range candidates { result = append(result, WordCandidate{Word: word, Score: score}) } - - // Sort by score (highest first) + for i := 0; i < len(result)-1; i++ { for j := i + 1; j < len(result); j++ { if result[j].Score > result[i].Score { @@ -638,16 +407,16 @@ func findBestStartingWords(chain map[string][]string, questionWords []string, qu } } } - - // Return top candidates + + // Top 10 if len(result) > 10 { result = result[:10] } - + return result } -// getContextualWords returns words that are contextually relevant to the question type +// getContextualWords returns relevant words func getContextualWords(questionType string) []string { switch questionType { case "what": @@ -671,131 +440,31 @@ func getContextualWords(questionType string) []string { } } -// generateCoherentResponse creates a more coherent response using improved algorithms -func generateCoherentResponse(chain map[string][]string, twoGramChain map[string]map[string][]string, candidates []WordCandidate, questionType string) string { - if len(candidates) == 0 { - return "" - } - - // Try multiple generation attempts and pick the best one - var bestResponse string - bestScore := 0 - - for attempt := 0; attempt < 3; attempt++ { - // Select starting word (bias towards higher scored candidates) - candidateIndex := 0 - if len(candidates) > 1 { - // 70% chance to pick top candidate, 30% for others - if rand.Float32() > 0.7 && len(candidates) > 1 { - candidateIndex = rand.Intn(min(3, len(candidates))) - } - } - - currentWord := candidates[candidateIndex].Word - words := []string{} - - // Generate response with improved coherence - maxWords := 8 + rand.Intn(22) // 8-22 words - lastWord := "" - - for i := 0; i < maxWords; i++ { - // Add current word (capitalize first word) - if i == 0 { - words = append(words, strings.Title(currentWord)) - } else { - words = append(words, currentWord) - } - - var nextWord string - - // Try 2-gram chain first for better coherence - if lastWord != "" { - if twoGramOptions, exists := twoGramChain[strings.ToLower(lastWord)][strings.ToLower(currentWord)]; exists && len(twoGramOptions) > 0 { - nextWord = twoGramOptions[rand.Intn(len(twoGramOptions))] - } - } - - // Fallback to regular chain - if nextWord == "" { - if nextWords, exists := chain[strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { - // Prefer longer words for better content - var goodOptions []string - for _, word := range nextWords { - if len(word) > 2 && !isStopWord(strings.ToLower(word)) { - goodOptions = append(goodOptions, word) - } - } - - if len(goodOptions) > 0 { - nextWord = goodOptions[rand.Intn(len(goodOptions))] - } else { - nextWord = nextWords[rand.Intn(len(nextWords))] - } - } - } - - // If we can't find a next word, try to restart with a good candidate - if nextWord == "" { - found := false - for _, candidate := range candidates { - if nextWords, exists := chain[candidate.Word]; exists && len(nextWords) > 0 { - nextWord = candidate.Word - found = true - break - } - } - if !found { - break - } - } - - lastWord = currentWord - currentWord = nextWord - } - - response := strings.Join(words, " ") - - // Score this response - score := scoreResponse(response, questionType) - - if score > bestScore { - bestScore = score - bestResponse = response - } - } - - // Add appropriate punctuation - if len(bestResponse) > 0 && !strings.ContainsAny(bestResponse[len(bestResponse)-1:], ".!?") { - punctuation := getPunctuationForQuestionType(questionType) - bestResponse += punctuation[rand.Intn(len(punctuation))] - } - - return bestResponse -} - -// scoreResponse scores a response based on various quality metrics +// scoreResponse scores the response func scoreResponse(response string, questionType string) int { score := 0 words := strings.Fields(response) - - // Length score (prefer 8-16 words) + + // Length score if len(words) >= 8 && len(words) <= 16 { score += 10 } else if len(words) >= 6 && len(words) <= 20 { score += 5 } - - // Diversity score (prefer responses with varied word lengths) + + // Diversity score totalLength := 0 for _, word := range words { totalLength += len(word) } - avgWordLength := float64(totalLength) / float64(len(words)) - if avgWordLength > 3.5 && avgWordLength < 6.0 { - score += 5 + if len(words) > 0 { + avgWordLength := float64(totalLength) / float64(len(words)) + if avgWordLength > 3.5 && avgWordLength < 6.0 { + score += 5 + } } - - // Content word score (prefer responses with meaningful words) + + // Content score contentWords := 0 for _, word := range words { if len(word) > 3 && !isStopWord(strings.ToLower(word)) { @@ -803,11 +472,11 @@ func scoreResponse(response string, questionType string) int { } } score += contentWords - + return score } -// getPunctuationForQuestionType returns appropriate punctuation for the question type +// getPunctuationForQuestionType returns punctuation func getPunctuationForQuestionType(questionType string) []string { switch questionType { case "yesno": @@ -819,7 +488,7 @@ func getPunctuationForQuestionType(questionType string) []string { } } -// min returns the minimum of two integers +// min helper func min(a, b int) int { if a < b { return a @@ -827,7 +496,7 @@ func min(a, b int) int { return b } -// isStopWord checks if a word is a common stop word that shouldn't be used as starting points +// isStopWord checks for common words func isStopWord(word string) bool { stopWords := map[string]bool{ "a": true, "an": true, "and": true, "are": true, "as": true, "at": true, "be": true, "by": true, @@ -838,191 +507,23 @@ func isStopWord(word string) bool { return stopWords[word] } -// buildTwoGramChain creates a 2-gram chain for better sentence flow from existing 1-gram chain -func buildTwoGramChain(chain map[string][]string) map[string]map[string][]string { - // This creates transitions between word pairs from the 1-gram chain - twoGramChain := make(map[string]map[string][]string) - - for word1, nextWords := range chain { - for _, word2 := range nextWords { - if twoGramChain[word1] == nil { - twoGramChain[word1] = make(map[string][]string) - } - // For each word2 that follows word1, find what follows word2 - if nextNextWords, exists := chain[strings.ToLower(word2)]; exists { - twoGramChain[word1][strings.ToLower(word2)] = nextNextWords - } - } - } - - return twoGramChain -} - -// generateImprovedMessage creates a new message using both 1-gram and 2-gram chains for better coherence -func generateImprovedMessage(chain map[string][]string, twoGramChain map[string]map[string][]string) string { - if len(chain) == 0 { +// generateAdvancedMessage generates a message +func generateAdvancedMessage(data *MarkovData) string { + if len(data.Chains[1]) == 0 { return "" } - // Try multiple generation attempts and pick the best one + // Try multiple attempts var bestMessage string bestScore := 0 - - for attempt := 0; attempt < 3; attempt++ { - words := []string{} - var currentWord string - - // Start with a random word that has good follow-ups - attempts := 0 - for word, nextWords := range chain { - if len(nextWords) >= 2 && len(word) > 2 && !isStopWord(word) { // Prefer words with multiple options - currentWord = word - break - } - attempts++ - if attempts > 50 { // Fallback to any word - currentWord = word - break - } - } - if currentWord == "" { - continue - } - - // Generate between 8 and 20 words for better content - maxWords := 8 + rand.Intn(12) - lastWord := "" - - for i := 0; i < maxWords; i++ { - // Add current word (capitalize first word) - if i == 0 { - words = append(words, strings.Title(currentWord)) - } else { - words = append(words, currentWord) - } - - var nextWord string - - // Try 2-gram chain first for better coherence - if lastWord != "" && twoGramChain != nil { - if twoGramOptions, exists := twoGramChain[strings.ToLower(lastWord)][strings.ToLower(currentWord)]; exists && len(twoGramOptions) > 0 { - nextWord = twoGramOptions[rand.Intn(len(twoGramOptions))] - } - } - - // Fallback to regular chain - if nextWord == "" { - if nextWords, exists := chain[strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { - // Prefer longer, more meaningful words - var goodOptions []string - for _, word := range nextWords { - if len(word) > 2 && !isStopWord(strings.ToLower(word)) { - goodOptions = append(goodOptions, word) - } - } - - if len(goodOptions) > 0 { - nextWord = goodOptions[rand.Intn(len(goodOptions))] - } else { - nextWord = nextWords[rand.Intn(len(nextWords))] - } - } - } - - // If we can't find a next word, try to restart - if nextWord == "" { - found := false - for word, nextWords := range chain { - if len(nextWords) > 0 && len(word) > 2 && !isStopWord(word) { - nextWord = word - found = true - break - } - } - if !found { - break - } - } - - lastWord = currentWord - currentWord = nextWord - } - - message := strings.Join(words, " ") - - // Score this message - score := scoreGeneratedMessage(message) - - if score > bestScore { - bestScore = score - bestMessage = message - } - } - - // Add punctuation if missing - if len(bestMessage) > 0 && !strings.ContainsAny(bestMessage[len(bestMessage)-1:], ".!?") { - // Randomly add punctuation - punctuation := []string{".", "!", "?", "."} - bestMessage += punctuation[rand.Intn(len(punctuation))] - } - - return bestMessage -} - -// scoreGeneratedMessage scores a generated message based on quality metrics -func scoreGeneratedMessage(message string) int { - score := 0 - words := strings.Fields(message) - - // Length score (prefer 8-16 words) - if len(words) >= 8 && len(words) <= 16 { - score += 10 - } else if len(words) >= 6 && len(words) <= 20 { - score += 5 - } - - // Diversity score (prefer responses with varied word lengths) - totalLength := 0 - for _, word := range words { - totalLength += len(word) - } - if len(words) > 0 { - avgWordLength := float64(totalLength) / float64(len(words)) - if avgWordLength > 3.0 && avgWordLength < 7.0 { - score += 5 - } - } - - // Content word score (prefer messages with meaningful words) - contentWords := 0 - for _, word := range words { - if len(word) > 3 && !isStopWord(strings.ToLower(word)) { - contentWords++ - } - } - score += contentWords - - return score -} - -// generateAdvancedMessage creates a new message using all n-gram chains for maximum coherence -func generateAdvancedMessage(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string) string { - if len(chain) == 0 { - return "" - } - - // Try multiple generation attempts and pick the best one - var bestMessage string - bestScore := 0 - for attempt := 0; attempt < 5; attempt++ { words := []string{} var currentWord string - - // Start with a random word that has good follow-ups + + // Pick start word attempts := 0 - for word, nextWords := range chain { + for word, nextWords := range data.Chains[1] { if len(nextWords) >= 2 && len(word) > 2 && !isStopWord(word) { currentWord = word break @@ -1038,64 +539,54 @@ func generateAdvancedMessage(chain map[string][]string, twoGramChain map[string] continue } - // Generate between 10 and 18 words for better content + // Generate words maxWords := 10 + rand.Intn(8) wordHistory := []string{currentWord} - + for i := 0; i < maxWords; i++ { - // Add current word (capitalize first word) + // Add word if i == 0 { words = append(words, strings.Title(currentWord)) } else { words = append(words, currentWord) } - + var nextWord string historyLen := len(wordHistory) - - // Try 5-gram chain first (highest coherence) - if historyLen >= 5 && fiveGramChain != nil { - w1, w2, w3, w4, w5 := strings.ToLower(wordHistory[historyLen-5]), strings.ToLower(wordHistory[historyLen-4]), strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := fiveGramChain[w1][w2][w3][w4][w5]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) + + // Try n-grams + for n := 5; n >= 2; n-- { + if historyLen >= n && data.Chains[n] != nil { + // Build prefix + var prefixBuilder strings.Builder + for j := 0; j < n; j++ { + if j > 0 { + prefixBuilder.WriteString(" ") + } + prefixBuilder.WriteString(strings.ToLower(wordHistory[historyLen-n+j])) + } + prefix := prefixBuilder.String() + + if options, exists := data.Chains[n][prefix]; exists && len(options) > 0 { + nextWord = selectBestNextWord(options, wordHistory) + if nextWord != "" { + break + } + } } } - - // Try 4-gram chain if 5-gram failed - if nextWord == "" && historyLen >= 4 && fourGramChain != nil { - w1, w2, w3, w4 := strings.ToLower(wordHistory[historyLen-4]), strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := fourGramChain[w1][w2][w3][w4]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Try 3-gram chain if 4-gram failed - if nextWord == "" && historyLen >= 3 && threeGramChain != nil { - w1, w2, w3 := strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := threeGramChain[w1][w2][w3]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Try 2-gram chain if 3-gram failed - if nextWord == "" && historyLen >= 2 && twoGramChain != nil { - w1, w2 := strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := twoGramChain[w1][w2]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Fallback to 1-gram chain + + // Fallback to 1-gram if nextWord == "" { - if nextWords, exists := chain[strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { + if nextWords, exists := data.Chains[1][strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { nextWord = selectBestNextWord(nextWords, wordHistory) } } - - // If we still can't find a next word, try to restart + + // Restart if needed if nextWord == "" { found := false - for word, nextWords := range chain { + for word, nextWords := range data.Chains[1] { if len(nextWords) > 0 && len(word) > 2 && !isStopWord(word) { nextWord = word found = true @@ -1106,28 +597,28 @@ func generateAdvancedMessage(chain map[string][]string, twoGramChain map[string] break } } - + currentWord = nextWord wordHistory = append(wordHistory, currentWord) - - // Keep history manageable + + // Trim history if len(wordHistory) > 10 { wordHistory = wordHistory[1:] } } message := strings.Join(words, " ") - - // Score this message with enhanced scoring + + // Score message score := scoreAdvancedMessage(message) - + if score > bestScore { bestScore = score bestMessage = message } } - - // Add punctuation if missing + + // Add punctuation if len(bestMessage) > 0 && !strings.ContainsAny(bestMessage[len(bestMessage)-1:], ".!?") { punctuation := []string{".", "!", "?", "."} bestMessage += punctuation[rand.Intn(len(punctuation))] @@ -1136,112 +627,101 @@ func generateAdvancedMessage(chain map[string][]string, twoGramChain map[string] return bestMessage } -// generateAdvancedQuestionAnswer generates a markov chain response using all n-gram levels -func generateAdvancedQuestionAnswer(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, question string) string { - if len(chain) == 0 { +// generateAdvancedQuestionAnswer generates answer +func generateAdvancedQuestionAnswer(data *MarkovData, question string) string { + if len(data.Chains[1]) == 0 { return "" } - - // Clean and analyze the question to find relevant starting words + + // Analyze question cleanedQuestion := cleanText(question) questionWords := strings.Fields(strings.ToLower(cleanedQuestion)) - - // Categorize the question type for better response generation + + // Categorize question questionType := categorizeQuestion(cleanedQuestion) - - // Find potential starting words with weighted scoring - startingCandidates := findBestStartingWords(chain, questionWords, questionType) - + + // Find starting words + startingCandidates := findBestStartingWords(data, questionWords, questionType) + if len(startingCandidates) == 0 { return "" } - - // Generate response using the best starting candidate with advanced n-gram chains - return generateAdvancedCoherentResponse(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, startingCandidates, questionType) + + // Generate response + return generateAdvancedCoherentResponse(data, startingCandidates, questionType) } -// generateAdvancedCoherentResponse creates a more coherent response using all n-gram levels -func generateAdvancedCoherentResponse(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, candidates []WordCandidate, questionType string) string { +// generateAdvancedCoherentResponse generates response +func generateAdvancedCoherentResponse(data *MarkovData, candidates []WordCandidate, questionType string) string { if len(candidates) == 0 { return "" } - - // Try multiple generation attempts and pick the best one + + // Try multiple attempts var bestResponse string bestScore := 0 - + for attempt := 0; attempt < 5; attempt++ { - // Select starting word (bias towards higher scored candidates) + // Pick candidate candidateIndex := 0 if len(candidates) > 1 { - // 70% chance to pick top candidate, 30% for others if rand.Float32() > 0.7 && len(candidates) > 1 { candidateIndex = rand.Intn(min(3, len(candidates))) } } - + currentWord := candidates[candidateIndex].Word words := []string{} wordHistory := []string{currentWord} - - // Generate response with improved coherence using all n-gram levels - maxWords := 12 + rand.Intn(10) // 12-22 words for substantial answers - + + // Generate response + maxWords := 12 + rand.Intn(10) + for i := 0; i < maxWords; i++ { - // Add current word (capitalize first word) + // Add word if i == 0 { words = append(words, strings.Title(currentWord)) } else { words = append(words, currentWord) } - + var nextWord string historyLen := len(wordHistory) - - // Try 5-gram chain first (highest coherence) - if historyLen >= 5 && fiveGramChain != nil { - w1, w2, w3, w4, w5 := strings.ToLower(wordHistory[historyLen-5]), strings.ToLower(wordHistory[historyLen-4]), strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := fiveGramChain[w1][w2][w3][w4][w5]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) + + // Try n-grams + for n := 5; n >= 2; n-- { + if historyLen >= n && data.Chains[n] != nil { + // Build prefix + var prefixBuilder strings.Builder + for j := 0; j < n; j++ { + if j > 0 { + prefixBuilder.WriteString(" ") + } + prefixBuilder.WriteString(strings.ToLower(wordHistory[historyLen-n+j])) + } + prefix := prefixBuilder.String() + + if options, exists := data.Chains[n][prefix]; exists && len(options) > 0 { + nextWord = selectBestNextWord(options, wordHistory) + if nextWord != "" { + break + } + } } } - - // Try 4-gram chain if 5-gram failed - if nextWord == "" && historyLen >= 4 && fourGramChain != nil { - w1, w2, w3, w4 := strings.ToLower(wordHistory[historyLen-4]), strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := fourGramChain[w1][w2][w3][w4]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Try 3-gram chain if 4-gram failed - if nextWord == "" && historyLen >= 3 && threeGramChain != nil { - w1, w2, w3 := strings.ToLower(wordHistory[historyLen-3]), strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := threeGramChain[w1][w2][w3]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Try 2-gram chain if 3-gram failed - if nextWord == "" && historyLen >= 2 && twoGramChain != nil { - w1, w2 := strings.ToLower(wordHistory[historyLen-2]), strings.ToLower(wordHistory[historyLen-1]) - if options, exists := twoGramChain[w1][w2]; exists && len(options) > 0 { - nextWord = selectBestNextWord(options, wordHistory) - } - } - - // Fallback to regular chain with preference for meaningful words + + // Fallback if nextWord == "" { - if nextWords, exists := chain[strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { + if nextWords, exists := data.Chains[1][strings.ToLower(currentWord)]; exists && len(nextWords) > 0 { nextWord = selectBestNextWord(nextWords, wordHistory) } } - - // If we can't find a next word, try to restart with a good candidate + + // Restart if nextWord == "" { found := false for _, candidate := range candidates { - if nextWords, exists := chain[candidate.Word]; exists && len(nextWords) > 0 { + if nextWords, exists := data.Chains[1][candidate.Word]; exists && len(nextWords) > 0 { nextWord = candidate.Word found = true break @@ -1251,42 +731,42 @@ func generateAdvancedCoherentResponse(chain map[string][]string, twoGramChain ma break } } - + currentWord = nextWord wordHistory = append(wordHistory, currentWord) - - // Keep history manageable + + // Trim history if len(wordHistory) > 10 { wordHistory = wordHistory[1:] } } - + response := strings.Join(words, " ") - - // Score this response with enhanced scoring + + // Score response score := scoreAdvancedResponse(response, questionType) - + if score > bestScore { bestScore = score bestResponse = response } } - - // Add appropriate punctuation + + // Add punctuation if len(bestResponse) > 0 && !strings.ContainsAny(bestResponse[len(bestResponse)-1:], ".!?") { punctuation := getPunctuationForQuestionType(questionType) bestResponse += punctuation[rand.Intn(len(punctuation))] } - + return bestResponse } -// scoreAdvancedMessage scores a generated message with enhanced metrics +// scoreAdvancedMessage scores message func scoreAdvancedMessage(message string) int { score := 0 words := strings.Fields(message) - - // Length score (prefer 10-16 words) + + // Length score if len(words) >= 10 && len(words) <= 16 { score += 15 } else if len(words) >= 8 && len(words) <= 18 { @@ -1294,22 +774,22 @@ func scoreAdvancedMessage(message string) int { } else if len(words) >= 6 && len(words) <= 20 { score += 5 } - - // Diversity score (prefer responses with varied word lengths) + + // Diversity score totalLength := 0 uniqueWords := make(map[string]bool) for _, word := range words { totalLength += len(word) uniqueWords[strings.ToLower(word)] = true } - + if len(words) > 0 { avgWordLength := float64(totalLength) / float64(len(words)) if avgWordLength > 3.5 && avgWordLength < 6.5 { score += 8 } - - // Uniqueness score (penalize repetition) + + // Uniqueness score uniqueRatio := float64(len(uniqueWords)) / float64(len(words)) if uniqueRatio > 0.8 { score += 10 @@ -1317,8 +797,8 @@ func scoreAdvancedMessage(message string) int { score += 5 } } - - // Content word score (prefer messages with meaningful words) + + // Content score contentWords := 0 for _, word := range words { if len(word) > 3 && !isStopWord(strings.ToLower(word)) { @@ -1326,74 +806,74 @@ func scoreAdvancedMessage(message string) int { } } score += contentWords * 2 - - // Grammar coherence bonus (simple heuristics) + + // Grammar bonus if !strings.Contains(message, " a a ") && !strings.Contains(message, " the the ") && !strings.Contains(message, " you you ") { score += 5 } - + return score } -// scoreAdvancedResponse scores a response with enhanced question-specific metrics +// scoreAdvancedResponse scores response func scoreAdvancedResponse(response string, questionType string) int { score := scoreAdvancedMessage(response) // Base score - - // Question-specific bonuses + + // Question bonuses responseLower := strings.ToLower(response) switch questionType { case "yesno": - if strings.Contains(responseLower, "yes") || strings.Contains(responseLower, "no") || - strings.Contains(responseLower, "maybe") || strings.Contains(responseLower, "definitely") { + if strings.Contains(responseLower, "yes") || strings.Contains(responseLower, "no") || + strings.Contains(responseLower, "maybe") || strings.Contains(responseLower, "definitely") { score += 8 } case "why": - if strings.Contains(responseLower, "because") || strings.Contains(responseLower, "reason") || - strings.Contains(responseLower, "since") || strings.Contains(responseLower, "due") { + if strings.Contains(responseLower, "because") || strings.Contains(responseLower, "reason") || + strings.Contains(responseLower, "since") || strings.Contains(responseLower, "due") { score += 8 } case "how": - if strings.Contains(responseLower, "way") || strings.Contains(responseLower, "method") || - strings.Contains(responseLower, "process") || strings.Contains(responseLower, "steps") { + if strings.Contains(responseLower, "way") || strings.Contains(responseLower, "method") || + strings.Contains(responseLower, "process") || strings.Contains(responseLower, "steps") { score += 8 } case "when": - if strings.Contains(responseLower, "time") || strings.Contains(responseLower, "day") || - strings.Contains(responseLower, "hour") || strings.Contains(responseLower, "moment") { + if strings.Contains(responseLower, "time") || strings.Contains(responseLower, "day") || + strings.Contains(responseLower, "hour") || strings.Contains(responseLower, "moment") { score += 8 } case "where": - if strings.Contains(responseLower, "place") || strings.Contains(responseLower, "location") || - strings.Contains(responseLower, "here") || strings.Contains(responseLower, "there") { + if strings.Contains(responseLower, "place") || strings.Contains(responseLower, "location") || + strings.Contains(responseLower, "here") || strings.Contains(responseLower, "there") { score += 8 } } - + return score } -// isValidNextWord checks if a word would create repetitive or grammatical issues +// isValidNextWord checks validity func isValidNextWord(wordHistory []string, nextWord string) bool { if len(wordHistory) == 0 { return true } - + nextWordLower := strings.ToLower(nextWord) - - // Prevent immediate repetition + + // No immediate repetition if len(wordHistory) >= 1 && strings.ToLower(wordHistory[len(wordHistory)-1]) == nextWordLower { return false } - - // Prevent "a a", "the the", "you you" patterns + + // No double articles if len(wordHistory) >= 1 { lastWord := strings.ToLower(wordHistory[len(wordHistory)-1]) if (lastWord == "a" || lastWord == "the" || lastWord == "you") && lastWord == nextWordLower { return false } } - - // Prevent triple repetition in recent history + + // No triple repetition if len(wordHistory) >= 3 { count := 0 for i := len(wordHistory) - 3; i < len(wordHistory); i++ { @@ -1405,45 +885,45 @@ func isValidNextWord(wordHistory []string, nextWord string) bool { return false } } - - // Prevent common grammatical errors + + // Grammar checks if len(wordHistory) >= 1 { lastWord := strings.ToLower(wordHistory[len(wordHistory)-1]) - - // Don't put "a" after "you" in most cases + + // No "you a" if lastWord == "you" && nextWordLower == "a" { return false } - - // Don't put articles after articles - if (lastWord == "a" || lastWord == "an" || lastWord == "the") && - (nextWordLower == "a" || nextWordLower == "an" || nextWordLower == "the") { + + // No double articles + if (lastWord == "a" || lastWord == "an" || lastWord == "the") && + (nextWordLower == "a" || nextWordLower == "an" || nextWordLower == "the") { return false } } - + return true } -// selectBestNextWord chooses the best next word from available options +// selectBestNextWord picks next word func selectBestNextWord(options []string, wordHistory []string) string { if len(options) == 0 { return "" } - - // Filter out invalid options + + // Filter invalid var validOptions []string for _, option := range options { if isValidNextWord(wordHistory, option) { validOptions = append(validOptions, option) } } - - // If no valid options, fall back to original options but try to avoid the worst ones + + // Fallback if len(validOptions) == 0 { var fallbackOptions []string for _, option := range options { - // At least avoid immediate repetition + // Avoid repetition if len(wordHistory) == 0 || strings.ToLower(wordHistory[len(wordHistory)-1]) != strings.ToLower(option) { fallbackOptions = append(fallbackOptions, option) } @@ -1454,18 +934,18 @@ func selectBestNextWord(options []string, wordHistory []string) string { validOptions = options } } - - // Prefer longer, more meaningful words + + // Prefer meaningful words var goodOptions []string for _, option := range validOptions { if len(option) > 2 && !isStopWord(strings.ToLower(option)) { goodOptions = append(goodOptions, option) } } - + if len(goodOptions) > 0 { return goodOptions[rand.Intn(len(goodOptions))] } - + return validOptions[rand.Intn(len(validOptions))] } diff --git a/go.mod b/go.mod index 90e62aa..d61e8e9 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module himbot -go 1.24 +go 1.25.5 require ( github.com/antlr4-go/antlr/v4 v4.13.1 // indirect