// himbot/command/markov.go

package command

import (
	"crypto/md5"
	"fmt"
	"himbot/lib"
	"math/rand"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/bwmarrin/discordgo"
)

// MarkovCache stores previously built Markov chains so that repeated requests
// with the same cache key do not have to rebuild them. Every map is indexed
// by that cache key first.
type MarkovCache struct {
	// mu guards every map below; hold it for any read or write.
	mu sync.RWMutex

	// chains holds the 1-gram tables: word -> observed following words.
	chains map[string]map[string][]string

	// twoGrams through fiveGrams hold the higher-order tables, nested one
	// map level per context word, ending in the observed following words.
	twoGrams   map[string]map[string]map[string][]string
	threeGrams map[string]map[string]map[string]map[string][]string
	fourGrams  map[string]map[string]map[string]map[string]map[string][]string
	fiveGrams  map[string]map[string]map[string]map[string]map[string]map[string][]string

	// hashes holds the digest of the messages each entry was built from.
	hashes map[string]string
}

var (
	// markovCache is the package-wide chain cache; all of its maps start out
	// empty and non-nil so they can be written to directly.
	markovCache = &MarkovCache{
		chains:     map[string]map[string][]string{},
		twoGrams:   map[string]map[string]map[string][]string{},
		threeGrams: map[string]map[string]map[string]map[string][]string{},
		fourGrams:  map[string]map[string]map[string]map[string]map[string][]string{},
		fiveGrams:  map[string]map[string]map[string]map[string]map[string]map[string][]string{},
		hashes:     map[string]string{},
	}

	// Patterns removed from message text before it is tokenised.

	// urlRegex matches an http/https URL up to the next whitespace.
	urlRegex = regexp.MustCompile(`https?://[^\s]+`)
	// mentionRegex matches angle-bracket tokens that start with @, #, & or !.
	mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
	// emojiRegex matches custom emoji tokens of the form <:name:id> or <a:name:id>.
	emojiRegex = regexp.MustCompile(`<a?:[^:]+:\d+>`)
)

// MarkovCommand handles the markov command: it generates a message from the
// recent history of the channel the interaction came from.
//
// The optional first option "messages" selects how many messages to learn
// from; non-positive values fall back to the configured default and values
// above the configured maximum are clamped to it.
//
// Chains are cached per "channelID:numMessages". A cache hit is generated
// with the same n-gram generator as a fresh build, so repeated invocations
// produce output of the same quality as the first one. If the cached chains
// yield nothing, the messages are fetched and the chains rebuilt.
//
// It returns the generated text (or a fixed fallback sentence when nothing
// could be generated) and any error from fetching messages.
func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
	channelID := i.ChannelID

	numMessages := lib.AppConfig.MarkovDefaultMessages // Default value from config
	if opts := i.ApplicationCommandData().Options; len(opts) > 0 && opts[0].Name == "messages" {
		numMessages = int(opts[0].IntValue())
		if numMessages <= 0 {
			numMessages = lib.AppConfig.MarkovDefaultMessages
		} else if numMessages > lib.AppConfig.MarkovMaxMessages {
			numMessages = lib.AppConfig.MarkovMaxMessages // Limit from config
		}
	}

	// Check cache first. The higher-order chains are looked up separately, so
	// an eviction in between can leave some of them nil; the generator is
	// handed whatever is still present.
	cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
	if chain := getCachedChain(cacheKey); chain != nil {
		cachedMessage := generateAdvancedMessage(
			chain,
			getCachedTwoGramChain(cacheKey),
			getCachedThreeGramChain(cacheKey),
			getCachedFourGramChain(cacheKey),
			getCachedFiveGramChain(cacheKey),
		)
		if cachedMessage != "" {
			return cachedMessage, nil
		}
	}

	// Fetch messages
	allMessages, err := fetchMessages(s, channelID, numMessages)
	if err != nil {
		return "", err
	}

	// Build the Markov chains from the fetched messages
	chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain := buildMarkovChain(allMessages)

	// Cache the chains for later invocations
	setCachedChain(cacheKey, chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, allMessages)

	// Generate a new message using all n-gram chains
	newMessage := generateAdvancedMessage(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain)

	// Provide a fallback message when nothing could be generated
	if newMessage == "" {
		newMessage = "I couldn't generate a message. The channel might be empty or contain no usable text."
	}

	return newMessage, nil
}

// getCachedChain returns the cached 1-gram chain stored under cacheKey, or
// nil when nothing is cached for that key.
func getCachedChain(cacheKey string) map[string][]string {
	markovCache.mu.RLock()
	defer markovCache.mu.RUnlock()

	// Indexing a missing key yields a nil map, which is the "not cached" result.
	return markovCache.chains[cacheKey]
}

// getCachedTwoGramChain returns the cached 2-gram chain stored under
// cacheKey, or nil when nothing is cached for that key.
func getCachedTwoGramChain(cacheKey string) map[string]map[string][]string {
	markovCache.mu.RLock()
	defer markovCache.mu.RUnlock()

	// Indexing a missing key yields a nil map, which is the "not cached" result.
	return markovCache.twoGrams[cacheKey]
}

// getCachedThreeGramChain returns the cached 3-gram chain stored under
// cacheKey, or nil when nothing is cached for that key.
func getCachedThreeGramChain(cacheKey string) map[string]map[string]map[string][]string {
	markovCache.mu.RLock()
	defer markovCache.mu.RUnlock()

	// Indexing a missing key yields a nil map, which is the "not cached" result.
	return markovCache.threeGrams[cacheKey]
}

// getCachedFourGramChain returns the cached 4-gram chain stored under
// cacheKey, or nil when nothing is cached for that key.
func getCachedFourGramChain(cacheKey string) map[string]map[string]map[string]map[string][]string {
	markovCache.mu.RLock()
	defer markovCache.mu.RUnlock()

	// Indexing a missing key yields a nil map, which is the "not cached" result.
	return markovCache.fourGrams[cacheKey]
}

// getCachedFiveGramChain returns the cached 5-gram chain stored under
// cacheKey, or nil when nothing is cached for that key.
func getCachedFiveGramChain(cacheKey string) map[string]map[string]map[string]map[string]map[string][]string {
	markovCache.mu.RLock()
	defer markovCache.mu.RUnlock()

	// Indexing a missing key yields a nil map, which is the "not cached" result.
	return markovCache.fiveGrams[cacheKey]
}

// setCachedChain stores the given chains, together with a digest of the
// messages they were built from, under cacheKey.
//
// Chains with 10 or fewer 1-gram entries are not cached, and a non-positive
// configured cache size disables caching altogether. When the cache grows
// past the configured size, other entries are evicted until it fits again.
// Map iteration order is unspecified, so the victims are arbitrary rather
// than oldest-first; the entry that was just stored is never the victim.
func setCachedChain(cacheKey string, chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, messages []*discordgo.Message) {
	if len(chain) <= 10 || lib.AppConfig.MarkovCacheSize <= 0 {
		return
	}

	// Hash before taking the lock so readers are not blocked while hashing.
	hash := hashMessages(messages)

	markovCache.mu.Lock()
	defer markovCache.mu.Unlock()

	markovCache.chains[cacheKey] = chain
	markovCache.twoGrams[cacheKey] = twoGramChain
	markovCache.threeGrams[cacheKey] = threeGramChain
	markovCache.fourGrams[cacheKey] = fourGramChain
	markovCache.fiveGrams[cacheKey] = fiveGramChain
	markovCache.hashes[cacheKey] = hash

	// Evict arbitrary other entries until the cache fits its size limit.
	// Deleting from a map while ranging over it is permitted in Go.
	for victim := range markovCache.chains {
		if len(markovCache.chains) <= lib.AppConfig.MarkovCacheSize {
			break
		}
		if victim == cacheKey {
			continue
		}
		delete(markovCache.chains, victim)
		delete(markovCache.twoGrams, victim)
		delete(markovCache.threeGrams, victim)
		delete(markovCache.fourGrams, victim)
		delete(markovCache.fiveGrams, victim)
		delete(markovCache.hashes, victim)
	}
}

// hashMessages returns the lower-case hex MD5 digest of every message's ID
// followed by its content, taken in slice order.
func hashMessages(messages []*discordgo.Message) string {
	digest := md5.New()
	for idx := range messages {
		// Writes to a hash.Hash never return an error.
		digest.Write([]byte(messages[idx].ID))
		digest.Write([]byte(messages[idx].Content))
	}
	return fmt.Sprintf("%x", digest.Sum(nil))
}

// fetchMessages collects up to numMessages usable messages from channelID,
// paging backwards through the history with s.ChannelMessages.
//
// A message is usable when it has an author that is not a bot and its content
// is not blank. Paging stops once enough usable messages were collected, when
// a page comes back empty, or when a page is shorter than requested (the end
// of the history). Full pages are always requested, so a short page reliably
// means "no more history" even when only a few messages are still needed.
//
// It returns the collected messages, or the first error from the API.
func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]*discordgo.Message, error) {
	// pageSize is the number of messages requested per API call.
	const pageSize = 100

	var allMessages []*discordgo.Message
	var lastMessageID string

	for len(allMessages) < numMessages {
		batch, err := s.ChannelMessages(channelID, pageSize, lastMessageID, "", "")
		if err != nil {
			return nil, err
		}

		if len(batch) == 0 {
			break // No more messages to fetch
		}

		// Keep only usable messages, never more than were asked for.
		for _, msg := range batch {
			if len(allMessages) == numMessages {
				break
			}
			if msg == nil || msg.Author == nil || msg.Author.Bot {
				continue
			}
			if strings.TrimSpace(msg.Content) == "" {
				continue
			}
			allMessages = append(allMessages, msg)
		}

		// Continue before the last message of this page on the next call.
		lastMessageID = batch[len(batch)-1].ID

		if len(batch) < pageSize {
			break // Short page: we've reached the end of the history
		}
	}

	return allMessages, nil
}

// cleanText strips URLs, mentions and custom emojis from text (in that
// order) and collapses every run of whitespace into a single space, with no
// leading or trailing whitespace left over.
func cleanText(text string) string {
	for _, pattern := range []*regexp.Regexp{urlRegex, mentionRegex, emojiRegex} {
		text = pattern.ReplaceAllString(text, "")
	}
	// Joining the fields already yields a string without outer whitespace.
	return strings.Join(strings.Fields(text), " ")
}

// buildMarkovChain builds the 1-gram through 5-gram transition tables from
// messages and returns them in that order.
//
// Each message is cleaned with cleanText and split on whitespace. Messages
// whose cleaned text is shorter than 3 bytes, or that have fewer than 2
// words, contribute nothing. Context words are lower-cased and used as map
// keys; the following word is stored with its original casing. A transition
// is skipped when any of its context words is shorter than 2 bytes or
// contains one of the characters !@#$%^&*()[]{}.
//
// The 1-gram table is always built. Higher orders are built up to the
// configured maximum n-gram level, which is lowered (but not below 2) while
// estimateMemoryUsage reports more than the configured memory limit. Tables
// for orders that are not built are returned empty.
func buildMarkovChain(messages []*discordgo.Message) (map[string][]string, map[string]map[string][]string, map[string]map[string]map[string][]string, map[string]map[string]map[string]map[string][]string, map[string]map[string]map[string]map[string]map[string][]string) {
	// Context words containing any of these characters are rejected.
	const rejected = "!@#$%^&*()[]{}"

	unigrams := make(map[string][]string)
	bigrams := make(map[string]map[string][]string)
	trigrams := make(map[string]map[string]map[string][]string)
	quadgrams := make(map[string]map[string]map[string]map[string][]string)
	pentagrams := make(map[string]map[string]map[string]map[string]map[string][]string)

	// Clean and tokenise every message once, counting words for the memory
	// estimate along the way.
	tokenised := make([][]string, 0, len(messages))
	wordTotal := 0
	for _, m := range messages {
		text := cleanText(m.Content)
		if len(text) < 3 {
			continue
		}
		tokens := strings.Fields(text)
		wordTotal += len(tokens)
		tokenised = append(tokenised, tokens)
	}

	// Lower the n-gram depth until the estimate fits the memory limit.
	depth := lib.AppConfig.MarkovMaxNGram
	for depth > 2 && estimateMemoryUsage(wordTotal, depth) > lib.AppConfig.MarkovMemoryLimit {
		depth--
	}

	for _, tokens := range tokenised {
		if len(tokens) < 2 {
			continue
		}

		// Lower-case each word once and record whether it may serve as context.
		lower := make([]string, len(tokens))
		usable := make([]bool, len(tokens))
		for idx, tok := range tokens {
			lower[idx] = strings.ToLower(tok)
			usable[idx] = len(lower[idx]) >= 2 && !strings.ContainsAny(lower[idx], rejected)
		}

		// contextOK reports whether the size words starting at start are all usable.
		contextOK := func(start, size int) bool {
			for k := start; k < start+size; k++ {
				if !usable[k] {
					return false
				}
			}
			return true
		}

		// 1-gram transitions
		for p := 0; p+1 < len(tokens); p++ {
			if !contextOK(p, 1) {
				continue
			}
			unigrams[lower[p]] = append(unigrams[lower[p]], tokens[p+1])
		}

		// 2-gram transitions
		if depth >= 2 {
			for p := 0; p+2 < len(tokens); p++ {
				if !contextOK(p, 2) {
					continue
				}
				a, b := lower[p], lower[p+1]
				if bigrams[a] == nil {
					bigrams[a] = make(map[string][]string)
				}
				bigrams[a][b] = append(bigrams[a][b], tokens[p+2])
			}
		}

		// 3-gram transitions
		if depth >= 3 {
			for p := 0; p+3 < len(tokens); p++ {
				if !contextOK(p, 3) {
					continue
				}
				a, b, c := lower[p], lower[p+1], lower[p+2]
				if trigrams[a] == nil {
					trigrams[a] = make(map[string]map[string][]string)
				}
				if trigrams[a][b] == nil {
					trigrams[a][b] = make(map[string][]string)
				}
				trigrams[a][b][c] = append(trigrams[a][b][c], tokens[p+3])
			}
		}

		// 4-gram transitions
		if depth >= 4 {
			for p := 0; p+4 < len(tokens); p++ {
				if !contextOK(p, 4) {
					continue
				}
				a, b, c, d := lower[p], lower[p+1], lower[p+2], lower[p+3]
				if quadgrams[a] == nil {
					quadgrams[a] = make(map[string]map[string]map[string][]string)
				}
				if quadgrams[a][b] == nil {
					quadgrams[a][b] = make(map[string]map[string][]string)
				}
				if quadgrams[a][b][c] == nil {
					quadgrams[a][b][c] = make(map[string][]string)
				}
				quadgrams[a][b][c][d] = append(quadgrams[a][b][c][d], tokens[p+4])
			}
		}

		// 5-gram transitions
		if depth >= 5 {
			for p := 0; p+5 < len(tokens); p++ {
				if !contextOK(p, 5) {
					continue
				}
				a, b, c, d, e := lower[p], lower[p+1], lower[p+2], lower[p+3], lower[p+4]
				if pentagrams[a] == nil {
					pentagrams[a] = make(map[string]map[string]map[string]map[string][]string)
				}
				if pentagrams[a][b] == nil {
					pentagrams[a][b] = make(map[string]map[string]map[string][]string)
				}
				if pentagrams[a][b][c] == nil {
					pentagrams[a][b][c] = make(map[string]map[string][]string)
				}
				if pentagrams[a][b][c][d] == nil {
					pentagrams[a][b][c][d] = make(map[string][]string)
				}
				pentagrams[a][b][c][d][e] = append(pentagrams[a][b][c][d][e], tokens[p+5])
			}
		}
	}

	return unigrams, bigrams, trigrams, quadgrams, pentagrams
}

// estimateMemoryUsage returns a rough memory estimate in MB for chains built
// from wordCount words up to the given n-gram level. The base figure is one
// MB per full thousand words (integer division); levels 2 through 6 scale it
// by a fixed factor and every other level returns the base figure unchanged.
func estimateMemoryUsage(wordCount int, maxNGram int) int {
	// factors[n] is the multiplier applied for n-gram level n.
	factors := [...]int{2: 5, 3: 15, 4: 35, 5: 75, 6: 150}

	baseMB := wordCount / 1000
	if maxNGram >= 2 && maxNGram < len(factors) {
		return baseMB * factors[maxNGram]
	}
	return baseMB
}

// generateMessage builds a message of 5 to 24 words by walking the 1-gram
// chain, whose keys are lower-case words mapped to observed following words.
//
// The walk starts at a word longer than 2 bytes with at least two follow-ups
// when one turns up among the first entries inspected, and otherwise at the
// first entry seen, so small chains still produce output. When the current
// word has no follow-ups the walk restarts from any word that has some. The
// first word is capitalized and terminal punctuation is appended when the
// result does not already end in '.', '!' or '?'.
//
// It returns "" when the chain is empty or no starting word can be found.
func generateMessage(chain map[string][]string) string {
	if len(chain) == 0 {
		return ""
	}

	// capitalize upper-cases only the first rune. strings.Title is deprecated
	// and also upper-cases letters after apostrophes ("don't" -> "Don'T").
	capitalize := func(w string) string {
		r := []rune(w)
		if len(r) == 0 {
			return w
		}
		return strings.ToUpper(string(r[:1])) + string(r[1:])
	}

	// Pick a starting word. Map iteration order is unspecified, which is what
	// makes the choice vary between calls.
	var currentWord, fallback string
	scanned := 0
	for word, nextWords := range chain {
		if len(nextWords) >= 2 && len(word) > 2 { // Prefer words with multiple options
			currentWord = word
			break
		}
		if fallback == "" {
			fallback = word
		}
		scanned++
		if scanned > 50 { // Stop searching; settle for the fallback
			break
		}
	}
	if currentWord == "" {
		currentWord = fallback
	}
	if currentWord == "" {
		return ""
	}

	// Generate between 5 and 24 words
	maxWords := 5 + rand.Intn(20)
	words := make([]string, 0, maxWords)
	for i := 0; i < maxWords; i++ {
		// Add current word (capitalize first word)
		if i == 0 {
			words = append(words, capitalize(currentWord))
		} else {
			words = append(words, currentWord)
		}

		if nextWords := chain[strings.ToLower(currentWord)]; len(nextWords) > 0 {
			// Randomly select the next word from the possible follow-ups
			currentWord = nextWords[rand.Intn(len(nextWords))]
			continue
		}

		// Dead end: try to find a new starting point
		found := false
		for word, nextWords := range chain {
			if len(nextWords) > 0 && len(word) > 2 {
				currentWord = word
				found = true
				break
			}
		}
		if !found {
			break
		}
	}

	result := strings.Join(words, " ")

	// Add punctuation if missing
	if len(result) > 0 && !strings.ContainsAny(result[len(result)-1:], ".!?") {
		punctuation := []string{".", "!", "?"}
		result += punctuation[rand.Intn(len(punctuation))]
	}

	return result
}

// init seeds the shared math/rand generator from the current wall-clock time
// so generated messages differ between runs.
// NOTE(review): rand.Seed is deprecated as of Go 1.20, where the global
// generator is seeded automatically — confirm the module's Go version before
// removing this.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}

// MarkovQuestionCommand answers a question with Markov-generated text built
// from the history of the channel the interaction came from.
//
// It reads two options: "question" (the text to answer) and "messages" (how
// many messages to learn from; non-positive values use the configured default
// and values above the configured maximum are clamped). Without a question it
// returns a prompt asking for one. Chains are taken from the cache when
// present and otherwise built from freshly fetched messages and cached.
//
// The result is formatted as "**Q:** <question>\n**A:** <answer>", where the
// answer is a fixed fallback sentence if nothing could be generated. An error
// is returned only when fetching messages fails.
func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
	var question string
	numMessages := lib.AppConfig.MarkovDefaultMessages

	for _, opt := range i.ApplicationCommandData().Options {
		if opt.Name == "question" {
			question = opt.StringValue()
			continue
		}
		if opt.Name != "messages" {
			continue
		}

		requested := int(opt.IntValue())
		switch {
		case requested <= 0:
			numMessages = lib.AppConfig.MarkovDefaultMessages
		case requested > lib.AppConfig.MarkovMaxMessages:
			numMessages = lib.AppConfig.MarkovMaxMessages
		default:
			numMessages = requested
		}
	}

	if question == "" {
		return "Please provide a question!", nil
	}

	var (
		twoGramChain   map[string]map[string][]string
		threeGramChain map[string]map[string]map[string][]string
		fourGramChain  map[string]map[string]map[string]map[string][]string
		fiveGramChain  map[string]map[string]map[string]map[string]map[string][]string
	)

	cacheKey := fmt.Sprintf("%s:%d", i.ChannelID, numMessages)
	chain := getCachedChain(cacheKey)
	if chain != nil {
		// Cache hit: pull the higher-order chains stored under the same key.
		twoGramChain = getCachedTwoGramChain(cacheKey)
		threeGramChain = getCachedThreeGramChain(cacheKey)
		fourGramChain = getCachedFourGramChain(cacheKey)
		fiveGramChain = getCachedFiveGramChain(cacheKey)
	} else {
		// Cache miss: fetch the history, build every chain and cache them.
		fetched, err := fetchMessages(s, i.ChannelID, numMessages)
		if err != nil {
			return "", err
		}

		chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain = buildMarkovChain(fetched)
		setCachedChain(cacheKey, chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, fetched)
	}

	answer := generateAdvancedQuestionAnswer(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, question)
	if answer == "" {
		answer = "I couldn't generate an answer to that question. The channel might not have enough relevant content."
	}

	return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
}

// generateQuestionAnswer produces a response to question from the 1-gram and
// 2-gram chains. The question is cleaned, categorized and used to rank
// starting words; the response is then generated from those candidates.
// It returns "" when the chain is empty or no starting word is found.
func generateQuestionAnswer(chain map[string][]string, twoGramChain map[string]map[string][]string, question string) string {
	if len(chain) == 0 {
		return ""
	}

	normalized := cleanText(question)
	kind := categorizeQuestion(normalized)

	// Rank starting words against the lower-cased words of the question.
	seeds := findBestStartingWords(chain, strings.Fields(strings.ToLower(normalized)), kind)
	if len(seeds) == 0 {
		return ""
	}

	return generateCoherentResponse(chain, twoGramChain, seeds, kind)
}

// categorizeQuestion classifies a question by the first keyword, in priority
// order, that occurs anywhere in its lower-cased text: "what", "how", "why",
// "when", "where", "who", "which". If none occurs but "is", "are", "do" or
// "does" does, the result is "yesno"; otherwise it is "general".
//
// Matching is by substring, not by whole word, so e.g. "show" counts as "how".
func categorizeQuestion(question string) string {
	lowered := strings.ToLower(question)

	// Interrogatives, highest priority first; each is its own category name.
	for _, keyword := range []string{"what", "how", "why", "when", "where", "who", "which"} {
		if strings.Contains(lowered, keyword) {
			return keyword
		}
	}

	// Auxiliary verbs that suggest a yes/no question.
	for _, auxiliary := range []string{"is", "are", "do", "does"} {
		if strings.Contains(lowered, auxiliary) {
			return "yesno"
		}
	}

	return "general"
}

// WordCandidate represents a potential starting word with its relevance score
type WordCandidate struct {
	// Word is the candidate starting word.
	Word  string
	// Score is the word's relevance; higher means more relevant.
	Score int
}

// findBestStartingWords scores potential starting words for an answer and
// returns at most the ten highest-scoring ones, best first.
//
// Scoring:
//   - +10 for each occurrence in questionWords of a word longer than 2 bytes
//     that is not a stop word and has follow-ups in chain;
//   - +5 for each contextual word of questionType that has follow-ups in chain;
//   - any other chain word longer than 2 bytes that is not a stop word and
//     has at least 3 follow-ups scores half its follow-up count.
//
// The order among equally scored words is unspecified because it derives from
// map iteration order. The result is nil when there are no candidates.
func findBestStartingWords(chain map[string][]string, questionWords []string, questionType string) []WordCandidate {
	// maxCandidates is the number of top-scoring words returned.
	const maxCandidates = 10

	candidates := make(map[string]int)

	// Score words from the question that exist in our chain
	for _, word := range questionWords {
		if len(word) > 2 && !isStopWord(word) && len(chain[word]) > 0 {
			candidates[word] += 10 // High score for direct question words
		}
	}

	// Add contextually relevant words based on question type
	for _, word := range getContextualWords(questionType) {
		if len(chain[word]) > 0 {
			candidates[word] += 5 // Medium score for contextual words
		}
	}

	// Add high-frequency words as fallback
	for word, nextWords := range chain {
		if len(nextWords) >= 3 && len(word) > 2 && !isStopWord(word) {
			if _, exists := candidates[word]; !exists {
				candidates[word] = len(nextWords) / 2 // Score based on frequency
			}
		}
	}

	if len(candidates) == 0 {
		return nil
	}

	result := make([]WordCandidate, 0, len(candidates))
	for word, score := range candidates {
		result = append(result, WordCandidate{Word: word, Score: score})
	}

	// Selection sort by score (highest first), stopped after maxCandidates
	// passes: pass i moves the best remaining candidate to position i, and
	// positions past the cut-off are discarded below, so sorting them would be
	// wasted work on large chains.
	for i := 0; i < len(result)-1 && i < maxCandidates; i++ {
		for j := i + 1; j < len(result); j++ {
			if result[j].Score > result[i].Score {
				result[i], result[j] = result[j], result[i]
			}
		}
	}

	// Return top candidates
	if len(result) > maxCandidates {
		result = result[:maxCandidates]
	}

	return result
}

// getContextualWords returns the seed words associated with a question type.
// Unrecognised types get a general-purpose list. A fresh slice is returned on
// every call.
func getContextualWords(questionType string) []string {
	var words []string

	switch questionType {
	case "what":
		words = []string{"thing", "something", "object", "idea", "concept", "stuff", "item"}
	case "how":
		words = []string{"way", "method", "process", "steps", "technique", "approach"}
	case "why":
		words = []string{"because", "reason", "cause", "since", "due", "explanation"}
	case "when":
		words = []string{"time", "moment", "day", "hour", "yesterday", "today", "tomorrow", "now", "then"}
	case "where":
		words = []string{"place", "location", "here", "there", "somewhere", "anywhere"}
	case "who":
		words = []string{"person", "people", "someone", "anyone", "everybody", "nobody"}
	case "which":
		words = []string{"choice", "option", "selection", "pick", "prefer"}
	case "yesno":
		words = []string{"yes", "no", "maybe", "definitely", "probably", "possibly", "sure", "absolutely"}
	default:
		words = []string{"think", "believe", "know", "understand", "feel", "seem"}
	}

	return words
}

// generateCoherentResponse generates three candidate responses from the given
// starting-word candidates and returns the one scoreResponse rates highest.
//
// Each attempt starts at the top candidate, or with roughly 30% probability
// at a random one of the top three, and emits 8 to 29 words. The next word
// comes from the 2-gram chain when the previous and current word have an
// entry there, otherwise from the 1-gram chain (preferring follow-ups longer
// than 2 bytes that are not stop words); at a dead end the walk restarts from
// the first candidate that has follow-ups. The first word is capitalized and
// terminal punctuation chosen for questionType is appended when missing.
//
// It returns "" only when candidates is empty.
func generateCoherentResponse(chain map[string][]string, twoGramChain map[string]map[string][]string, candidates []WordCandidate, questionType string) string {
	if len(candidates) == 0 {
		return ""
	}

	// capitalize upper-cases only the first rune. strings.Title is deprecated
	// and also upper-cases letters after apostrophes ("don't" -> "Don'T").
	capitalize := func(w string) string {
		r := []rune(w)
		if len(r) == 0 {
			return w
		}
		return strings.ToUpper(string(r[:1])) + string(r[1:])
	}

	// Try multiple generation attempts and pick the best one. Scores are never
	// negative, so starting below zero keeps a response that scores 0 instead
	// of discarding it and returning nothing.
	var bestResponse string
	bestScore := -1

	for attempt := 0; attempt < 3; attempt++ {
		// Select starting word (bias towards higher scored candidates)
		candidateIndex := 0
		if len(candidates) > 1 && rand.Float32() > 0.7 {
			candidateIndex = rand.Intn(min(3, len(candidates)))
		}

		currentWord := candidates[candidateIndex].Word
		maxWords := 8 + rand.Intn(22) // 8-29 words
		words := make([]string, 0, maxWords)
		lastWord := ""

		for i := 0; i < maxWords; i++ {
			// Add current word (capitalize first word)
			if i == 0 {
				words = append(words, capitalize(currentWord))
			} else {
				words = append(words, currentWord)
			}

			var nextWord string

			// Try 2-gram chain first for better coherence
			if lastWord != "" {
				if twoGramOptions := twoGramChain[strings.ToLower(lastWord)][strings.ToLower(currentWord)]; len(twoGramOptions) > 0 {
					nextWord = twoGramOptions[rand.Intn(len(twoGramOptions))]
				}
			}

			// Fallback to regular chain
			if nextWord == "" {
				if nextWords := chain[strings.ToLower(currentWord)]; len(nextWords) > 0 {
					// Prefer longer words for better content
					var goodOptions []string
					for _, word := range nextWords {
						if len(word) > 2 && !isStopWord(strings.ToLower(word)) {
							goodOptions = append(goodOptions, word)
						}
					}

					if len(goodOptions) > 0 {
						nextWord = goodOptions[rand.Intn(len(goodOptions))]
					} else {
						nextWord = nextWords[rand.Intn(len(nextWords))]
					}
				}
			}

			// If we can't find a next word, try to restart with a good candidate
			if nextWord == "" {
				for _, candidate := range candidates {
					if len(chain[candidate.Word]) > 0 {
						nextWord = candidate.Word
						break
					}
				}
				if nextWord == "" {
					break
				}
			}

			lastWord = currentWord
			currentWord = nextWord
		}

		response := strings.Join(words, " ")

		// Keep the highest-scoring response
		if score := scoreResponse(response, questionType); score > bestScore {
			bestScore = score
			bestResponse = response
		}
	}

	// Add appropriate punctuation
	if len(bestResponse) > 0 && !strings.ContainsAny(bestResponse[len(bestResponse)-1:], ".!?") {
		punctuation := getPunctuationForQuestionType(questionType)
		bestResponse += punctuation[rand.Intn(len(punctuation))]
	}

	return bestResponse
}

// scoreResponse rates a response; higher is better. It adds:
//   - 10 points for 8-16 words, or 5 points for 6-20 words;
//   - 5 points when the average word length is strictly between 3.5 and 6.0
//     bytes;
//   - 1 point per word longer than 3 bytes that is not a stop word.
//
// questionType is currently not used in the score.
func scoreResponse(response string, questionType string) int {
	tokens := strings.Fields(response)
	count := len(tokens)

	// One pass gathers both the total length and the content-word count.
	byteTotal, meaningful := 0, 0
	for _, tok := range tokens {
		byteTotal += len(tok)
		if len(tok) > 3 && !isStopWord(strings.ToLower(tok)) {
			meaningful++
		}
	}

	total := meaningful

	switch {
	case count >= 8 && count <= 16:
		total += 10
	case count >= 6 && count <= 20:
		total += 5
	}

	// For an empty response this is 0/0 = NaN, which fails both comparisons.
	if mean := float64(byteTotal) / float64(count); mean > 3.5 && mean < 6.0 {
		total += 5
	}

	return total
}

// getPunctuationForQuestionType returns the pool of terminal punctuation to
// draw from for a question type. Repeated entries make a mark proportionally
// more likely when one is picked uniformly at random.
func getPunctuationForQuestionType(questionType string) []string {
	if questionType == "yesno" {
		return []string{".", "!", "."}
	}
	if questionType == "why" || questionType == "how" {
		return []string{".", ".", "!"}
	}
	return []string{".", ".", "!", "."}
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// isStopWord reports whether word is one of a fixed set of common English
// stop words that should not be used as starting points. The comparison is
// exact, so callers must pass the word already lower-cased.
//
// A switch is used rather than a map literal so that no map is allocated and
// populated on every call; this function runs inside generation loops.
func isStopWord(word string) bool {
	switch word {
	case "a", "an", "and", "are", "as", "at", "be", "by",
		"for", "from", "has", "he", "in", "is", "it",
		"its", "of", "on", "that", "the", "to", "was",
		"will", "with", "or", "but", "if", "so", "do":
		return true
	}
	return false
}

// buildTwoGramChain derives a word-pair table from a 1-gram chain. For every
// word with at least one follow-up it creates an entry; under that entry each
// lower-cased follow-up that is itself a key of chain maps to that key's
// follow-up list. The lists are shared with chain, not copied.
//
// Because it is derived from the 1-gram chain alone, the table adds no
// information beyond what chain already holds.
func buildTwoGramChain(chain map[string][]string) map[string]map[string][]string {
	pairs := make(map[string]map[string][]string)

	for head, followers := range chain {
		if len(followers) == 0 {
			continue // words without follow-ups get no entry at all
		}

		continuations := make(map[string][]string)
		for _, follower := range followers {
			key := strings.ToLower(follower)
			if onward, known := chain[key]; known {
				continuations[key] = onward
			}
		}
		pairs[head] = continuations
	}

	return pairs
}

// generateImprovedMessage generates three candidate messages from the 1-gram
// and 2-gram chains and returns the one scoreGeneratedMessage rates highest.
//
// Each attempt starts at a word longer than 2 bytes that is not a stop word
// and has at least two follow-ups when one turns up among the first entries
// inspected, and otherwise at the first entry seen, so small chains still
// produce output. It then emits 8 to 19 words. The next word comes from the
// 2-gram chain when the previous and current word have an entry there,
// otherwise from the 1-gram chain (preferring follow-ups longer than 2 bytes
// that are not stop words); at a dead end the walk restarts from any suitable
// word. The first word is capitalized and terminal punctuation is appended
// when missing.
//
// It returns "" when the chain is empty or no starting word can be found.
func generateImprovedMessage(chain map[string][]string, twoGramChain map[string]map[string][]string) string {
	if len(chain) == 0 {
		return ""
	}

	// capitalize upper-cases only the first rune. strings.Title is deprecated
	// and also upper-cases letters after apostrophes ("don't" -> "Don'T").
	capitalize := func(w string) string {
		r := []rune(w)
		if len(r) == 0 {
			return w
		}
		return strings.ToUpper(string(r[:1])) + string(r[1:])
	}

	// Try multiple generation attempts and pick the best one. Scores are never
	// negative, so starting below zero keeps a message that scores 0 instead
	// of discarding it and returning nothing.
	var bestMessage string
	bestScore := -1

	for attempt := 0; attempt < 3; attempt++ {
		// Pick a starting word. Map iteration order is unspecified, which is
		// what makes the choice vary between attempts.
		var currentWord, fallback string
		scanned := 0
		for word, nextWords := range chain {
			if len(nextWords) >= 2 && len(word) > 2 && !isStopWord(word) { // Prefer words with multiple options
				currentWord = word
				break
			}
			if fallback == "" {
				fallback = word
			}
			scanned++
			if scanned > 50 { // Stop searching; settle for the fallback
				break
			}
		}
		if currentWord == "" {
			currentWord = fallback
		}
		if currentWord == "" {
			continue
		}

		// Generate between 8 and 19 words for better content
		maxWords := 8 + rand.Intn(12)
		words := make([]string, 0, maxWords)
		lastWord := ""

		for i := 0; i < maxWords; i++ {
			// Add current word (capitalize first word)
			if i == 0 {
				words = append(words, capitalize(currentWord))
			} else {
				words = append(words, currentWord)
			}

			var nextWord string

			// Try 2-gram chain first for better coherence (indexing a nil
			// map is safe and simply finds nothing)
			if lastWord != "" {
				if twoGramOptions := twoGramChain[strings.ToLower(lastWord)][strings.ToLower(currentWord)]; len(twoGramOptions) > 0 {
					nextWord = twoGramOptions[rand.Intn(len(twoGramOptions))]
				}
			}

			// Fallback to regular chain
			if nextWord == "" {
				if nextWords := chain[strings.ToLower(currentWord)]; len(nextWords) > 0 {
					// Prefer longer, more meaningful words
					var goodOptions []string
					for _, word := range nextWords {
						if len(word) > 2 && !isStopWord(strings.ToLower(word)) {
							goodOptions = append(goodOptions, word)
						}
					}

					if len(goodOptions) > 0 {
						nextWord = goodOptions[rand.Intn(len(goodOptions))]
					} else {
						nextWord = nextWords[rand.Intn(len(nextWords))]
					}
				}
			}

			// If we can't find a next word, try to restart
			if nextWord == "" {
				for word, nextWords := range chain {
					if len(nextWords) > 0 && len(word) > 2 && !isStopWord(word) {
						nextWord = word
						break
					}
				}
				if nextWord == "" {
					break
				}
			}

			lastWord = currentWord
			currentWord = nextWord
		}

		message := strings.Join(words, " ")

		// Keep the highest-scoring message
		if score := scoreGeneratedMessage(message); score > bestScore {
			bestScore = score
			bestMessage = message
		}
	}

	// Add punctuation if missing
	if len(bestMessage) > 0 && !strings.ContainsAny(bestMessage[len(bestMessage)-1:], ".!?") {
		// "." appears twice so it is picked more often than "!" or "?"
		punctuation := []string{".", "!", "?", "."}
		bestMessage += punctuation[rand.Intn(len(punctuation))]
	}

	return bestMessage
}

// scoreGeneratedMessage rates a generated message; higher is better. It adds:
//   - 10 points for 8-16 words, or 5 points for 6-20 words;
//   - 5 points when the average word length is strictly between 3.0 and 7.0
//     bytes;
//   - 1 point per word longer than 3 bytes that is not a stop word.
//
// An empty message scores 0.
func scoreGeneratedMessage(message string) int {
	tokens := strings.Fields(message)
	count := len(tokens)

	// One pass gathers both the total length and the content-word count.
	byteTotal, meaningful := 0, 0
	for _, tok := range tokens {
		byteTotal += len(tok)
		if len(tok) > 3 && !isStopWord(strings.ToLower(tok)) {
			meaningful++
		}
	}

	total := meaningful

	switch {
	case count >= 8 && count <= 16:
		total += 10
	case count >= 6 && count <= 20:
		total += 5
	}

	if count > 0 {
		mean := float64(byteTotal) / float64(count)
		if mean > 3.0 && mean < 7.0 {
			total += 5
		}
	}

	return total
}

// generateAdvancedMessage creates a new message using all n-gram chains for
// maximum coherence.
//
// It makes five independent attempts. Each attempt picks a seed word from
// chain and then repeatedly looks up a continuation for the most recent
// words, trying the 5-gram chain first and backing off through the 4-, 3-,
// 2- and 1-gram chains until one of them yields a word. The n-gram chains
// are indexed with lower-cased words, oldest word first.
//
// The attempt with the highest scoreAdvancedMessage result wins, and a
// terminal ".", "!" or "?" is appended when the winner does not already end
// in one. It returns "" when chain is empty or when no attempt scored above
// zero.
func generateAdvancedMessage(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string) string {
	if len(chain) == 0 {
		return ""
	}

	// capitalize upper-cases only the first rune of w. strings.Title is
	// deprecated and treats apostrophes and hyphens as word boundaries, which
	// turns "don't" into "Don'T".
	capitalize := func(w string) string {
		for i := range w {
			if i > 0 {
				// i is the byte offset of the second rune.
				return strings.ToUpper(w[:i]) + w[i:]
			}
		}
		return strings.ToUpper(w)
	}

	// Try multiple generation attempts and pick the best one.
	var bestMessage string
	bestScore := 0

	for attempt := 0; attempt < 5; attempt++ {
		// Seed word. Go does not specify map iteration order, so the first
		// suitable key found is effectively arbitrary. Prefer a non-stop word
		// longer than two bytes with at least two follow-ups; once 51 keys
		// have been rejected the last of them is used anyway.
		var currentWord string
		rejected := 0
		for word, nextWords := range chain {
			if len(nextWords) >= 2 && len(word) > 2 && !isStopWord(word) {
				currentWord = word
				break
			}
			rejected++
			if rejected > 50 {
				currentWord = word
				break
			}
		}

		if currentWord == "" {
			// No seed was chosen (small chain without a suitable key).
			continue
		}

		// Generate between 10 and 17 words.
		maxWords := 10 + rand.Intn(8)
		words := make([]string, 0, maxWords)
		wordHistory := []string{currentWord}

		for i := 0; i < maxWords; i++ {
			// Add current word (capitalize first word).
			if i == 0 {
				words = append(words, capitalize(currentWord))
			} else {
				words = append(words, currentWord)
			}

			// Lower-cased copy of the last (at most five) history words,
			// oldest first; these are the n-gram lookup keys.
			recent := wordHistory
			if len(recent) > 5 {
				recent = recent[len(recent)-5:]
			}
			keys := make([]string, len(recent))
			for k, w := range recent {
				keys[k] = strings.ToLower(w)
			}
			n := len(keys)

			// Back off from the longest context to the shortest. Indexing a
			// missing key (or a nil chain) yields a nil map or slice, so no
			// existence checks are needed along the way.
			var nextWord string
			if n >= 5 {
				if options := fiveGramChain[keys[n-5]][keys[n-4]][keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 4 {
				if options := fourGramChain[keys[n-4]][keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 3 {
				if options := threeGramChain[keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 2 {
				if options := twoGramChain[keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" {
				if options := chain[strings.ToLower(currentWord)]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}

			// Dead end: restart from any non-stop word longer than two bytes
			// that has at least one follow-up, or give up on this attempt.
			if nextWord == "" {
				for word, nextWords := range chain {
					if len(nextWords) > 0 && len(word) > 2 && !isStopWord(word) {
						nextWord = word
						break
					}
				}
				if nextWord == "" {
					break
				}
			}

			currentWord = nextWord
			wordHistory = append(wordHistory, currentWord)

			// Keep history manageable: only the last ten words are retained.
			if len(wordHistory) > 10 {
				wordHistory = wordHistory[1:]
			}
		}

		message := strings.Join(words, " ")

		// Keep the highest-scoring attempt.
		if score := scoreAdvancedMessage(message); score > bestScore {
			bestScore = score
			bestMessage = message
		}
	}

	// Add punctuation if missing.
	if len(bestMessage) > 0 && !strings.ContainsAny(bestMessage[len(bestMessage)-1:], ".!?") {
		// "." is listed twice so a full stop is the most likely ending.
		punctuation := []string{".", "!", "?", "."}
		bestMessage += punctuation[rand.Intn(len(punctuation))]
	}

	return bestMessage
}

// generateAdvancedQuestionAnswer produces a Markov-chain reply to question
// using every n-gram level.
//
// The question is passed through cleanText, classified with
// categorizeQuestion, and its lower-cased words are handed to
// findBestStartingWords to obtain seed candidates. Those candidates and the
// question category are then forwarded to generateAdvancedCoherentResponse.
// It returns "" when chain is empty or when no seed candidate is found.
func generateAdvancedQuestionAnswer(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, question string) string {
	if len(chain) == 0 {
		return ""
	}

	normalized := cleanText(question)
	tokens := strings.Fields(strings.ToLower(normalized))
	kind := categorizeQuestion(normalized)

	seeds := findBestStartingWords(chain, tokens, kind)
	if len(seeds) == 0 {
		// Nothing in the chain relates to the question.
		return ""
	}

	return generateAdvancedCoherentResponse(chain, twoGramChain, threeGramChain, fourGramChain, fiveGramChain, seeds, kind)
}

// generateAdvancedCoherentResponse creates a response that starts from one of
// candidates and is extended using all n-gram levels.
//
// It makes five independent attempts. Each attempt picks a seed from
// candidates (candidates[0] is treated as the top candidate) and then
// repeatedly looks up a continuation for the most recent words, trying the
// 5-gram chain first and backing off through the 4-, 3-, 2- and 1-gram
// chains. The n-gram chains are indexed with lower-cased words, oldest word
// first.
//
// The attempt with the highest scoreAdvancedResponse result wins; when it
// does not already end in ".", "!" or "?", an ending chosen at random from
// getPunctuationForQuestionType(questionType) is appended. It returns ""
// when candidates is empty or when no attempt scored above zero.
func generateAdvancedCoherentResponse(chain map[string][]string, twoGramChain map[string]map[string][]string, threeGramChain map[string]map[string]map[string][]string, fourGramChain map[string]map[string]map[string]map[string][]string, fiveGramChain map[string]map[string]map[string]map[string]map[string][]string, candidates []WordCandidate, questionType string) string {
	if len(candidates) == 0 {
		return ""
	}

	// capitalize upper-cases only the first rune of w. strings.Title is
	// deprecated and treats apostrophes and hyphens as word boundaries, which
	// turns "don't" into "Don'T".
	capitalize := func(w string) string {
		for i := range w {
			if i > 0 {
				// i is the byte offset of the second rune.
				return strings.ToUpper(w[:i]) + w[i:]
			}
		}
		return strings.ToUpper(w)
	}

	// Try multiple generation attempts and pick the best one.
	var bestResponse string
	bestScore := 0

	for attempt := 0; attempt < 5; attempt++ {
		// Seed selection: about 70% of the time the top candidate is used
		// directly; otherwise the seed is drawn uniformly from the first
		// three candidates (which may again yield the top one).
		candidateIndex := 0
		if len(candidates) > 1 && rand.Float32() > 0.7 {
			candidateIndex = rand.Intn(min(3, len(candidates)))
		}

		currentWord := candidates[candidateIndex].Word

		// 12 to 21 words for substantial answers.
		maxWords := 12 + rand.Intn(10)
		words := make([]string, 0, maxWords)
		wordHistory := []string{currentWord}

		for i := 0; i < maxWords; i++ {
			// Add current word (capitalize first word).
			if i == 0 {
				words = append(words, capitalize(currentWord))
			} else {
				words = append(words, currentWord)
			}

			// Lower-cased copy of the last (at most five) history words,
			// oldest first; these are the n-gram lookup keys.
			recent := wordHistory
			if len(recent) > 5 {
				recent = recent[len(recent)-5:]
			}
			keys := make([]string, len(recent))
			for k, w := range recent {
				keys[k] = strings.ToLower(w)
			}
			n := len(keys)

			// Back off from the longest context to the shortest. Indexing a
			// missing key (or a nil chain) yields a nil map or slice, so no
			// existence checks are needed along the way.
			var nextWord string
			if n >= 5 {
				if options := fiveGramChain[keys[n-5]][keys[n-4]][keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 4 {
				if options := fourGramChain[keys[n-4]][keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 3 {
				if options := threeGramChain[keys[n-3]][keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" && n >= 2 {
				if options := twoGramChain[keys[n-2]][keys[n-1]]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}
			if nextWord == "" {
				if options := chain[strings.ToLower(currentWord)]; len(options) > 0 {
					nextWord = selectBestNextWord(options, wordHistory)
				}
			}

			// Dead end: restart from the first candidate that has at least
			// one follow-up in chain, or give up on this attempt.
			if nextWord == "" {
				found := false
				for _, candidate := range candidates {
					if len(chain[candidate.Word]) > 0 {
						nextWord = candidate.Word
						found = true
						break
					}
				}
				if !found {
					break
				}
			}

			currentWord = nextWord
			wordHistory = append(wordHistory, currentWord)

			// Keep history manageable: only the last ten words are retained.
			if len(wordHistory) > 10 {
				wordHistory = wordHistory[1:]
			}
		}

		response := strings.Join(words, " ")

		// Keep the highest-scoring attempt.
		if score := scoreAdvancedResponse(response, questionType); score > bestScore {
			bestScore = score
			bestResponse = response
		}
	}

	// Add appropriate punctuation.
	if len(bestResponse) > 0 && !strings.ContainsAny(bestResponse[len(bestResponse)-1:], ".!?") {
		punctuation := getPunctuationForQuestionType(questionType)
		// rand.Intn panics on a non-positive argument, so skip an empty set.
		if len(punctuation) > 0 {
			bestResponse += punctuation[rand.Intn(len(punctuation))]
		}
	}

	return bestResponse
}

// scoreAdvancedMessage rates a generated message with enhanced metrics; a
// higher value means a better message.
//
// Points are awarded as follows:
//   - word count: 15 for 10-16 words, 10 for 8-18 words, 5 for 6-20 words;
//   - average word length (in bytes) strictly between 3.5 and 6.5: 8;
//   - share of distinct words (case-insensitive): 10 above 0.8, 5 above 0.6;
//   - every word longer than 3 bytes that is not a stop word: 2 each;
//   - no doubled "a", "the" or "you" anywhere in the message: 5.
//
// The doubled-word check compares adjacent words case-insensitively, so a
// stutter at the very start ("A a ...") or end ("... the the") of the message
// is detected too; a substring search for " a a " would miss both.
func scoreAdvancedMessage(message string) int {
	words := strings.Fields(message)
	score := 0

	// Length score (prefer 10-16 words).
	switch n := len(words); {
	case n >= 10 && n <= 16:
		score += 15
	case n >= 8 && n <= 18:
		score += 10
	case n >= 6 && n <= 20:
		score += 5
	}

	// A single pass collects everything the remaining rules need.
	totalLength := 0
	contentWords := 0
	stutter := false
	uniqueWords := make(map[string]struct{}, len(words))
	prev := ""
	for i, word := range words {
		lower := strings.ToLower(word)

		totalLength += len(word)
		uniqueWords[lower] = struct{}{}

		if len(word) > 3 && !isStopWord(lower) {
			contentWords++
		}

		if i > 0 && lower == prev && (lower == "a" || lower == "the" || lower == "you") {
			stutter = true
		}
		prev = lower
	}

	if len(words) > 0 {
		// Average word length score.
		avgWordLength := float64(totalLength) / float64(len(words))
		if avgWordLength > 3.5 && avgWordLength < 6.5 {
			score += 8
		}

		// Uniqueness score (reward low repetition).
		uniqueRatio := float64(len(uniqueWords)) / float64(len(words))
		if uniqueRatio > 0.8 {
			score += 10
		} else if uniqueRatio > 0.6 {
			score += 5
		}
	}

	// Content word score (prefer messages with meaningful words).
	score += contentWords * 2

	// Grammar coherence bonus (simple heuristic).
	if !stutter {
		score += 5
	}

	return score
}

// scoreAdvancedResponse rates a response to a question of the given type.
//
// The base value is scoreAdvancedMessage(response). For the question types
// "yesno", "why", "how", "when" and "where" a one-off bonus of 8 is added
// when the lower-cased response contains at least one cue word for that type.
// Cues are matched as substrings, so for example "no" also matches "know".
// Any other question type receives no bonus.
func scoreAdvancedResponse(response string, questionType string) int {
	total := scoreAdvancedMessage(response)

	// Cue words that make a response look like an answer to this question type.
	var cues []string
	switch questionType {
	case "yesno":
		cues = []string{"yes", "no", "maybe", "definitely"}
	case "why":
		cues = []string{"because", "reason", "since", "due"}
	case "how":
		cues = []string{"way", "method", "process", "steps"}
	case "when":
		cues = []string{"time", "day", "hour", "moment"}
	case "where":
		cues = []string{"place", "location", "here", "there"}
	}

	lowered := strings.ToLower(response)
	for _, cue := range cues {
		if strings.Contains(lowered, cue) {
			// The bonus is granted once, however many cues match.
			total += 8
			break
		}
	}

	return total
}

// isValidNextWord reports whether nextWord may follow wordHistory without
// creating an obvious repetition or article clash. All comparisons are
// case-insensitive, and an empty history accepts every word.
//
// A word is rejected when:
//   - it equals the last word of the history (this also covers "a a",
//     "the the" and "you you");
//   - it already occurs at least twice among the last three history words;
//   - it is "a" directly after "you";
//   - it is an article ("a", "an", "the") directly after another article.
func isValidNextWord(wordHistory []string, nextWord string) bool {
	n := len(wordHistory)
	if n == 0 {
		return true
	}

	next := strings.ToLower(nextWord)
	last := strings.ToLower(wordHistory[n-1])

	// Prevent immediate repetition.
	if last == next {
		return false
	}

	// Prevent triple repetition in recent history.
	if n >= 3 {
		count := 0
		for _, w := range wordHistory[n-3:] {
			if strings.ToLower(w) == next {
				count++
			}
		}
		if count >= 2 {
			return false
		}
	}

	// Don't put "a" after "you" in most cases.
	if last == "you" && next == "a" {
		return false
	}

	// Don't put articles after articles.
	isArticle := func(w string) bool {
		return w == "a" || w == "an" || w == "the"
	}
	if isArticle(last) && isArticle(next) {
		return false
	}

	return true
}

// selectBestNextWord picks the next word from options given the words
// generated so far.
//
// Selection happens in three stages:
//  1. keep the options accepted by isValidNextWord;
//  2. if none survive, keep the options that do not repeat the last word of
//     wordHistory (case-insensitively) or, failing that, all options;
//  3. among the survivors prefer words longer than two bytes that are not
//     stop words, falling back to all survivors when there are none.
//
// The result is drawn at random from the final pool, so a word that appears
// several times in options is proportionally more likely. It returns "" when
// options is empty.
func selectBestNextWord(options []string, wordHistory []string) string {
	if len(options) == 0 {
		return ""
	}

	// keep returns the elements of in accepted by ok, preserving order.
	keep := func(in []string, ok func(string) bool) []string {
		var kept []string
		for _, w := range in {
			if ok(w) {
				kept = append(kept, w)
			}
		}
		return kept
	}

	// Stage 1: strict validity.
	pool := keep(options, func(w string) bool {
		return isValidNextWord(wordHistory, w)
	})

	// Stage 2: relax to "anything but an immediate repeat", then to everything.
	if len(pool) == 0 {
		pool = keep(options, func(w string) bool {
			if len(wordHistory) == 0 {
				return true
			}
			return strings.ToLower(wordHistory[len(wordHistory)-1]) != strings.ToLower(w)
		})
		if len(pool) == 0 {
			pool = options
		}
	}

	// Stage 3: prefer longer, more meaningful words.
	preferred := keep(pool, func(w string) bool {
		return len(w) > 2 && !isStopWord(strings.ToLower(w))
	})
	if len(preferred) == 0 {
		preferred = pool
	}

	return preferred[rand.Intn(len(preferred))]
}