package lib

import (
	"math/rand"
	"regexp"
	"strings"
	"time"
)

// MarkovData is the word-level Markov model produced by BuildMarkovChain and
// consumed by GenerateMessage.
type MarkovData struct {
	// Order is the number of consecutive words that make up one key.
	Order  int
	// Chain maps a key (Order words joined by single spaces, see Key) to the
	// words observed immediately after that word sequence. A successor is
	// appended once per occurrence, so repeated entries are expected.
	Chain  map[string][]string
	// Starts holds keys that may open a generated message: the first key of
	// the corpus plus every key that begins right after a word containing
	// '.', '!' or '?'. Duplicates are kept.
	Starts []string
}

// Patterns used by CleanText, and the stop-word table used by GenerateMessage.
var (
	// urlRegex matches "http://" or "https://" followed by any run of
	// non-whitespace characters.
	urlRegex     = regexp.MustCompile(`https?://[^\s]+`)
	// mentionRegex matches an angle-bracketed token whose first character is
	// one of '@', '#', '&' or '!', e.g. "<@123>".
	// NOTE(review): presumably chat-platform mention/channel markup — confirm.
	mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
	// bracketRegex matches the shortest "[...]" span; '.' does not cross
	// newlines, so a bracket pair must sit on one line to match.
	bracketRegex = regexp.MustCompile(`\[.*?\]`)
	// speakerRegex matches one or more leading tokens made of two or more
	// ASCII capitals, each followed by whitespace, anchored at the start of
	// the text.
	// NOTE(review): the name suggests transcript speaker labels — confirm.
	speakerRegex = regexp.MustCompile(`^(?:[A-Z]{2,}\s+)+`)
	// stopWords is a set of lower-case words that GenerateMessage ranks
	// behind all other seed words when choosing where to start. Lookups
	// lower-case the word first.
	stopWords    = map[string]bool{
		"the": true, "and": true, "a": true, "to": true, "of": true,
		"in": true, "is": true, "that": true, "it": true, "for": true,
		"as": true, "with": true, "on": true, "at": true, "by": true,
		"this": true, "from": true, "but": true, "or": true, "an": true,
		"be": true, "are": true, "was": true, "were": true, "so": true,
		"if": true, "out": true, "up": true, "about": true, "into": true,
		"over": true, "after": true, "beneath": true, "under": true,
		"above": true, "me": true, "my": true, "mine": true, "you": true,
		"your": true, "yours": true, "he": true, "him": true, "his": true,
		"she": true, "her": true, "hers": true, "they": true, "them": true,
		"their": true, "theirs": true, "we": true, "us": true, "our": true,
		"ours": true, "who": true, "whom": true, "whose": true, "what": true,
		"which": true, "when": true, "where": true, "why": true, "how": true,
		"give": true, "write": true, "tell": true, "say": true, "speak": true,
		"make": true, "do": true, "does": true, "did": true, "done": true,
	}
)

// init seeds the package-global math/rand source from the current wall-clock
// time in nanoseconds, so GenerateMessage does not repeat the same sequence
// on every run.
//
// NOTE: since Go 1.20 the global source is seeded randomly at program start
// and rand.Seed is deprecated; this call only matters on older toolchains.
func init() {
	nowNanos := time.Now().UnixNano()
	rand.Seed(nowNanos)
}

// CleanText strips noise from a line of text and normalises its whitespace.
//
// In order, it deletes every match of urlRegex, mentionRegex and bracketRegex,
// trims surrounding whitespace, deletes a leading speakerRegex match, and
// finally collapses every run of whitespace into a single space. The result
// has no leading or trailing whitespace and is "" when nothing is left.
func CleanText(text string) string {
	noURLs := urlRegex.ReplaceAllString(text, "")
	noMentions := mentionRegex.ReplaceAllString(noURLs, "")
	noBrackets := bracketRegex.ReplaceAllString(noMentions, "")

	// speakerRegex is anchored at the start, so trim before applying it.
	body := speakerRegex.ReplaceAllString(strings.TrimSpace(noBrackets), "")

	words := strings.Fields(body)
	return strings.Join(words, " ")
}

// BuildMarkovChain builds a word-level Markov model of the given order from
// lines.
//
// A line is skipped as a likely header when it is non-blank, upper-casing
// leaves it unchanged and lower-casing does not (i.e. it has letters, all of
// them capitals). Every other line is passed through CleanText and split on
// whitespace. The words of all surviving lines are concatenated into a single
// stream, so keys may span line boundaries.
//
// For each window of order consecutive words, the word that follows it is
// appended to Chain[Key(window...)]; repeats are kept. Starts receives the
// first window and every window that begins right after a word containing
// '.', '!' or '?'.
//
// The returned model is never nil. It has no Chain entries and no Starts when
// order is less than 1 or when order+1 words are not available after cleaning.
func BuildMarkovChain(lines []string, order int) *MarkovData {
	data := &MarkovData{
		Order:  order,
		Chain:  make(map[string][]string),
		Starts: make([]string, 0),
	}

	// A key needs at least one word. A negative order would also slip past
	// the length check below and panic in the slice expressions.
	if order < 1 {
		return data
	}

	var allWords []string
	for _, line := range lines {
		// Skip likely headers/metadata (all-caps lines).
		trimmed := strings.TrimSpace(line)
		allCaps := trimmed != "" &&
			strings.ToUpper(trimmed) == trimmed &&
			strings.ToLower(trimmed) != trimmed
		if allCaps {
			continue
		}

		cleaned := CleanText(line)
		if cleaned == "" {
			continue
		}
		allWords = append(allWords, strings.Fields(cleaned)...)
	}

	// At least one key plus one successor is required. Comparing against
	// order directly (rather than order+1) cannot overflow.
	if len(allWords) <= order {
		return data
	}

	// The first key is always a start.
	data.Starts = append(data.Starts, Key(allWords[:order]...))

	for i := 0; i < len(allWords)-order; i++ {
		k := Key(allWords[i : i+order]...)
		data.Chain[k] = append(data.Chain[k], allWords[i+order])

		// allWords[i] is the word that leaves the window on the next step. If
		// it carries sentence punctuation, the window starting at i+1 opens a
		// new sentence. That window always fits: the loop bound guarantees
		// i+1+order <= len(allWords).
		if strings.ContainsAny(allWords[i], ".!?") {
			data.Starts = append(data.Starts, Key(allWords[i+1:i+1+order]...))
		}
	}

	return data
}

// GenerateMessage produces a message by randomly walking the chain in data.
//
// When seed is non-empty it is cleaned with CleanText and its words are used
// to pick the opening key. Seed words are ranked (non-stop-words first, then
// longer words first); words of two bytes or fewer are ignored. The first
// ranked word that yields any match wins, trying in order:
//
//  1. start keys whose first word equals the seed word,
//  2. start keys containing the seed word in a later position,
//  3. any chain key whose first word equals the seed word.
//
// All comparisons are case-insensitive. One of the matching keys is chosen at
// random; if nothing matches, or seed is empty, a random entry of data.Starts
// is used instead.
//
// Generation then appends up to 40 words, each drawn at random from the
// successors of the current key, and stops early when the current key has no
// successors. Once the loop index exceeds 5, a word containing '.', '!' or '?'
// ends the message with probability of roughly 0.7.
//
// It returns "" when data is nil or has no start keys. Randomness comes from
// the package-global math/rand source.
func GenerateMessage(data *MarkovData, seed string) string {
	if data == nil || len(data.Starts) == 0 {
		return ""
	}

	const (
		maxSteps        = 40  // most words ever appended after the opening key
		softStopAfter   = 5   // loop index that must be exceeded before punctuation may end the message
		keepGoingChance = 0.3 // approximate probability of continuing past sentence punctuation
	)

	var currentKey string
	if seed != "" {
		if candidates := seedCandidates(data, seed); len(candidates) > 0 {
			currentKey = candidates[rand.Intn(len(candidates))]
		}
	}
	if currentKey == "" {
		currentKey = data.Starts[rand.Intn(len(data.Starts))]
	}

	output := strings.Fields(currentKey)

	for i := 0; i < maxSteps; i++ {
		nextOptions := data.Chain[currentKey]
		if len(nextOptions) == 0 {
			break
		}

		nextWord := nextOptions[rand.Intn(len(nextOptions))]
		output = append(output, nextWord)

		// Slide the key window: drop the oldest word, add the one just emitted.
		window := strings.Fields(currentKey)
		if len(window) == 0 {
			break
		}
		currentKey = Key(append(window[1:], nextWord)...)

		// Soft stop on punctuation.
		if i > softStopAfter && strings.ContainsAny(nextWord, ".!?") && rand.Float32() > keepGoingChance {
			break
		}
	}

	return strings.Join(output, " ")
}

// seedCandidates returns the keys GenerateMessage may open with for the given
// seed text, or nil when no seed word matches any key.
func seedCandidates(data *MarkovData, seed string) []string {
	words := rankSeedWords(strings.Fields(CleanText(seed)))
	if len(words) == 0 {
		return nil
	}

	leads := func(parts []string, word string) bool {
		return parts[0] == word
	}
	follows := func(parts []string, word string) bool {
		for _, p := range parts[1:] {
			if p == word {
				return true
			}
		}
		return false
	}

	// 1. Sentence starts that begin with a seed word.
	for _, w := range words {
		if hits := keysMatching(data.Starts, data.Order, w, leads); len(hits) > 0 {
			return hits
		}
	}

	// 2. Sentence starts that contain a seed word after the first position.
	for _, w := range words {
		if hits := keysMatching(data.Starts, data.Order, w, follows); len(hits) > 0 {
			return hits
		}
	}

	// 3. Any chain key that begins with a seed word.
	chainKeys := make([]string, 0, len(data.Chain))
	for k := range data.Chain {
		chainKeys = append(chainKeys, k)
	}
	for _, w := range words {
		if hits := keysMatching(chainKeys, data.Order, w, leads); len(hits) > 0 {
			return hits
		}
	}

	return nil
}

// rankSeedWords reorders words in place so that non-stop-words precede stop
// words and, within each group, longer words come first. It returns the
// lower-cased words that are longer than two bytes, in that order.
func rankSeedWords(words []string) []string {
	// outranks reports whether a must be tried before b.
	outranks := func(a, b string) bool {
		aStop := stopWords[strings.ToLower(a)]
		bStop := stopWords[strings.ToLower(b)]
		if aStop != bStop {
			return bStop
		}
		return len(a) > len(b)
	}

	// Exchange sort, kept deliberately: seed lists are a handful of words and
	// this preserves the existing ordering of equally ranked words.
	for i := range words {
		for j := i + 1; j < len(words); j++ {
			if outranks(words[j], words[i]) {
				words[i], words[j] = words[j], words[i]
			}
		}
	}

	ranked := make([]string, 0, len(words))
	for _, w := range words {
		if len(w) > 2 {
			ranked = append(ranked, strings.ToLower(w))
		}
	}
	return ranked
}

// keysMatching returns, in their original order and spelling, the keys whose
// lower-cased words satisfy match for word. Keys with no words or with fewer
// than order words are skipped.
func keysMatching(keys []string, order int, word string, match func(parts []string, word string) bool) []string {
	var hits []string
	for _, k := range keys {
		parts := strings.Fields(strings.ToLower(k))
		// The empty check protects match from indexing parts[0] on an empty
		// key, which the order check alone lets through when order <= 0.
		if len(parts) == 0 || len(parts) < order {
			continue
		}
		if match(parts, word) {
			hits = append(hits, k)
		}
	}
	return hits
}

// Key builds the map key for a sequence of words by joining them with a
// single space. It returns "" when called with no words.
func Key(words ...string) string {
	const sep = " "
	joined := strings.Join(words, sep)
	return joined
}