Trying some weird training nonsense. Maybe this will be fun.
Docker Deploy / build-and-push (push) Successful in 3m23s
Docker Deploy / build-and-push (push) Successful in 3m23s
This commit is contained in:
@@ -28,5 +28,8 @@ COPY --from=build /go/bin/app /app/himbot
|
||||
# Copy migrations directory
|
||||
COPY --from=build /app/migrations /app/migrations
|
||||
|
||||
# Copy datasets directory
|
||||
COPY --from=build /app/datasets /app/datasets
|
||||
|
||||
# Set the entrypoint
|
||||
ENTRYPOINT ["/app/himbot"]
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"flag"
|
||||
"himbot/lib"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
|
||||
outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
|
||||
order := flag.Int("order", 3, "Markov chain order (N-gram size)")
|
||||
flag.Parse()
|
||||
|
||||
log.Printf("Scanning directory: %s", *inputDir)
|
||||
|
||||
var allLines []string
|
||||
fileCount := 0
|
||||
|
||||
err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() && strings.HasSuffix(info.Name(), ".txt") {
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
log.Printf("Error reading file %s: %v", path, err)
|
||||
return nil // Continue to next file
|
||||
}
|
||||
lines := strings.Split(string(content), "\n")
|
||||
allLines = append(allLines, lines...)
|
||||
fileCount++
|
||||
if fileCount%5 == 0 {
|
||||
log.Printf("Processed %d files...", fileCount)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Error walking directory: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
|
||||
|
||||
chain := lib.BuildMarkovChain(allLines, *order)
|
||||
|
||||
log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
|
||||
|
||||
f, err := os.Create(*outputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create output file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
encoder := gob.NewEncoder(f)
|
||||
if err := encoder.Encode(chain); err != nil {
|
||||
log.Fatalf("Failed to encode chain: %v", err)
|
||||
}
|
||||
|
||||
log.Println("Done!")
|
||||
}
|
||||
+51
-153
@@ -2,73 +2,69 @@ package command
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"himbot/lib"
|
||||
"math/rand"
|
||||
"regexp"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/bwmarrin/discordgo"
|
||||
)
|
||||
|
||||
type MarkovData struct {
|
||||
Chain map[string][]string // "word1 word2" -> ["word3", ...]
|
||||
Starts []string
|
||||
}
|
||||
|
||||
type MarkovCache struct {
|
||||
data map[string]*MarkovData
|
||||
data map[string]*lib.MarkovData
|
||||
hashes map[string]string
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
var (
|
||||
markovCache = &MarkovCache{
|
||||
data: make(map[string]*MarkovData),
|
||||
data: make(map[string]*lib.MarkovData),
|
||||
hashes: make(map[string]string),
|
||||
}
|
||||
urlRegex = regexp.MustCompile(`https?://[^\s]+`)
|
||||
mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
|
||||
bardChain *lib.MarkovData
|
||||
)
|
||||
|
||||
func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
channelID := i.ChannelID
|
||||
|
||||
numMessages := lib.AppConfig.MarkovDefaultMessages
|
||||
if len(i.ApplicationCommandData().Options) > 0 {
|
||||
if i.ApplicationCommandData().Options[0].Name == "messages" {
|
||||
numMessages = int(i.ApplicationCommandData().Options[0].IntValue())
|
||||
if numMessages <= 0 {
|
||||
numMessages = lib.AppConfig.MarkovDefaultMessages
|
||||
} else if numMessages > lib.AppConfig.MarkovMaxMessages {
|
||||
numMessages = lib.AppConfig.MarkovMaxMessages
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
||||
if data := getCachedChain(cacheKey); data != nil {
|
||||
if msg := generateMessage(data, ""); msg != "" {
|
||||
return msg, nil
|
||||
}
|
||||
}
|
||||
|
||||
allMessages, err := fetchMessages(s, channelID, numMessages)
|
||||
func InitBard(modelPath string) error {
|
||||
f, err := os.Open(modelPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var data lib.MarkovData
|
||||
decoder := gob.NewDecoder(f)
|
||||
if err := decoder.Decode(&data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := buildMarkovChain(allMessages)
|
||||
setCachedChain(cacheKey, data, allMessages)
|
||||
bardChain = &data
|
||||
return nil
|
||||
}
|
||||
|
||||
newMessage := generateMessage(data, "")
|
||||
if newMessage == "" {
|
||||
newMessage = "Not enough text data to generate a message."
|
||||
func BardCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
if bardChain == nil {
|
||||
return "The bard is sleeping (dataset not loaded).", nil
|
||||
}
|
||||
|
||||
return newMessage, nil
|
||||
var question string
|
||||
for _, option := range i.ApplicationCommandData().Options {
|
||||
if option.Name == "question" {
|
||||
question = option.StringValue()
|
||||
}
|
||||
}
|
||||
|
||||
answer := lib.GenerateMessage(bardChain, question)
|
||||
if answer == "" {
|
||||
answer = "Words fail me."
|
||||
}
|
||||
|
||||
if question != "" {
|
||||
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||
}
|
||||
|
||||
return answer, nil
|
||||
}
|
||||
|
||||
func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
@@ -95,7 +91,7 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
||||
var data *MarkovData
|
||||
var data *lib.MarkovData
|
||||
|
||||
if cachedData := getCachedChain(cacheKey); cachedData != nil {
|
||||
data = cachedData
|
||||
@@ -104,11 +100,18 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
data = buildMarkovChain(allMessages)
|
||||
setCachedChain(cacheKey, data, allMessages)
|
||||
|
||||
var texts []string
|
||||
for _, msg := range allMessages {
|
||||
texts = append(texts, msg.Content)
|
||||
}
|
||||
|
||||
// Use order 2 for chat history (sparse data)
|
||||
data = lib.BuildMarkovChain(texts, 2)
|
||||
setCachedChain(cacheKey, data, hashMessages(allMessages))
|
||||
}
|
||||
|
||||
answer := generateMessage(data, question)
|
||||
answer := lib.GenerateMessage(data, question)
|
||||
if answer == "" {
|
||||
answer = "I don't have enough context to answer that."
|
||||
}
|
||||
@@ -116,15 +119,13 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||
}
|
||||
|
||||
func getCachedChain(cacheKey string) *MarkovData {
|
||||
func getCachedChain(cacheKey string) *lib.MarkovData {
|
||||
markovCache.mu.RLock()
|
||||
defer markovCache.mu.RUnlock()
|
||||
return markovCache.data[cacheKey]
|
||||
}
|
||||
|
||||
func setCachedChain(cacheKey string, data *MarkovData, messages []*discordgo.Message) {
|
||||
hash := hashMessages(messages)
|
||||
|
||||
func setCachedChain(cacheKey string, data *lib.MarkovData, hash string) {
|
||||
markovCache.mu.Lock()
|
||||
defer markovCache.mu.Unlock()
|
||||
|
||||
@@ -186,106 +187,3 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]*
|
||||
|
||||
return allMessages, nil
|
||||
}
|
||||
|
||||
func cleanText(text string) string {
|
||||
text = urlRegex.ReplaceAllString(text, "")
|
||||
text = mentionRegex.ReplaceAllString(text, "")
|
||||
return strings.Join(strings.Fields(text), " ")
|
||||
}
|
||||
|
||||
func buildMarkovChain(messages []*discordgo.Message) *MarkovData {
|
||||
data := &MarkovData{
|
||||
Chain: make(map[string][]string),
|
||||
Starts: make([]string, 0),
|
||||
}
|
||||
|
||||
for _, msg := range messages {
|
||||
cleaned := cleanText(msg.Content)
|
||||
if cleaned == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
words := strings.Fields(cleaned)
|
||||
if len(words) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
startKey := key(words[0], words[1])
|
||||
data.Starts = append(data.Starts, startKey)
|
||||
|
||||
for i := 0; i < len(words)-2; i++ {
|
||||
k := key(words[i], words[i+1])
|
||||
val := words[i+2]
|
||||
data.Chain[k] = append(data.Chain[k], val)
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func generateMessage(data *MarkovData, seed string) string {
|
||||
if len(data.Starts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var w1, w2 string
|
||||
var currentKey string
|
||||
|
||||
// Try to seed based on input question
|
||||
if seed != "" {
|
||||
seedWords := strings.Fields(cleanText(seed))
|
||||
var candidates []string
|
||||
|
||||
for k := range data.Chain {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) > 3 && strings.Contains(strings.ToLower(k), strings.ToLower(sw)) {
|
||||
candidates = append(candidates, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) > 0 {
|
||||
currentKey = candidates[rand.Intn(len(candidates))]
|
||||
}
|
||||
}
|
||||
|
||||
if currentKey == "" {
|
||||
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
||||
}
|
||||
|
||||
parts := strings.Split(currentKey, " ")
|
||||
w1, w2 = parts[0], parts[1]
|
||||
|
||||
output := []string{w1, w2}
|
||||
|
||||
for i := 0; i < 40; i++ {
|
||||
nextOptions, exists := data.Chain[currentKey]
|
||||
if !exists || len(nextOptions) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
||||
output = append(output, nextWord)
|
||||
|
||||
w1 = w2
|
||||
w2 = nextWord
|
||||
currentKey = key(w1, w2)
|
||||
|
||||
// Soft stop on punctuation
|
||||
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
||||
if rand.Float32() > 0.3 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(output, " ")
|
||||
}
|
||||
|
||||
func key(w1, w2 string) string {
|
||||
return w1 + " " + w2
|
||||
}
|
||||
|
||||
func init() {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
}
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,102 @@
|
||||
The Phoenix and Turtle
|
||||
by William Shakespeare
|
||||
Edited by Barbara A. Mowat and Paul Werstine
|
||||
with Michael Poston and Rebecca Niles
|
||||
Folger Shakespeare Library
|
||||
https://shakespeare.folger.edu/shakespeares-works/the-phoenix-and-turtle/
|
||||
Created on Jul 31, 2015, from FDT version 0.9.0.1
|
||||
|
||||
|
||||
"The Phoenix and Turtle"
|
||||
|
||||
|
||||
Let the bird of loudest lay
|
||||
On the sole Arabian tree
|
||||
Herald sad and trumpet be,
|
||||
To whose sound chaste wings obey.
|
||||
|
||||
But thou shrieking harbinger,
|
||||
Foul precurrer of the fiend,
|
||||
Augur of the fever's end,
|
||||
To this troop come thou not near.
|
||||
|
||||
From this session interdict
|
||||
Every fowl of tyrant wing,
|
||||
Save the eagle, feathered king;
|
||||
Keep the obsequy so strict.
|
||||
|
||||
Let the priest in surplice white,
|
||||
That defunctive music can,
|
||||
Be the death-divining swan,
|
||||
Lest the requiem lack his right.
|
||||
|
||||
And thou treble-dated crow,
|
||||
That thy sable gender mak'st
|
||||
With the breath thou giv'st and tak'st,
|
||||
'Mongst our mourners shalt thou go.
|
||||
|
||||
Here the anthem doth commence:
|
||||
Love and constancy is dead,
|
||||
Phoenix and the turtle fled
|
||||
In a mutual flame from hence.
|
||||
|
||||
So they loved, as love in twain
|
||||
Had the essence but in one,
|
||||
Two distincts, division none;
|
||||
Number there in love was slain.
|
||||
|
||||
Hearts remote yet not asunder,
|
||||
Distance and no space was seen
|
||||
'Twixt this turtle and his queen;
|
||||
But in them it were a wonder.
|
||||
|
||||
So between them love did shine
|
||||
That the turtle saw his right
|
||||
Flaming in the phoenix' sight;
|
||||
Either was the other's mine.
|
||||
|
||||
Property was thus appalled
|
||||
That the self was not the same;
|
||||
Single nature's double name
|
||||
Neither two nor one was called.
|
||||
|
||||
Reason, in itself confounded,
|
||||
Saw division grow together,
|
||||
To themselves yet either neither,
|
||||
Simple were so well compounded
|
||||
|
||||
That it cried, "How true a twain
|
||||
Seemeth this concordant one!
|
||||
Love hath reason, Reason none,
|
||||
If what parts can so remain,"
|
||||
|
||||
Whereupon it made this threne
|
||||
To the phoenix and the dove,
|
||||
Co-supremes and stars of love,
|
||||
As chorus to their tragic scene.
|
||||
|
||||
|
||||
Threnos
|
||||
|
||||
|
||||
Beauty, truth, and rarity,
|
||||
Grace in all simplicity,
|
||||
Here enclosed, in cinders lie.
|
||||
|
||||
Death is now the phoenix' nest,
|
||||
And the turtle's loyal breast
|
||||
To eternity doth rest,
|
||||
|
||||
Leaving no posterity;
|
||||
'Twas not their infirmity,
|
||||
It was married chastity.
|
||||
|
||||
Truth may seem, but cannot be;
|
||||
Beauty brag, but 'tis not she;
|
||||
Truth and beauty buried be.
|
||||
|
||||
To this urn let those repair
|
||||
That are either true or fair;
|
||||
For these dead birds sigh a prayer.
|
||||
|
||||
William Shakespeare
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+262
@@ -0,0 +1,262 @@
|
||||
package lib
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MarkovData is a word-level Markov model as produced by BuildMarkovChain and
// consumed by GenerateMessage.
type MarkovData struct {
	// Order is the number of words that make up each key.
	Order int
	// Chain maps a key ("word1 ... wordN", the words joined by single spaces)
	// to every word observed directly after that sequence. Repeated
	// observations are kept, so a random pick is weighted by frequency.
	Chain map[string][]string
	// Starts lists the keys at which generation may begin.
	Starts []string
}
|
||||
|
||||
var (
	// urlRegex matches an http:// or https:// URL up to the next whitespace.
	urlRegex = regexp.MustCompile(`https?://[^\s]+`)
	// mentionRegex matches an angle-bracketed token whose first character is
	// one of @, #, & or !, such as "<@123>" or "<#general>".
	mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
	// bracketRegex matches the shortest span enclosed in square brackets on a
	// single line, such as "[Aside]".
	bracketRegex = regexp.MustCompile(`\[.*?\]`)
	// speakerRegex matches a leading run of one or more words made of two or
	// more ASCII capitals, each followed by whitespace, such as "KING LEAR ".
	speakerRegex = regexp.MustCompile(`^(?:[A-Z]{2,}\s+)+`)
	// stopWords holds lower-case words that GenerateMessage ranks below all
	// other seed words when choosing where to start.
	stopWords = map[string]bool{
		"the": true, "and": true, "a": true, "to": true, "of": true,
		"in": true, "is": true, "that": true, "it": true, "for": true,
		"as": true, "with": true, "on": true, "at": true, "by": true,
		"this": true, "from": true, "but": true, "or": true, "an": true,
		"be": true, "are": true, "was": true, "were": true, "so": true,
		"if": true, "out": true, "up": true, "about": true, "into": true,
		"over": true, "after": true, "beneath": true, "under": true,
		"above": true, "me": true, "my": true, "mine": true, "you": true,
		"your": true, "yours": true, "he": true, "him": true, "his": true,
		"she": true, "her": true, "hers": true, "they": true, "them": true,
		"their": true, "theirs": true, "we": true, "us": true, "our": true,
		"ours": true, "who": true, "whom": true, "whose": true, "what": true,
		"which": true, "when": true, "where": true, "why": true, "how": true,
		"give": true, "write": true, "tell": true, "say": true, "speak": true,
		"make": true, "do": true, "does": true, "did": true, "done": true,
	}
)
|
||||
|
||||
// init seeds the global math/rand source from the current time so generated
// messages differ between runs.
//
// NOTE(review): rand.Seed is deprecated since Go 1.20, where the global source
// is seeded randomly by default. The call is kept so toolchains older than
// 1.20 still get varied output.
func init() {
	rand.Seed(time.Now().UnixNano())
}
|
||||
|
||||
func CleanText(text string) string {
|
||||
text = urlRegex.ReplaceAllString(text, "")
|
||||
text = mentionRegex.ReplaceAllString(text, "")
|
||||
text = bracketRegex.ReplaceAllString(text, "")
|
||||
text = strings.TrimSpace(text)
|
||||
text = speakerRegex.ReplaceAllString(text, "")
|
||||
return strings.Join(strings.Fields(text), " ")
|
||||
}
|
||||
|
||||
func BuildMarkovChain(lines []string, order int) *MarkovData {
|
||||
data := &MarkovData{
|
||||
Order: order,
|
||||
Chain: make(map[string][]string),
|
||||
Starts: make([]string, 0),
|
||||
}
|
||||
|
||||
var allWords []string
|
||||
|
||||
for _, line := range lines {
|
||||
// Skip likely headers/metadata (all caps lines)
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed != "" && strings.ToUpper(trimmed) == trimmed && strings.ToLower(trimmed) != trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
cleaned := CleanText(line)
|
||||
if cleaned == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
allWords = append(allWords, strings.Fields(cleaned)...)
|
||||
}
|
||||
|
||||
if len(allWords) < order+1 {
|
||||
return data
|
||||
}
|
||||
|
||||
// First key is always a start
|
||||
data.Starts = append(data.Starts, Key(allWords[:order]...))
|
||||
|
||||
for i := 0; i < len(allWords)-order; i++ {
|
||||
keyWords := allWords[i : i+order]
|
||||
nextWord := allWords[i+order]
|
||||
|
||||
k := Key(keyWords...)
|
||||
data.Chain[k] = append(data.Chain[k], nextWord)
|
||||
|
||||
// If the word shifting out ends a sentence, the next sequence is a start
|
||||
if strings.ContainsAny(allWords[i], ".!?") {
|
||||
if i+1+order <= len(allWords) {
|
||||
data.Starts = append(data.Starts, Key(allWords[i+1:i+1+order]...))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func GenerateMessage(data *MarkovData, seed string) string {
|
||||
if len(data.Starts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var currentKey string
|
||||
|
||||
// Try to seed based on input question
|
||||
if seed != "" {
|
||||
seedWords := strings.Fields(CleanText(seed))
|
||||
|
||||
// Sort seed words: significant words first, then by length
|
||||
for i := 0; i < len(seedWords); i++ {
|
||||
for j := i + 1; j < len(seedWords); j++ {
|
||||
sw1 := strings.ToLower(seedWords[i])
|
||||
sw2 := strings.ToLower(seedWords[j])
|
||||
isStop1 := stopWords[sw1]
|
||||
isStop2 := stopWords[sw2]
|
||||
|
||||
// If one is a stop word and the other isn't, prioritize the non-stop word
|
||||
if isStop1 && !isStop2 {
|
||||
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||
} else if !isStop1 && isStop2 {
|
||||
continue
|
||||
} else {
|
||||
// Otherwise sort by length
|
||||
if len(seedWords[i]) < len(seedWords[j]) {
|
||||
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var candidates []string
|
||||
|
||||
// 1. Try to find a sentence starter
|
||||
// We iterate seed words first to prioritize matches for longer words
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
var primaryMatches []string // starts with word
|
||||
|
||||
for _, startKey := range data.Starts {
|
||||
parts := strings.Fields(strings.ToLower(startKey))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
if parts[0] == swLower {
|
||||
primaryMatches = append(primaryMatches, startKey)
|
||||
}
|
||||
}
|
||||
|
||||
// If we found sentence starters beginning with this word, use them exclusively
|
||||
if len(primaryMatches) > 0 {
|
||||
candidates = primaryMatches
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// 2. If no perfect starts, try any start containing the word
|
||||
if len(candidates) == 0 {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
|
||||
for _, startKey := range data.Starts {
|
||||
parts := strings.Fields(strings.ToLower(startKey))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
// Check remaining words in key
|
||||
found := false
|
||||
for i := 1; i < len(parts); i++ {
|
||||
if parts[i] == swLower {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if found {
|
||||
candidates = append(candidates, startKey)
|
||||
}
|
||||
}
|
||||
if len(candidates) > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. If no starts, try to find any connection in the chain
|
||||
if len(candidates) == 0 {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
var matches []string
|
||||
|
||||
for k := range data.Chain {
|
||||
parts := strings.Fields(strings.ToLower(k))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
if parts[0] == swLower {
|
||||
matches = append(matches, k)
|
||||
}
|
||||
}
|
||||
|
||||
if len(matches) > 0 {
|
||||
candidates = matches
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) > 0 {
|
||||
currentKey = candidates[rand.Intn(len(candidates))]
|
||||
}
|
||||
}
|
||||
|
||||
if currentKey == "" {
|
||||
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
||||
}
|
||||
|
||||
output := strings.Fields(currentKey)
|
||||
|
||||
for i := 0; i < 40; i++ {
|
||||
nextOptions, exists := data.Chain[currentKey]
|
||||
if !exists || len(nextOptions) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
||||
output = append(output, nextWord)
|
||||
|
||||
// Shift the key window
|
||||
currentWords := strings.Fields(currentKey)
|
||||
if len(currentWords) >= 1 {
|
||||
newKeyWords := append(currentWords[1:], nextWord)
|
||||
currentKey = Key(newKeyWords...)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
|
||||
// Soft stop on punctuation
|
||||
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
||||
if rand.Float32() > 0.3 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(output, " ")
|
||||
}
|
||||
|
||||
// Key joins words with single spaces to form a chain key. Called with no
// words it returns "".
func Key(words ...string) string {
	return strings.Join(words, " ")
}
|
||||
@@ -29,6 +29,10 @@ func main() {
|
||||
initCommands(config)
|
||||
initCommandHandlers(config)
|
||||
|
||||
if err := command.InitBard("datasets/bard.gob"); err != nil {
|
||||
log.Printf("Failed to load Bard dataset: %v", err)
|
||||
}
|
||||
|
||||
err := lib.InitDB()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to initialize database: %v", err)
|
||||
@@ -191,13 +195,13 @@ func initCommands(config *lib.Config) {
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "gen",
|
||||
Description: "Generate a random message using markov chains based on channel history",
|
||||
Name: "bard",
|
||||
Description: "Ask the bard a question",
|
||||
Options: []*discordgo.ApplicationCommandOption{
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionInteger,
|
||||
Name: "messages",
|
||||
Description: fmt.Sprintf("Number of messages to use (default: %d, max: %d)", config.MarkovDefaultMessages, config.MarkovMaxMessages),
|
||||
Type: discordgo.ApplicationCommandOptionString,
|
||||
Name: "question",
|
||||
Description: "The question you want to ask",
|
||||
Required: false,
|
||||
},
|
||||
},
|
||||
@@ -255,7 +259,7 @@ func initCommandHandlers(config *lib.Config) {
|
||||
commandHandlers = map[string]func(s *discordgo.Session, i *discordgo.InteractionCreate){
|
||||
"ping": lib.HandleCommand("ping", time.Duration(config.PingCooldown)*time.Second, command.PingCommand),
|
||||
"hs": lib.HandleCommand("hs", time.Duration(config.HsCooldown)*time.Second, command.HsCommand),
|
||||
"gen": lib.HandleCommand("gen", time.Duration(config.MarkovCooldown)*time.Second, command.MarkovCommand),
|
||||
"bard": lib.HandleCommand("bard", time.Duration(config.MarkovCooldown)*time.Second, command.BardCommand),
|
||||
"ask": lib.HandleCommand("ask", time.Duration(config.MarkovAskCooldown)*time.Second, command.MarkovQuestionCommand),
|
||||
"himbucks": lib.HandleCommand("himbucks", time.Duration(config.HimbucksCooldown)*time.Second, command.BalanceGetCommand),
|
||||
"himboard": lib.HandleCommand("himboard", time.Duration(config.HimboardCooldown)*time.Second, command.LeaderboardCommand),
|
||||
|
||||
Reference in New Issue
Block a user