Trying some weird training nonsense. Maybe this will be fun.
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
This commit is contained in:
@@ -28,5 +28,8 @@ COPY --from=build /go/bin/app /app/himbot
|
||||
# Copy migrations directory
|
||||
COPY --from=build /app/migrations /app/migrations
|
||||
|
||||
# Copy datasets directory
|
||||
COPY --from=build /app/datasets /app/datasets
|
||||
|
||||
# Set the entrypoint
|
||||
ENTRYPOINT ["/app/himbot"]
|
||||
|
||||
66
cmd/train/main.go
Normal file
66
cmd/train/main.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"flag"
|
||||
"himbot/lib"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
|
||||
outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
|
||||
order := flag.Int("order", 3, "Markov chain order (N-gram size)")
|
||||
flag.Parse()
|
||||
|
||||
log.Printf("Scanning directory: %s", *inputDir)
|
||||
|
||||
var allLines []string
|
||||
fileCount := 0
|
||||
|
||||
err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() && strings.HasSuffix(info.Name(), ".txt") {
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
log.Printf("Error reading file %s: %v", path, err)
|
||||
return nil // Continue to next file
|
||||
}
|
||||
lines := strings.Split(string(content), "\n")
|
||||
allLines = append(allLines, lines...)
|
||||
fileCount++
|
||||
if fileCount%5 == 0 {
|
||||
log.Printf("Processed %d files...", fileCount)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Error walking directory: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
|
||||
|
||||
chain := lib.BuildMarkovChain(allLines, *order)
|
||||
|
||||
log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
|
||||
|
||||
f, err := os.Create(*outputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create output file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
encoder := gob.NewEncoder(f)
|
||||
if err := encoder.Encode(chain); err != nil {
|
||||
log.Fatalf("Failed to encode chain: %v", err)
|
||||
}
|
||||
|
||||
log.Println("Done!")
|
||||
}
|
||||
@@ -2,73 +2,69 @@ package command
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"himbot/lib"
|
||||
"math/rand"
|
||||
"regexp"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/bwmarrin/discordgo"
|
||||
)
|
||||
|
||||
type MarkovData struct {
|
||||
Chain map[string][]string // "word1 word2" -> ["word3", ...]
|
||||
Starts []string
|
||||
}
|
||||
|
||||
type MarkovCache struct {
|
||||
data map[string]*MarkovData
|
||||
data map[string]*lib.MarkovData
|
||||
hashes map[string]string
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
var (
|
||||
markovCache = &MarkovCache{
|
||||
data: make(map[string]*MarkovData),
|
||||
data: make(map[string]*lib.MarkovData),
|
||||
hashes: make(map[string]string),
|
||||
}
|
||||
urlRegex = regexp.MustCompile(`https?://[^\s]+`)
|
||||
mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
|
||||
bardChain *lib.MarkovData
|
||||
)
|
||||
|
||||
func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
channelID := i.ChannelID
|
||||
|
||||
numMessages := lib.AppConfig.MarkovDefaultMessages
|
||||
if len(i.ApplicationCommandData().Options) > 0 {
|
||||
if i.ApplicationCommandData().Options[0].Name == "messages" {
|
||||
numMessages = int(i.ApplicationCommandData().Options[0].IntValue())
|
||||
if numMessages <= 0 {
|
||||
numMessages = lib.AppConfig.MarkovDefaultMessages
|
||||
} else if numMessages > lib.AppConfig.MarkovMaxMessages {
|
||||
numMessages = lib.AppConfig.MarkovMaxMessages
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
||||
if data := getCachedChain(cacheKey); data != nil {
|
||||
if msg := generateMessage(data, ""); msg != "" {
|
||||
return msg, nil
|
||||
}
|
||||
}
|
||||
|
||||
allMessages, err := fetchMessages(s, channelID, numMessages)
|
||||
func InitBard(modelPath string) error {
|
||||
f, err := os.Open(modelPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var data lib.MarkovData
|
||||
decoder := gob.NewDecoder(f)
|
||||
if err := decoder.Decode(&data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := buildMarkovChain(allMessages)
|
||||
setCachedChain(cacheKey, data, allMessages)
|
||||
|
||||
newMessage := generateMessage(data, "")
|
||||
if newMessage == "" {
|
||||
newMessage = "Not enough text data to generate a message."
|
||||
bardChain = &data
|
||||
return nil
|
||||
}
|
||||
|
||||
return newMessage, nil
|
||||
func BardCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
if bardChain == nil {
|
||||
return "The bard is sleeping (dataset not loaded).", nil
|
||||
}
|
||||
|
||||
var question string
|
||||
for _, option := range i.ApplicationCommandData().Options {
|
||||
if option.Name == "question" {
|
||||
question = option.StringValue()
|
||||
}
|
||||
}
|
||||
|
||||
answer := lib.GenerateMessage(bardChain, question)
|
||||
if answer == "" {
|
||||
answer = "Words fail me."
|
||||
}
|
||||
|
||||
if question != "" {
|
||||
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||
}
|
||||
|
||||
return answer, nil
|
||||
}
|
||||
|
||||
func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||
@@ -95,7 +91,7 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
||||
var data *MarkovData
|
||||
var data *lib.MarkovData
|
||||
|
||||
if cachedData := getCachedChain(cacheKey); cachedData != nil {
|
||||
data = cachedData
|
||||
@@ -104,11 +100,18 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
data = buildMarkovChain(allMessages)
|
||||
setCachedChain(cacheKey, data, allMessages)
|
||||
|
||||
var texts []string
|
||||
for _, msg := range allMessages {
|
||||
texts = append(texts, msg.Content)
|
||||
}
|
||||
|
||||
answer := generateMessage(data, question)
|
||||
// Use order 2 for chat history (sparse data)
|
||||
data = lib.BuildMarkovChain(texts, 2)
|
||||
setCachedChain(cacheKey, data, hashMessages(allMessages))
|
||||
}
|
||||
|
||||
answer := lib.GenerateMessage(data, question)
|
||||
if answer == "" {
|
||||
answer = "I don't have enough context to answer that."
|
||||
}
|
||||
@@ -116,15 +119,13 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
||||
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||
}
|
||||
|
||||
func getCachedChain(cacheKey string) *MarkovData {
|
||||
func getCachedChain(cacheKey string) *lib.MarkovData {
|
||||
markovCache.mu.RLock()
|
||||
defer markovCache.mu.RUnlock()
|
||||
return markovCache.data[cacheKey]
|
||||
}
|
||||
|
||||
func setCachedChain(cacheKey string, data *MarkovData, messages []*discordgo.Message) {
|
||||
hash := hashMessages(messages)
|
||||
|
||||
func setCachedChain(cacheKey string, data *lib.MarkovData, hash string) {
|
||||
markovCache.mu.Lock()
|
||||
defer markovCache.mu.Unlock()
|
||||
|
||||
@@ -186,106 +187,3 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]*
|
||||
|
||||
return allMessages, nil
|
||||
}
|
||||
|
||||
func cleanText(text string) string {
|
||||
text = urlRegex.ReplaceAllString(text, "")
|
||||
text = mentionRegex.ReplaceAllString(text, "")
|
||||
return strings.Join(strings.Fields(text), " ")
|
||||
}
|
||||
|
||||
func buildMarkovChain(messages []*discordgo.Message) *MarkovData {
|
||||
data := &MarkovData{
|
||||
Chain: make(map[string][]string),
|
||||
Starts: make([]string, 0),
|
||||
}
|
||||
|
||||
for _, msg := range messages {
|
||||
cleaned := cleanText(msg.Content)
|
||||
if cleaned == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
words := strings.Fields(cleaned)
|
||||
if len(words) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
startKey := key(words[0], words[1])
|
||||
data.Starts = append(data.Starts, startKey)
|
||||
|
||||
for i := 0; i < len(words)-2; i++ {
|
||||
k := key(words[i], words[i+1])
|
||||
val := words[i+2]
|
||||
data.Chain[k] = append(data.Chain[k], val)
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func generateMessage(data *MarkovData, seed string) string {
|
||||
if len(data.Starts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var w1, w2 string
|
||||
var currentKey string
|
||||
|
||||
// Try to seed based on input question
|
||||
if seed != "" {
|
||||
seedWords := strings.Fields(cleanText(seed))
|
||||
var candidates []string
|
||||
|
||||
for k := range data.Chain {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) > 3 && strings.Contains(strings.ToLower(k), strings.ToLower(sw)) {
|
||||
candidates = append(candidates, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) > 0 {
|
||||
currentKey = candidates[rand.Intn(len(candidates))]
|
||||
}
|
||||
}
|
||||
|
||||
if currentKey == "" {
|
||||
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
||||
}
|
||||
|
||||
parts := strings.Split(currentKey, " ")
|
||||
w1, w2 = parts[0], parts[1]
|
||||
|
||||
output := []string{w1, w2}
|
||||
|
||||
for i := 0; i < 40; i++ {
|
||||
nextOptions, exists := data.Chain[currentKey]
|
||||
if !exists || len(nextOptions) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
||||
output = append(output, nextWord)
|
||||
|
||||
w1 = w2
|
||||
w2 = nextWord
|
||||
currentKey = key(w1, w2)
|
||||
|
||||
// Soft stop on punctuation
|
||||
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
||||
if rand.Float32() > 0.3 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(output, " ")
|
||||
}
|
||||
|
||||
func key(w1, w2 string) string {
|
||||
return w1 + " " + w2
|
||||
}
|
||||
|
||||
func init() {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
}
|
||||
|
||||
BIN
datasets/bard.gob
Normal file
BIN
datasets/bard.gob
Normal file
Binary file not shown.
3341
datasets/bard/a-midsummer-nights-dream_TXT_FolgerShakespeare.txt
Normal file
3341
datasets/bard/a-midsummer-nights-dream_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4570
datasets/bard/alls-well-that-ends-well_TXT_FolgerShakespeare.txt
Normal file
4570
datasets/bard/alls-well-that-ends-well_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5870
datasets/bard/antony-and-cleopatra_TXT_FolgerShakespeare.txt
Normal file
5870
datasets/bard/antony-and-cleopatra_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4224
datasets/bard/as-you-like-it_TXT_FolgerShakespeare.txt
Normal file
4224
datasets/bard/as-you-like-it_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5844
datasets/bard/coriolanus_TXT_FolgerShakespeare.txt
Normal file
5844
datasets/bard/coriolanus_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5380
datasets/bard/cymbeline_TXT_FolgerShakespeare.txt
Normal file
5380
datasets/bard/cymbeline_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
6080
datasets/bard/hamlet_TXT_FolgerShakespeare.txt
Normal file
6080
datasets/bard/hamlet_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4548
datasets/bard/henry-iv-part-1_TXT_FolgerShakespeare.txt
Normal file
4548
datasets/bard/henry-iv-part-1_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4975
datasets/bard/henry-iv-part-2_TXT_FolgerShakespeare.txt
Normal file
4975
datasets/bard/henry-iv-part-2_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4731
datasets/bard/henry-v_TXT_FolgerShakespeare.txt
Normal file
4731
datasets/bard/henry-v_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4562
datasets/bard/henry-vi-part-1_TXT_FolgerShakespeare.txt
Normal file
4562
datasets/bard/henry-vi-part-1_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5113
datasets/bard/henry-vi-part-2_TXT_FolgerShakespeare.txt
Normal file
5113
datasets/bard/henry-vi-part-2_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5100
datasets/bard/henry-vi-part-3_TXT_FolgerShakespeare.txt
Normal file
5100
datasets/bard/henry-vi-part-3_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4730
datasets/bard/henry-viii_TXT_FolgerShakespeare.txt
Normal file
4730
datasets/bard/henry-viii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4325
datasets/bard/julius-caesar_TXT_FolgerShakespeare.txt
Normal file
4325
datasets/bard/julius-caesar_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4020
datasets/bard/king-john_TXT_FolgerShakespeare.txt
Normal file
4020
datasets/bard/king-john_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5563
datasets/bard/king-lear_TXT_FolgerShakespeare.txt
Normal file
5563
datasets/bard/king-lear_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4713
datasets/bard/loves-labors-lost_TXT_FolgerShakespeare.txt
Normal file
4713
datasets/bard/loves-labors-lost_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
2191
datasets/bard/lucrece_TXT_FolgerShakespeare.txt
Normal file
2191
datasets/bard/lucrece_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3821
datasets/bard/macbeth_TXT_FolgerShakespeare.txt
Normal file
3821
datasets/bard/macbeth_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4477
datasets/bard/measure-for-measure_TXT_FolgerShakespeare.txt
Normal file
4477
datasets/bard/measure-for-measure_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4223
datasets/bard/much-ado-about-nothing_TXT_FolgerShakespeare.txt
Normal file
4223
datasets/bard/much-ado-about-nothing_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5730
datasets/bard/othello_TXT_FolgerShakespeare.txt
Normal file
5730
datasets/bard/othello_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3837
datasets/bard/pericles_TXT_FolgerShakespeare.txt
Normal file
3837
datasets/bard/pericles_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
7063
datasets/bard/poems.txt
Normal file
7063
datasets/bard/poems.txt
Normal file
File diff suppressed because it is too large
Load Diff
4257
datasets/bard/richard-ii_TXT_FolgerShakespeare.txt
Normal file
4257
datasets/bard/richard-ii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
6286
datasets/bard/richard-iii_TXT_FolgerShakespeare.txt
Normal file
6286
datasets/bard/richard-iii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5011
datasets/bard/romeo-and-juliet_TXT_FolgerShakespeare.txt
Normal file
5011
datasets/bard/romeo-and-juliet_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
2675
datasets/bard/shakespeares-sonnets_TXT_FolgerShakespeare.txt
Normal file
2675
datasets/bard/shakespeares-sonnets_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3201
datasets/bard/the-comedy-of-errors_TXT_FolgerShakespeare.txt
Normal file
3201
datasets/bard/the-comedy-of-errors_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4055
datasets/bard/the-merchant-of-venice_TXT_FolgerShakespeare.txt
Normal file
4055
datasets/bard/the-merchant-of-venice_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4446
datasets/bard/the-merry-wives-of-windsor_TXT_FolgerShakespeare.txt
Normal file
4446
datasets/bard/the-merry-wives-of-windsor_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
102
datasets/bard/the-phoenix-and-turtle_TXT_FolgerShakespeare.txt
Normal file
102
datasets/bard/the-phoenix-and-turtle_TXT_FolgerShakespeare.txt
Normal file
@@ -0,0 +1,102 @@
|
||||
The Phoenix and Turtle
|
||||
by William Shakespeare
|
||||
Edited by Barbara A. Mowat and Paul Werstine
|
||||
with Michael Poston and Rebecca Niles
|
||||
Folger Shakespeare Library
|
||||
https://shakespeare.folger.edu/shakespeares-works/the-phoenix-and-turtle/
|
||||
Created on Jul 31, 2015, from FDT version 0.9.0.1
|
||||
|
||||
|
||||
"The Phoenix and Turtle"
|
||||
|
||||
|
||||
Let the bird of loudest lay
|
||||
On the sole Arabian tree
|
||||
Herald sad and trumpet be,
|
||||
To whose sound chaste wings obey.
|
||||
|
||||
But thou shrieking harbinger,
|
||||
Foul precurrer of the fiend,
|
||||
Augur of the fever's end,
|
||||
To this troop come thou not near.
|
||||
|
||||
From this session interdict
|
||||
Every fowl of tyrant wing,
|
||||
Save the eagle, feathered king;
|
||||
Keep the obsequy so strict.
|
||||
|
||||
Let the priest in surplice white,
|
||||
That defunctive music can,
|
||||
Be the death-divining swan,
|
||||
Lest the requiem lack his right.
|
||||
|
||||
And thou treble-dated crow,
|
||||
That thy sable gender mak'st
|
||||
With the breath thou giv'st and tak'st,
|
||||
'Mongst our mourners shalt thou go.
|
||||
|
||||
Here the anthem doth commence:
|
||||
Love and constancy is dead,
|
||||
Phoenix and the turtle fled
|
||||
In a mutual flame from hence.
|
||||
|
||||
So they loved, as love in twain
|
||||
Had the essence but in one,
|
||||
Two distincts, division none;
|
||||
Number there in love was slain.
|
||||
|
||||
Hearts remote yet not asunder,
|
||||
Distance and no space was seen
|
||||
'Twixt this turtle and his queen;
|
||||
But in them it were a wonder.
|
||||
|
||||
So between them love did shine
|
||||
That the turtle saw his right
|
||||
Flaming in the phoenix' sight;
|
||||
Either was the other's mine.
|
||||
|
||||
Property was thus appalled
|
||||
That the self was not the same;
|
||||
Single nature's double name
|
||||
Neither two nor one was called.
|
||||
|
||||
Reason, in itself confounded,
|
||||
Saw division grow together,
|
||||
To themselves yet either neither,
|
||||
Simple were so well compounded
|
||||
|
||||
That it cried, "How true a twain
|
||||
Seemeth this concordant one!
|
||||
Love hath reason, Reason none,
|
||||
If what parts can so remain,"
|
||||
|
||||
Whereupon it made this threne
|
||||
To the phoenix and the dove,
|
||||
Co-supremes and stars of love,
|
||||
As chorus to their tragic scene.
|
||||
|
||||
|
||||
Threnos
|
||||
|
||||
|
||||
Beauty, truth, and rarity,
|
||||
Grace in all simplicity,
|
||||
Here enclosed, in cinders lie.
|
||||
|
||||
Death is now the phoenix' nest,
|
||||
And the turtle's loyal breast
|
||||
To eternity doth rest,
|
||||
|
||||
Leaving no posterity;
|
||||
'Twas not their infirmity,
|
||||
It was married chastity.
|
||||
|
||||
Truth may seem, but cannot be;
|
||||
Beauty brag, but 'tis not she;
|
||||
Truth and beauty buried be.
|
||||
|
||||
To this urn let those repair
|
||||
That are either true or fair;
|
||||
For these dead birds sigh a prayer.
|
||||
|
||||
William Shakespeare
|
||||
4579
datasets/bard/the-taming-of-the-shrew_TXT_FolgerShakespeare.txt
Normal file
4579
datasets/bard/the-taming-of-the-shrew_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3466
datasets/bard/the-tempest_TXT_FolgerShakespeare.txt
Normal file
3466
datasets/bard/the-tempest_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3872
datasets/bard/the-two-gentlemen-of-verona_TXT_FolgerShakespeare.txt
Normal file
3872
datasets/bard/the-two-gentlemen-of-verona_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4938
datasets/bard/the-two-noble-kinsmen_TXT_FolgerShakespeare.txt
Normal file
4938
datasets/bard/the-two-noble-kinsmen_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4671
datasets/bard/the-winters-tale_TXT_FolgerShakespeare.txt
Normal file
4671
datasets/bard/the-winters-tale_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3998
datasets/bard/timon-of-athens_TXT_FolgerShakespeare.txt
Normal file
3998
datasets/bard/timon-of-athens_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4033
datasets/bard/titus-andronicus_TXT_FolgerShakespeare.txt
Normal file
4033
datasets/bard/titus-andronicus_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5668
datasets/bard/troilus-and-cressida_TXT_FolgerShakespeare.txt
Normal file
5668
datasets/bard/troilus-and-cressida_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4071
datasets/bard/twelfth-night_TXT_FolgerShakespeare.txt
Normal file
4071
datasets/bard/twelfth-night_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
1434
datasets/bard/venus-and-adonis_TXT_FolgerShakespeare.txt
Normal file
1434
datasets/bard/venus-and-adonis_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
262
lib/markov.go
Normal file
262
lib/markov.go
Normal file
@@ -0,0 +1,262 @@
|
||||
package lib
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MarkovData is an order-N Markov model over whitespace-separated words.
type MarkovData struct {
	// Order is the number of words that make up each key.
	Order int
	// Chain maps a key of Order space-joined words ("word1 ... wordN") to
	// every word observed directly after that sequence. Repeated
	// observations are kept, so frequent successors are picked more often.
	Chain map[string][]string
	// Starts lists the keys a generated message may begin with.
	Starts []string
}
|
||||
|
||||
var (
	// urlRegex matches an http or https URL up to the next whitespace.
	urlRegex = regexp.MustCompile(`https?://[^\s]+`)
	// mentionRegex matches angle-bracket tokens that start with @, #, & or !
	// (for example "<@123>" or "<#456>").
	mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
	// bracketRegex matches the shortest "[...]" span on a single line.
	bracketRegex = regexp.MustCompile(`\[.*?\]`)
	// speakerRegex matches one or more leading words made only of two or
	// more ASCII capitals, each followed by whitespace (e.g. "KING LEAR ").
	speakerRegex = regexp.MustCompile(`^(?:[A-Z]{2,}\s+)+`)
	// stopWords is the set of lower-case words that are given the lowest
	// priority when a seed phrase is used to choose a starting key.
	stopWords = map[string]bool{
		"the": true, "and": true, "a": true, "to": true, "of": true,
		"in": true, "is": true, "that": true, "it": true, "for": true,
		"as": true, "with": true, "on": true, "at": true, "by": true,
		"this": true, "from": true, "but": true, "or": true, "an": true,
		"be": true, "are": true, "was": true, "were": true, "so": true,
		"if": true, "out": true, "up": true, "about": true, "into": true,
		"over": true, "after": true, "beneath": true, "under": true,
		"above": true, "me": true, "my": true, "mine": true, "you": true,
		"your": true, "yours": true, "he": true, "him": true, "his": true,
		"she": true, "her": true, "hers": true, "they": true, "them": true,
		"their": true, "theirs": true, "we": true, "us": true, "our": true,
		"ours": true, "who": true, "whom": true, "whose": true, "what": true,
		"which": true, "when": true, "where": true, "why": true, "how": true,
		"give": true, "write": true, "tell": true, "say": true, "speak": true,
		"make": true, "do": true, "does": true, "did": true, "done": true,
	}
)
|
||||
|
||||
// init seeds the shared math/rand generator from the current time so that
// generated messages differ between runs.
//
// NOTE: rand.Seed is deprecated since Go 1.20, where the global generator is
// seeded randomly by default; the call is kept for older toolchains.
func init() {
	rand.Seed(time.Now().UnixNano())
}
|
||||
|
||||
func CleanText(text string) string {
|
||||
text = urlRegex.ReplaceAllString(text, "")
|
||||
text = mentionRegex.ReplaceAllString(text, "")
|
||||
text = bracketRegex.ReplaceAllString(text, "")
|
||||
text = strings.TrimSpace(text)
|
||||
text = speakerRegex.ReplaceAllString(text, "")
|
||||
return strings.Join(strings.Fields(text), " ")
|
||||
}
|
||||
|
||||
func BuildMarkovChain(lines []string, order int) *MarkovData {
|
||||
data := &MarkovData{
|
||||
Order: order,
|
||||
Chain: make(map[string][]string),
|
||||
Starts: make([]string, 0),
|
||||
}
|
||||
|
||||
var allWords []string
|
||||
|
||||
for _, line := range lines {
|
||||
// Skip likely headers/metadata (all caps lines)
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed != "" && strings.ToUpper(trimmed) == trimmed && strings.ToLower(trimmed) != trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
cleaned := CleanText(line)
|
||||
if cleaned == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
allWords = append(allWords, strings.Fields(cleaned)...)
|
||||
}
|
||||
|
||||
if len(allWords) < order+1 {
|
||||
return data
|
||||
}
|
||||
|
||||
// First key is always a start
|
||||
data.Starts = append(data.Starts, Key(allWords[:order]...))
|
||||
|
||||
for i := 0; i < len(allWords)-order; i++ {
|
||||
keyWords := allWords[i : i+order]
|
||||
nextWord := allWords[i+order]
|
||||
|
||||
k := Key(keyWords...)
|
||||
data.Chain[k] = append(data.Chain[k], nextWord)
|
||||
|
||||
// If the word shifting out ends a sentence, the next sequence is a start
|
||||
if strings.ContainsAny(allWords[i], ".!?") {
|
||||
if i+1+order <= len(allWords) {
|
||||
data.Starts = append(data.Starts, Key(allWords[i+1:i+1+order]...))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func GenerateMessage(data *MarkovData, seed string) string {
|
||||
if len(data.Starts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var currentKey string
|
||||
|
||||
// Try to seed based on input question
|
||||
if seed != "" {
|
||||
seedWords := strings.Fields(CleanText(seed))
|
||||
|
||||
// Sort seed words: significant words first, then by length
|
||||
for i := 0; i < len(seedWords); i++ {
|
||||
for j := i + 1; j < len(seedWords); j++ {
|
||||
sw1 := strings.ToLower(seedWords[i])
|
||||
sw2 := strings.ToLower(seedWords[j])
|
||||
isStop1 := stopWords[sw1]
|
||||
isStop2 := stopWords[sw2]
|
||||
|
||||
// If one is a stop word and the other isn't, prioritize the non-stop word
|
||||
if isStop1 && !isStop2 {
|
||||
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||
} else if !isStop1 && isStop2 {
|
||||
continue
|
||||
} else {
|
||||
// Otherwise sort by length
|
||||
if len(seedWords[i]) < len(seedWords[j]) {
|
||||
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var candidates []string
|
||||
|
||||
// 1. Try to find a sentence starter
|
||||
// We iterate seed words first to prioritize matches for longer words
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
var primaryMatches []string // starts with word
|
||||
|
||||
for _, startKey := range data.Starts {
|
||||
parts := strings.Fields(strings.ToLower(startKey))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
if parts[0] == swLower {
|
||||
primaryMatches = append(primaryMatches, startKey)
|
||||
}
|
||||
}
|
||||
|
||||
// If we found sentence starters beginning with this word, use them exclusively
|
||||
if len(primaryMatches) > 0 {
|
||||
candidates = primaryMatches
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// 2. If no perfect starts, try any start containing the word
|
||||
if len(candidates) == 0 {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
|
||||
for _, startKey := range data.Starts {
|
||||
parts := strings.Fields(strings.ToLower(startKey))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
// Check remaining words in key
|
||||
found := false
|
||||
for i := 1; i < len(parts); i++ {
|
||||
if parts[i] == swLower {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if found {
|
||||
candidates = append(candidates, startKey)
|
||||
}
|
||||
}
|
||||
if len(candidates) > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. If no starts, try to find any connection in the chain
|
||||
if len(candidates) == 0 {
|
||||
for _, sw := range seedWords {
|
||||
if len(sw) <= 2 {
|
||||
continue
|
||||
}
|
||||
swLower := strings.ToLower(sw)
|
||||
var matches []string
|
||||
|
||||
for k := range data.Chain {
|
||||
parts := strings.Fields(strings.ToLower(k))
|
||||
if len(parts) < data.Order {
|
||||
continue
|
||||
}
|
||||
if parts[0] == swLower {
|
||||
matches = append(matches, k)
|
||||
}
|
||||
}
|
||||
|
||||
if len(matches) > 0 {
|
||||
candidates = matches
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) > 0 {
|
||||
currentKey = candidates[rand.Intn(len(candidates))]
|
||||
}
|
||||
}
|
||||
|
||||
if currentKey == "" {
|
||||
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
||||
}
|
||||
|
||||
output := strings.Fields(currentKey)
|
||||
|
||||
for i := 0; i < 40; i++ {
|
||||
nextOptions, exists := data.Chain[currentKey]
|
||||
if !exists || len(nextOptions) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
||||
output = append(output, nextWord)
|
||||
|
||||
// Shift the key window
|
||||
currentWords := strings.Fields(currentKey)
|
||||
if len(currentWords) >= 1 {
|
||||
newKeyWords := append(currentWords[1:], nextWord)
|
||||
currentKey = Key(newKeyWords...)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
|
||||
// Soft stop on punctuation
|
||||
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
||||
if rand.Float32() > 0.3 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(output, " ")
|
||||
}
|
||||
|
||||
// Key joins words with single spaces to form a chain key. It returns ""
// when called without arguments.
func Key(words ...string) string {
	return strings.Join(words, " ")
}
|
||||
16
main.go
16
main.go
@@ -29,6 +29,10 @@ func main() {
|
||||
initCommands(config)
|
||||
initCommandHandlers(config)
|
||||
|
||||
if err := command.InitBard("datasets/bard.gob"); err != nil {
|
||||
log.Printf("Failed to load Bard dataset: %v", err)
|
||||
}
|
||||
|
||||
err := lib.InitDB()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to initialize database: %v", err)
|
||||
@@ -191,13 +195,13 @@ func initCommands(config *lib.Config) {
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "gen",
|
||||
Description: "Generate a random message using markov chains based on channel history",
|
||||
Name: "bard",
|
||||
Description: "Ask the bard a question",
|
||||
Options: []*discordgo.ApplicationCommandOption{
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionInteger,
|
||||
Name: "messages",
|
||||
Description: fmt.Sprintf("Number of messages to use (default: %d, max: %d)", config.MarkovDefaultMessages, config.MarkovMaxMessages),
|
||||
Type: discordgo.ApplicationCommandOptionString,
|
||||
Name: "question",
|
||||
Description: "The question you want to ask",
|
||||
Required: false,
|
||||
},
|
||||
},
|
||||
@@ -255,7 +259,7 @@ func initCommandHandlers(config *lib.Config) {
|
||||
commandHandlers = map[string]func(s *discordgo.Session, i *discordgo.InteractionCreate){
|
||||
"ping": lib.HandleCommand("ping", time.Duration(config.PingCooldown)*time.Second, command.PingCommand),
|
||||
"hs": lib.HandleCommand("hs", time.Duration(config.HsCooldown)*time.Second, command.HsCommand),
|
||||
"gen": lib.HandleCommand("gen", time.Duration(config.MarkovCooldown)*time.Second, command.MarkovCommand),
|
||||
"bard": lib.HandleCommand("bard", time.Duration(config.MarkovCooldown)*time.Second, command.BardCommand),
|
||||
"ask": lib.HandleCommand("ask", time.Duration(config.MarkovAskCooldown)*time.Second, command.MarkovQuestionCommand),
|
||||
"himbucks": lib.HandleCommand("himbucks", time.Duration(config.HimbucksCooldown)*time.Second, command.BalanceGetCommand),
|
||||
"himboard": lib.HandleCommand("himboard", time.Duration(config.HimboardCooldown)*time.Second, command.LeaderboardCommand),
|
||||
|
||||
Reference in New Issue
Block a user