Trying some weird training nonsense. Maybe this will be fun.
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
This commit is contained in:
@@ -28,5 +28,8 @@ COPY --from=build /go/bin/app /app/himbot
|
|||||||
# Copy migrations directory
|
# Copy migrations directory
|
||||||
COPY --from=build /app/migrations /app/migrations
|
COPY --from=build /app/migrations /app/migrations
|
||||||
|
|
||||||
|
# Copy datasets directory
|
||||||
|
COPY --from=build /app/datasets /app/datasets
|
||||||
|
|
||||||
# Set the entrypoint
|
# Set the entrypoint
|
||||||
ENTRYPOINT ["/app/himbot"]
|
ENTRYPOINT ["/app/himbot"]
|
||||||
|
|||||||
66
cmd/train/main.go
Normal file
66
cmd/train/main.go
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/gob"
|
||||||
|
"flag"
|
||||||
|
"himbot/lib"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
|
||||||
|
outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
|
||||||
|
order := flag.Int("order", 3, "Markov chain order (N-gram size)")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
log.Printf("Scanning directory: %s", *inputDir)
|
||||||
|
|
||||||
|
var allLines []string
|
||||||
|
fileCount := 0
|
||||||
|
|
||||||
|
err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !info.IsDir() && strings.HasSuffix(info.Name(), ".txt") {
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Error reading file %s: %v", path, err)
|
||||||
|
return nil // Continue to next file
|
||||||
|
}
|
||||||
|
lines := strings.Split(string(content), "\n")
|
||||||
|
allLines = append(allLines, lines...)
|
||||||
|
fileCount++
|
||||||
|
if fileCount%5 == 0 {
|
||||||
|
log.Printf("Processed %d files...", fileCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error walking directory: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
|
||||||
|
|
||||||
|
chain := lib.BuildMarkovChain(allLines, *order)
|
||||||
|
|
||||||
|
log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
|
||||||
|
|
||||||
|
f, err := os.Create(*outputFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Failed to create output file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
encoder := gob.NewEncoder(f)
|
||||||
|
if err := encoder.Encode(chain); err != nil {
|
||||||
|
log.Fatalf("Failed to encode chain: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("Done!")
|
||||||
|
}
|
||||||
@@ -2,73 +2,69 @@ package command
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
"encoding/gob"
|
||||||
"fmt"
|
"fmt"
|
||||||
"himbot/lib"
|
"himbot/lib"
|
||||||
"math/rand"
|
"os"
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/bwmarrin/discordgo"
|
"github.com/bwmarrin/discordgo"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MarkovData struct {
|
|
||||||
Chain map[string][]string // "word1 word2" -> ["word3", ...]
|
|
||||||
Starts []string
|
|
||||||
}
|
|
||||||
|
|
||||||
type MarkovCache struct {
|
type MarkovCache struct {
|
||||||
data map[string]*MarkovData
|
data map[string]*lib.MarkovData
|
||||||
hashes map[string]string
|
hashes map[string]string
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
markovCache = &MarkovCache{
|
markovCache = &MarkovCache{
|
||||||
data: make(map[string]*MarkovData),
|
data: make(map[string]*lib.MarkovData),
|
||||||
hashes: make(map[string]string),
|
hashes: make(map[string]string),
|
||||||
}
|
}
|
||||||
urlRegex = regexp.MustCompile(`https?://[^\s]+`)
|
bardChain *lib.MarkovData
|
||||||
mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func MarkovCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
func InitBard(modelPath string) error {
|
||||||
channelID := i.ChannelID
|
f, err := os.Open(modelPath)
|
||||||
|
|
||||||
numMessages := lib.AppConfig.MarkovDefaultMessages
|
|
||||||
if len(i.ApplicationCommandData().Options) > 0 {
|
|
||||||
if i.ApplicationCommandData().Options[0].Name == "messages" {
|
|
||||||
numMessages = int(i.ApplicationCommandData().Options[0].IntValue())
|
|
||||||
if numMessages <= 0 {
|
|
||||||
numMessages = lib.AppConfig.MarkovDefaultMessages
|
|
||||||
} else if numMessages > lib.AppConfig.MarkovMaxMessages {
|
|
||||||
numMessages = lib.AppConfig.MarkovMaxMessages
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
|
||||||
if data := getCachedChain(cacheKey); data != nil {
|
|
||||||
if msg := generateMessage(data, ""); msg != "" {
|
|
||||||
return msg, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
allMessages, err := fetchMessages(s, channelID, numMessages)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var data lib.MarkovData
|
||||||
|
decoder := gob.NewDecoder(f)
|
||||||
|
if err := decoder.Decode(&data); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
data := buildMarkovChain(allMessages)
|
bardChain = &data
|
||||||
setCachedChain(cacheKey, data, allMessages)
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
newMessage := generateMessage(data, "")
|
func BardCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||||
if newMessage == "" {
|
if bardChain == nil {
|
||||||
newMessage = "Not enough text data to generate a message."
|
return "The bard is sleeping (dataset not loaded).", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return newMessage, nil
|
var question string
|
||||||
|
for _, option := range i.ApplicationCommandData().Options {
|
||||||
|
if option.Name == "question" {
|
||||||
|
question = option.StringValue()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
answer := lib.GenerateMessage(bardChain, question)
|
||||||
|
if answer == "" {
|
||||||
|
answer = "Words fail me."
|
||||||
|
}
|
||||||
|
|
||||||
|
if question != "" {
|
||||||
|
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return answer, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate) (string, error) {
|
||||||
@@ -95,7 +91,7 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
|||||||
}
|
}
|
||||||
|
|
||||||
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
cacheKey := fmt.Sprintf("%s:%d", channelID, numMessages)
|
||||||
var data *MarkovData
|
var data *lib.MarkovData
|
||||||
|
|
||||||
if cachedData := getCachedChain(cacheKey); cachedData != nil {
|
if cachedData := getCachedChain(cacheKey); cachedData != nil {
|
||||||
data = cachedData
|
data = cachedData
|
||||||
@@ -104,11 +100,18 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
data = buildMarkovChain(allMessages)
|
|
||||||
setCachedChain(cacheKey, data, allMessages)
|
var texts []string
|
||||||
|
for _, msg := range allMessages {
|
||||||
|
texts = append(texts, msg.Content)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use order 2 for chat history (sparse data)
|
||||||
|
data = lib.BuildMarkovChain(texts, 2)
|
||||||
|
setCachedChain(cacheKey, data, hashMessages(allMessages))
|
||||||
}
|
}
|
||||||
|
|
||||||
answer := generateMessage(data, question)
|
answer := lib.GenerateMessage(data, question)
|
||||||
if answer == "" {
|
if answer == "" {
|
||||||
answer = "I don't have enough context to answer that."
|
answer = "I don't have enough context to answer that."
|
||||||
}
|
}
|
||||||
@@ -116,15 +119,13 @@ func MarkovQuestionCommand(s *discordgo.Session, i *discordgo.InteractionCreate)
|
|||||||
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
return fmt.Sprintf("**Q:** %s\n**A:** %s", question, answer), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCachedChain(cacheKey string) *MarkovData {
|
func getCachedChain(cacheKey string) *lib.MarkovData {
|
||||||
markovCache.mu.RLock()
|
markovCache.mu.RLock()
|
||||||
defer markovCache.mu.RUnlock()
|
defer markovCache.mu.RUnlock()
|
||||||
return markovCache.data[cacheKey]
|
return markovCache.data[cacheKey]
|
||||||
}
|
}
|
||||||
|
|
||||||
func setCachedChain(cacheKey string, data *MarkovData, messages []*discordgo.Message) {
|
func setCachedChain(cacheKey string, data *lib.MarkovData, hash string) {
|
||||||
hash := hashMessages(messages)
|
|
||||||
|
|
||||||
markovCache.mu.Lock()
|
markovCache.mu.Lock()
|
||||||
defer markovCache.mu.Unlock()
|
defer markovCache.mu.Unlock()
|
||||||
|
|
||||||
@@ -186,106 +187,3 @@ func fetchMessages(s *discordgo.Session, channelID string, numMessages int) ([]*
|
|||||||
|
|
||||||
return allMessages, nil
|
return allMessages, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func cleanText(text string) string {
|
|
||||||
text = urlRegex.ReplaceAllString(text, "")
|
|
||||||
text = mentionRegex.ReplaceAllString(text, "")
|
|
||||||
return strings.Join(strings.Fields(text), " ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildMarkovChain(messages []*discordgo.Message) *MarkovData {
|
|
||||||
data := &MarkovData{
|
|
||||||
Chain: make(map[string][]string),
|
|
||||||
Starts: make([]string, 0),
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, msg := range messages {
|
|
||||||
cleaned := cleanText(msg.Content)
|
|
||||||
if cleaned == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
words := strings.Fields(cleaned)
|
|
||||||
if len(words) < 3 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
startKey := key(words[0], words[1])
|
|
||||||
data.Starts = append(data.Starts, startKey)
|
|
||||||
|
|
||||||
for i := 0; i < len(words)-2; i++ {
|
|
||||||
k := key(words[i], words[i+1])
|
|
||||||
val := words[i+2]
|
|
||||||
data.Chain[k] = append(data.Chain[k], val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data
|
|
||||||
}
|
|
||||||
|
|
||||||
func generateMessage(data *MarkovData, seed string) string {
|
|
||||||
if len(data.Starts) == 0 {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
var w1, w2 string
|
|
||||||
var currentKey string
|
|
||||||
|
|
||||||
// Try to seed based on input question
|
|
||||||
if seed != "" {
|
|
||||||
seedWords := strings.Fields(cleanText(seed))
|
|
||||||
var candidates []string
|
|
||||||
|
|
||||||
for k := range data.Chain {
|
|
||||||
for _, sw := range seedWords {
|
|
||||||
if len(sw) > 3 && strings.Contains(strings.ToLower(k), strings.ToLower(sw)) {
|
|
||||||
candidates = append(candidates, k)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(candidates) > 0 {
|
|
||||||
currentKey = candidates[rand.Intn(len(candidates))]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if currentKey == "" {
|
|
||||||
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
|
||||||
}
|
|
||||||
|
|
||||||
parts := strings.Split(currentKey, " ")
|
|
||||||
w1, w2 = parts[0], parts[1]
|
|
||||||
|
|
||||||
output := []string{w1, w2}
|
|
||||||
|
|
||||||
for i := 0; i < 40; i++ {
|
|
||||||
nextOptions, exists := data.Chain[currentKey]
|
|
||||||
if !exists || len(nextOptions) == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
|
||||||
output = append(output, nextWord)
|
|
||||||
|
|
||||||
w1 = w2
|
|
||||||
w2 = nextWord
|
|
||||||
currentKey = key(w1, w2)
|
|
||||||
|
|
||||||
// Soft stop on punctuation
|
|
||||||
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
|
||||||
if rand.Float32() > 0.3 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return strings.Join(output, " ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func key(w1, w2 string) string {
|
|
||||||
return w1 + " " + w2
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
rand.Seed(time.Now().UnixNano())
|
|
||||||
}
|
|
||||||
|
|||||||
BIN
datasets/bard.gob
Normal file
BIN
datasets/bard.gob
Normal file
Binary file not shown.
3341
datasets/bard/a-midsummer-nights-dream_TXT_FolgerShakespeare.txt
Normal file
3341
datasets/bard/a-midsummer-nights-dream_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4570
datasets/bard/alls-well-that-ends-well_TXT_FolgerShakespeare.txt
Normal file
4570
datasets/bard/alls-well-that-ends-well_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5870
datasets/bard/antony-and-cleopatra_TXT_FolgerShakespeare.txt
Normal file
5870
datasets/bard/antony-and-cleopatra_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4224
datasets/bard/as-you-like-it_TXT_FolgerShakespeare.txt
Normal file
4224
datasets/bard/as-you-like-it_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5844
datasets/bard/coriolanus_TXT_FolgerShakespeare.txt
Normal file
5844
datasets/bard/coriolanus_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5380
datasets/bard/cymbeline_TXT_FolgerShakespeare.txt
Normal file
5380
datasets/bard/cymbeline_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
6080
datasets/bard/hamlet_TXT_FolgerShakespeare.txt
Normal file
6080
datasets/bard/hamlet_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4548
datasets/bard/henry-iv-part-1_TXT_FolgerShakespeare.txt
Normal file
4548
datasets/bard/henry-iv-part-1_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4975
datasets/bard/henry-iv-part-2_TXT_FolgerShakespeare.txt
Normal file
4975
datasets/bard/henry-iv-part-2_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4731
datasets/bard/henry-v_TXT_FolgerShakespeare.txt
Normal file
4731
datasets/bard/henry-v_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4562
datasets/bard/henry-vi-part-1_TXT_FolgerShakespeare.txt
Normal file
4562
datasets/bard/henry-vi-part-1_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5113
datasets/bard/henry-vi-part-2_TXT_FolgerShakespeare.txt
Normal file
5113
datasets/bard/henry-vi-part-2_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5100
datasets/bard/henry-vi-part-3_TXT_FolgerShakespeare.txt
Normal file
5100
datasets/bard/henry-vi-part-3_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4730
datasets/bard/henry-viii_TXT_FolgerShakespeare.txt
Normal file
4730
datasets/bard/henry-viii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4325
datasets/bard/julius-caesar_TXT_FolgerShakespeare.txt
Normal file
4325
datasets/bard/julius-caesar_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4020
datasets/bard/king-john_TXT_FolgerShakespeare.txt
Normal file
4020
datasets/bard/king-john_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5563
datasets/bard/king-lear_TXT_FolgerShakespeare.txt
Normal file
5563
datasets/bard/king-lear_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4713
datasets/bard/loves-labors-lost_TXT_FolgerShakespeare.txt
Normal file
4713
datasets/bard/loves-labors-lost_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
2191
datasets/bard/lucrece_TXT_FolgerShakespeare.txt
Normal file
2191
datasets/bard/lucrece_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3821
datasets/bard/macbeth_TXT_FolgerShakespeare.txt
Normal file
3821
datasets/bard/macbeth_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4477
datasets/bard/measure-for-measure_TXT_FolgerShakespeare.txt
Normal file
4477
datasets/bard/measure-for-measure_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4223
datasets/bard/much-ado-about-nothing_TXT_FolgerShakespeare.txt
Normal file
4223
datasets/bard/much-ado-about-nothing_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5730
datasets/bard/othello_TXT_FolgerShakespeare.txt
Normal file
5730
datasets/bard/othello_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3837
datasets/bard/pericles_TXT_FolgerShakespeare.txt
Normal file
3837
datasets/bard/pericles_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
7063
datasets/bard/poems.txt
Normal file
7063
datasets/bard/poems.txt
Normal file
File diff suppressed because it is too large
Load Diff
4257
datasets/bard/richard-ii_TXT_FolgerShakespeare.txt
Normal file
4257
datasets/bard/richard-ii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
6286
datasets/bard/richard-iii_TXT_FolgerShakespeare.txt
Normal file
6286
datasets/bard/richard-iii_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5011
datasets/bard/romeo-and-juliet_TXT_FolgerShakespeare.txt
Normal file
5011
datasets/bard/romeo-and-juliet_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
2675
datasets/bard/shakespeares-sonnets_TXT_FolgerShakespeare.txt
Normal file
2675
datasets/bard/shakespeares-sonnets_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3201
datasets/bard/the-comedy-of-errors_TXT_FolgerShakespeare.txt
Normal file
3201
datasets/bard/the-comedy-of-errors_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4055
datasets/bard/the-merchant-of-venice_TXT_FolgerShakespeare.txt
Normal file
4055
datasets/bard/the-merchant-of-venice_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4446
datasets/bard/the-merry-wives-of-windsor_TXT_FolgerShakespeare.txt
Normal file
4446
datasets/bard/the-merry-wives-of-windsor_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
102
datasets/bard/the-phoenix-and-turtle_TXT_FolgerShakespeare.txt
Normal file
102
datasets/bard/the-phoenix-and-turtle_TXT_FolgerShakespeare.txt
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
The Phoenix and Turtle
|
||||||
|
by William Shakespeare
|
||||||
|
Edited by Barbara A. Mowat and Paul Werstine
|
||||||
|
with Michael Poston and Rebecca Niles
|
||||||
|
Folger Shakespeare Library
|
||||||
|
https://shakespeare.folger.edu/shakespeares-works/the-phoenix-and-turtle/
|
||||||
|
Created on Jul 31, 2015, from FDT version 0.9.0.1
|
||||||
|
|
||||||
|
|
||||||
|
"The Phoenix and Turtle"
|
||||||
|
|
||||||
|
|
||||||
|
Let the bird of loudest lay
|
||||||
|
On the sole Arabian tree
|
||||||
|
Herald sad and trumpet be,
|
||||||
|
To whose sound chaste wings obey.
|
||||||
|
|
||||||
|
But thou shrieking harbinger,
|
||||||
|
Foul precurrer of the fiend,
|
||||||
|
Augur of the fever's end,
|
||||||
|
To this troop come thou not near.
|
||||||
|
|
||||||
|
From this session interdict
|
||||||
|
Every fowl of tyrant wing,
|
||||||
|
Save the eagle, feathered king;
|
||||||
|
Keep the obsequy so strict.
|
||||||
|
|
||||||
|
Let the priest in surplice white,
|
||||||
|
That defunctive music can,
|
||||||
|
Be the death-divining swan,
|
||||||
|
Lest the requiem lack his right.
|
||||||
|
|
||||||
|
And thou treble-dated crow,
|
||||||
|
That thy sable gender mak'st
|
||||||
|
With the breath thou giv'st and tak'st,
|
||||||
|
'Mongst our mourners shalt thou go.
|
||||||
|
|
||||||
|
Here the anthem doth commence:
|
||||||
|
Love and constancy is dead,
|
||||||
|
Phoenix and the turtle fled
|
||||||
|
In a mutual flame from hence.
|
||||||
|
|
||||||
|
So they loved, as love in twain
|
||||||
|
Had the essence but in one,
|
||||||
|
Two distincts, division none;
|
||||||
|
Number there in love was slain.
|
||||||
|
|
||||||
|
Hearts remote yet not asunder,
|
||||||
|
Distance and no space was seen
|
||||||
|
'Twixt this turtle and his queen;
|
||||||
|
But in them it were a wonder.
|
||||||
|
|
||||||
|
So between them love did shine
|
||||||
|
That the turtle saw his right
|
||||||
|
Flaming in the phoenix' sight;
|
||||||
|
Either was the other's mine.
|
||||||
|
|
||||||
|
Property was thus appalled
|
||||||
|
That the self was not the same;
|
||||||
|
Single nature's double name
|
||||||
|
Neither two nor one was called.
|
||||||
|
|
||||||
|
Reason, in itself confounded,
|
||||||
|
Saw division grow together,
|
||||||
|
To themselves yet either neither,
|
||||||
|
Simple were so well compounded
|
||||||
|
|
||||||
|
That it cried, "How true a twain
|
||||||
|
Seemeth this concordant one!
|
||||||
|
Love hath reason, Reason none,
|
||||||
|
If what parts can so remain,"
|
||||||
|
|
||||||
|
Whereupon it made this threne
|
||||||
|
To the phoenix and the dove,
|
||||||
|
Co-supremes and stars of love,
|
||||||
|
As chorus to their tragic scene.
|
||||||
|
|
||||||
|
|
||||||
|
Threnos
|
||||||
|
|
||||||
|
|
||||||
|
Beauty, truth, and rarity,
|
||||||
|
Grace in all simplicity,
|
||||||
|
Here enclosed, in cinders lie.
|
||||||
|
|
||||||
|
Death is now the phoenix' nest,
|
||||||
|
And the turtle's loyal breast
|
||||||
|
To eternity doth rest,
|
||||||
|
|
||||||
|
Leaving no posterity;
|
||||||
|
'Twas not their infirmity,
|
||||||
|
It was married chastity.
|
||||||
|
|
||||||
|
Truth may seem, but cannot be;
|
||||||
|
Beauty brag, but 'tis not she;
|
||||||
|
Truth and beauty buried be.
|
||||||
|
|
||||||
|
To this urn let those repair
|
||||||
|
That are either true or fair;
|
||||||
|
For these dead birds sigh a prayer.
|
||||||
|
|
||||||
|
William Shakespeare
|
||||||
4579
datasets/bard/the-taming-of-the-shrew_TXT_FolgerShakespeare.txt
Normal file
4579
datasets/bard/the-taming-of-the-shrew_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3466
datasets/bard/the-tempest_TXT_FolgerShakespeare.txt
Normal file
3466
datasets/bard/the-tempest_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3872
datasets/bard/the-two-gentlemen-of-verona_TXT_FolgerShakespeare.txt
Normal file
3872
datasets/bard/the-two-gentlemen-of-verona_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4938
datasets/bard/the-two-noble-kinsmen_TXT_FolgerShakespeare.txt
Normal file
4938
datasets/bard/the-two-noble-kinsmen_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4671
datasets/bard/the-winters-tale_TXT_FolgerShakespeare.txt
Normal file
4671
datasets/bard/the-winters-tale_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
3998
datasets/bard/timon-of-athens_TXT_FolgerShakespeare.txt
Normal file
3998
datasets/bard/timon-of-athens_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4033
datasets/bard/titus-andronicus_TXT_FolgerShakespeare.txt
Normal file
4033
datasets/bard/titus-andronicus_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
5668
datasets/bard/troilus-and-cressida_TXT_FolgerShakespeare.txt
Normal file
5668
datasets/bard/troilus-and-cressida_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
4071
datasets/bard/twelfth-night_TXT_FolgerShakespeare.txt
Normal file
4071
datasets/bard/twelfth-night_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
1434
datasets/bard/venus-and-adonis_TXT_FolgerShakespeare.txt
Normal file
1434
datasets/bard/venus-and-adonis_TXT_FolgerShakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
262
lib/markov.go
Normal file
262
lib/markov.go
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
package lib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type MarkovData struct {
|
||||||
|
Order int
|
||||||
|
Chain map[string][]string // "word1 ... wordN" -> ["word3", ...]
|
||||||
|
Starts []string
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
urlRegex = regexp.MustCompile(`https?://[^\s]+`)
|
||||||
|
mentionRegex = regexp.MustCompile(`<[@#&!][^>]+>`)
|
||||||
|
bracketRegex = regexp.MustCompile(`\[.*?\]`)
|
||||||
|
speakerRegex = regexp.MustCompile(`^(?:[A-Z]{2,}\s+)+`)
|
||||||
|
stopWords = map[string]bool{
|
||||||
|
"the": true, "and": true, "a": true, "to": true, "of": true,
|
||||||
|
"in": true, "is": true, "that": true, "it": true, "for": true,
|
||||||
|
"as": true, "with": true, "on": true, "at": true, "by": true,
|
||||||
|
"this": true, "from": true, "but": true, "or": true, "an": true,
|
||||||
|
"be": true, "are": true, "was": true, "were": true, "so": true,
|
||||||
|
"if": true, "out": true, "up": true, "about": true, "into": true,
|
||||||
|
"over": true, "after": true, "beneath": true, "under": true,
|
||||||
|
"above": true, "me": true, "my": true, "mine": true, "you": true,
|
||||||
|
"your": true, "yours": true, "he": true, "him": true, "his": true,
|
||||||
|
"she": true, "her": true, "hers": true, "they": true, "them": true,
|
||||||
|
"their": true, "theirs": true, "we": true, "us": true, "our": true,
|
||||||
|
"ours": true, "who": true, "whom": true, "whose": true, "what": true,
|
||||||
|
"which": true, "when": true, "where": true, "why": true, "how": true,
|
||||||
|
"give": true, "write": true, "tell": true, "say": true, "speak": true,
|
||||||
|
"make": true, "do": true, "does": true, "did": true, "done": true,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
rand.Seed(time.Now().UnixNano())
|
||||||
|
}
|
||||||
|
|
||||||
|
func CleanText(text string) string {
|
||||||
|
text = urlRegex.ReplaceAllString(text, "")
|
||||||
|
text = mentionRegex.ReplaceAllString(text, "")
|
||||||
|
text = bracketRegex.ReplaceAllString(text, "")
|
||||||
|
text = strings.TrimSpace(text)
|
||||||
|
text = speakerRegex.ReplaceAllString(text, "")
|
||||||
|
return strings.Join(strings.Fields(text), " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
func BuildMarkovChain(lines []string, order int) *MarkovData {
|
||||||
|
data := &MarkovData{
|
||||||
|
Order: order,
|
||||||
|
Chain: make(map[string][]string),
|
||||||
|
Starts: make([]string, 0),
|
||||||
|
}
|
||||||
|
|
||||||
|
var allWords []string
|
||||||
|
|
||||||
|
for _, line := range lines {
|
||||||
|
// Skip likely headers/metadata (all caps lines)
|
||||||
|
trimmed := strings.TrimSpace(line)
|
||||||
|
if trimmed != "" && strings.ToUpper(trimmed) == trimmed && strings.ToLower(trimmed) != trimmed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cleaned := CleanText(line)
|
||||||
|
if cleaned == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
allWords = append(allWords, strings.Fields(cleaned)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(allWords) < order+1 {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
// First key is always a start
|
||||||
|
data.Starts = append(data.Starts, Key(allWords[:order]...))
|
||||||
|
|
||||||
|
for i := 0; i < len(allWords)-order; i++ {
|
||||||
|
keyWords := allWords[i : i+order]
|
||||||
|
nextWord := allWords[i+order]
|
||||||
|
|
||||||
|
k := Key(keyWords...)
|
||||||
|
data.Chain[k] = append(data.Chain[k], nextWord)
|
||||||
|
|
||||||
|
// If the word shifting out ends a sentence, the next sequence is a start
|
||||||
|
if strings.ContainsAny(allWords[i], ".!?") {
|
||||||
|
if i+1+order <= len(allWords) {
|
||||||
|
data.Starts = append(data.Starts, Key(allWords[i+1:i+1+order]...))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
func GenerateMessage(data *MarkovData, seed string) string {
|
||||||
|
if len(data.Starts) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var currentKey string
|
||||||
|
|
||||||
|
// Try to seed based on input question
|
||||||
|
if seed != "" {
|
||||||
|
seedWords := strings.Fields(CleanText(seed))
|
||||||
|
|
||||||
|
// Sort seed words: significant words first, then by length
|
||||||
|
for i := 0; i < len(seedWords); i++ {
|
||||||
|
for j := i + 1; j < len(seedWords); j++ {
|
||||||
|
sw1 := strings.ToLower(seedWords[i])
|
||||||
|
sw2 := strings.ToLower(seedWords[j])
|
||||||
|
isStop1 := stopWords[sw1]
|
||||||
|
isStop2 := stopWords[sw2]
|
||||||
|
|
||||||
|
// If one is a stop word and the other isn't, prioritize the non-stop word
|
||||||
|
if isStop1 && !isStop2 {
|
||||||
|
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||||
|
} else if !isStop1 && isStop2 {
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
// Otherwise sort by length
|
||||||
|
if len(seedWords[i]) < len(seedWords[j]) {
|
||||||
|
seedWords[i], seedWords[j] = seedWords[j], seedWords[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var candidates []string
|
||||||
|
|
||||||
|
// 1. Try to find a sentence starter
|
||||||
|
// We iterate seed words first to prioritize matches for longer words
|
||||||
|
for _, sw := range seedWords {
|
||||||
|
if len(sw) <= 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
swLower := strings.ToLower(sw)
|
||||||
|
var primaryMatches []string // starts with word
|
||||||
|
|
||||||
|
for _, startKey := range data.Starts {
|
||||||
|
parts := strings.Fields(strings.ToLower(startKey))
|
||||||
|
if len(parts) < data.Order {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if parts[0] == swLower {
|
||||||
|
primaryMatches = append(primaryMatches, startKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we found sentence starters beginning with this word, use them exclusively
|
||||||
|
if len(primaryMatches) > 0 {
|
||||||
|
candidates = primaryMatches
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. If no perfect starts, try any start containing the word
|
||||||
|
if len(candidates) == 0 {
|
||||||
|
for _, sw := range seedWords {
|
||||||
|
if len(sw) <= 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
swLower := strings.ToLower(sw)
|
||||||
|
|
||||||
|
for _, startKey := range data.Starts {
|
||||||
|
parts := strings.Fields(strings.ToLower(startKey))
|
||||||
|
if len(parts) < data.Order {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Check remaining words in key
|
||||||
|
found := false
|
||||||
|
for i := 1; i < len(parts); i++ {
|
||||||
|
if parts[i] == swLower {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if found {
|
||||||
|
candidates = append(candidates, startKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(candidates) > 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. If no starts, try to find any connection in the chain
|
||||||
|
if len(candidates) == 0 {
|
||||||
|
for _, sw := range seedWords {
|
||||||
|
if len(sw) <= 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
swLower := strings.ToLower(sw)
|
||||||
|
var matches []string
|
||||||
|
|
||||||
|
for k := range data.Chain {
|
||||||
|
parts := strings.Fields(strings.ToLower(k))
|
||||||
|
if len(parts) < data.Order {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if parts[0] == swLower {
|
||||||
|
matches = append(matches, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matches) > 0 {
|
||||||
|
candidates = matches
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(candidates) > 0 {
|
||||||
|
currentKey = candidates[rand.Intn(len(candidates))]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentKey == "" {
|
||||||
|
currentKey = data.Starts[rand.Intn(len(data.Starts))]
|
||||||
|
}
|
||||||
|
|
||||||
|
output := strings.Fields(currentKey)
|
||||||
|
|
||||||
|
for i := 0; i < 40; i++ {
|
||||||
|
nextOptions, exists := data.Chain[currentKey]
|
||||||
|
if !exists || len(nextOptions) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
nextWord := nextOptions[rand.Intn(len(nextOptions))]
|
||||||
|
output = append(output, nextWord)
|
||||||
|
|
||||||
|
// Shift the key window
|
||||||
|
currentWords := strings.Fields(currentKey)
|
||||||
|
if len(currentWords) >= 1 {
|
||||||
|
newKeyWords := append(currentWords[1:], nextWord)
|
||||||
|
currentKey = Key(newKeyWords...)
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Soft stop on punctuation
|
||||||
|
if i > 5 && strings.ContainsAny(nextWord, ".!?") {
|
||||||
|
if rand.Float32() > 0.3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(output, " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Key builds a chain lookup key by joining the given words with single spaces.
func Key(words ...string) string {
	var b strings.Builder
	for i, w := range words {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(w)
	}
	return b.String()
}
|
||||||
16
main.go
16
main.go
@@ -29,6 +29,10 @@ func main() {
|
|||||||
initCommands(config)
|
initCommands(config)
|
||||||
initCommandHandlers(config)
|
initCommandHandlers(config)
|
||||||
|
|
||||||
|
if err := command.InitBard("datasets/bard.gob"); err != nil {
|
||||||
|
log.Printf("Failed to load Bard dataset: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
err := lib.InitDB()
|
err := lib.InitDB()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Failed to initialize database: %v", err)
|
log.Fatalf("Failed to initialize database: %v", err)
|
||||||
@@ -191,13 +195,13 @@ func initCommands(config *lib.Config) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "gen",
|
Name: "bard",
|
||||||
Description: "Generate a random message using markov chains based on channel history",
|
Description: "Ask the bard a question",
|
||||||
Options: []*discordgo.ApplicationCommandOption{
|
Options: []*discordgo.ApplicationCommandOption{
|
||||||
{
|
{
|
||||||
Type: discordgo.ApplicationCommandOptionInteger,
|
Type: discordgo.ApplicationCommandOptionString,
|
||||||
Name: "messages",
|
Name: "question",
|
||||||
Description: fmt.Sprintf("Number of messages to use (default: %d, max: %d)", config.MarkovDefaultMessages, config.MarkovMaxMessages),
|
Description: "The question you want to ask",
|
||||||
Required: false,
|
Required: false,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -255,7 +259,7 @@ func initCommandHandlers(config *lib.Config) {
|
|||||||
commandHandlers = map[string]func(s *discordgo.Session, i *discordgo.InteractionCreate){
|
commandHandlers = map[string]func(s *discordgo.Session, i *discordgo.InteractionCreate){
|
||||||
"ping": lib.HandleCommand("ping", time.Duration(config.PingCooldown)*time.Second, command.PingCommand),
|
"ping": lib.HandleCommand("ping", time.Duration(config.PingCooldown)*time.Second, command.PingCommand),
|
||||||
"hs": lib.HandleCommand("hs", time.Duration(config.HsCooldown)*time.Second, command.HsCommand),
|
"hs": lib.HandleCommand("hs", time.Duration(config.HsCooldown)*time.Second, command.HsCommand),
|
||||||
"gen": lib.HandleCommand("gen", time.Duration(config.MarkovCooldown)*time.Second, command.MarkovCommand),
|
"bard": lib.HandleCommand("bard", time.Duration(config.MarkovCooldown)*time.Second, command.BardCommand),
|
||||||
"ask": lib.HandleCommand("ask", time.Duration(config.MarkovAskCooldown)*time.Second, command.MarkovQuestionCommand),
|
"ask": lib.HandleCommand("ask", time.Duration(config.MarkovAskCooldown)*time.Second, command.MarkovQuestionCommand),
|
||||||
"himbucks": lib.HandleCommand("himbucks", time.Duration(config.HimbucksCooldown)*time.Second, command.BalanceGetCommand),
|
"himbucks": lib.HandleCommand("himbucks", time.Duration(config.HimbucksCooldown)*time.Second, command.BalanceGetCommand),
|
||||||
"himboard": lib.HandleCommand("himboard", time.Duration(config.HimboardCooldown)*time.Second, command.LeaderboardCommand),
|
"himboard": lib.HandleCommand("himboard", time.Duration(config.HimboardCooldown)*time.Second, command.LeaderboardCommand),
|
||||||
|
|||||||
Reference in New Issue
Block a user