Files
himbot/cmd/train/main.go
Atridad Lahiji 9694a42f3f
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
Trying some weird training nonsense. Maybe this will be fun.
2026-01-20 14:48:53 -07:00

67 lines
1.6 KiB
Go

package main
import (
"encoding/gob"
"flag"
"himbot/lib"
"log"
"os"
"path/filepath"
"strings"
)
// main is the entry point of the offline training tool. It walks the -input
// directory, reads every non-directory entry whose name ends in ".txt",
// splits each file's contents on "\n", builds a Markov chain of the requested
// -order with lib.BuildMarkovChain, and gob-encodes the result to the -output
// path.
//
// Files that cannot be read are logged and skipped. Any other failure
// (walking the tree, or creating, encoding to, or closing the output file)
// terminates the process via log.Fatalf.
func main() {
	inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
	outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
	order := flag.Int("order", 3, "Markov chain order (N-gram size)")
	flag.Parse()

	log.Printf("Scanning directory: %s", *inputDir)

	var allLines []string
	fileCount := 0
	err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Propagate traversal errors; info may be nil here, so it must
			// not be touched before this check.
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".txt") {
			return nil
		}

		content, err := os.ReadFile(path)
		if err != nil {
			log.Printf("Error reading file %s: %v", path, err)
			return nil // Continue to next file
		}

		allLines = append(allLines, strings.Split(string(content), "\n")...)
		fileCount++
		// Emit a progress line every fifth file.
		if fileCount%5 == 0 {
			log.Printf("Processed %d files...", fileCount)
		}
		return nil
	})
	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
	chain := lib.BuildMarkovChain(allLines, *order)

	log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
	f, err := os.Create(*outputFile)
	if err != nil {
		log.Fatalf("Failed to create output file: %v", err)
	}

	// The file is closed explicitly rather than with defer: log.Fatalf exits
	// the process without running deferred calls, and the error returned by
	// Close must be checked because a failed close can mean the model was not
	// fully written to disk.
	if err := gob.NewEncoder(f).Encode(chain); err != nil {
		// Best-effort close; the encode error is the one worth reporting.
		_ = f.Close()
		log.Fatalf("Failed to encode chain: %v", err)
	}
	if err := f.Close(); err != nil {
		log.Fatalf("Failed to close output file: %v", err)
	}

	log.Println("Done!")
}