Trying some weird training nonsense. Maybe this will be fun.
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
This commit is contained in:
66
cmd/train/main.go
Normal file
66
cmd/train/main.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"flag"
|
||||
"himbot/lib"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
|
||||
outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
|
||||
order := flag.Int("order", 3, "Markov chain order (N-gram size)")
|
||||
flag.Parse()
|
||||
|
||||
log.Printf("Scanning directory: %s", *inputDir)
|
||||
|
||||
var allLines []string
|
||||
fileCount := 0
|
||||
|
||||
err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() && strings.HasSuffix(info.Name(), ".txt") {
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
log.Printf("Error reading file %s: %v", path, err)
|
||||
return nil // Continue to next file
|
||||
}
|
||||
lines := strings.Split(string(content), "\n")
|
||||
allLines = append(allLines, lines...)
|
||||
fileCount++
|
||||
if fileCount%5 == 0 {
|
||||
log.Printf("Processed %d files...", fileCount)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Error walking directory: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
|
||||
|
||||
chain := lib.BuildMarkovChain(allLines, *order)
|
||||
|
||||
log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
|
||||
|
||||
f, err := os.Create(*outputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create output file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
encoder := gob.NewEncoder(f)
|
||||
if err := encoder.Encode(chain); err != nil {
|
||||
log.Fatalf("Failed to encode chain: %v", err)
|
||||
}
|
||||
|
||||
log.Println("Done!")
|
||||
}
|
||||
Reference in New Issue
Block a user