All checks were successful
Docker Deploy / build-and-push (push) Successful in 3m23s
67 lines
1.6 KiB
Go
67 lines
1.6 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/gob"
|
|
"flag"
|
|
"himbot/lib"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
func main() {
|
|
inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
|
|
outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
|
|
order := flag.Int("order", 3, "Markov chain order (N-gram size)")
|
|
flag.Parse()
|
|
|
|
log.Printf("Scanning directory: %s", *inputDir)
|
|
|
|
var allLines []string
|
|
fileCount := 0
|
|
|
|
err := filepath.Walk(*inputDir, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !info.IsDir() && strings.HasSuffix(info.Name(), ".txt") {
|
|
content, err := os.ReadFile(path)
|
|
if err != nil {
|
|
log.Printf("Error reading file %s: %v", path, err)
|
|
return nil // Continue to next file
|
|
}
|
|
lines := strings.Split(string(content), "\n")
|
|
allLines = append(allLines, lines...)
|
|
fileCount++
|
|
if fileCount%5 == 0 {
|
|
log.Printf("Processed %d files...", fileCount)
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
log.Fatalf("Error walking directory: %v", err)
|
|
}
|
|
|
|
log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
|
|
|
|
chain := lib.BuildMarkovChain(allLines, *order)
|
|
|
|
log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
|
|
|
|
f, err := os.Create(*outputFile)
|
|
if err != nil {
|
|
log.Fatalf("Failed to create output file: %v", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
encoder := gob.NewEncoder(f)
|
|
if err := encoder.Encode(chain); err != nil {
|
|
log.Fatalf("Failed to encode chain: %v", err)
|
|
}
|
|
|
|
log.Println("Done!")
|
|
}
|