// This program trains a Markov chain on a directory of ".txt" files and
// stores the resulting model on disk in gob format.
package main

import (
	"encoding/gob"
	"flag"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"strings"

	"himbot/lib"
)

// collectLines walks dir recursively and gathers the lines of every
// non-directory entry whose name ends in ".txt".
//
// It returns the collected lines and the number of files that were read
// successfully. Files that cannot be read are logged and skipped; an error
// reported by the directory walk itself aborts the scan and is returned.
//
// File contents are split on "\n" and forwarded unmodified, so empty lines
// (including the one after a trailing newline) are part of the result.
func collectLines(dir string) ([]string, int, error) {
	var lines []string
	files := 0

	err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() || !strings.HasSuffix(d.Name(), ".txt") {
			return nil
		}

		content, err := os.ReadFile(path)
		if err != nil {
			// Best effort: one unreadable file must not abort the whole run.
			log.Printf("Error reading file %s: %v", path, err)
			return nil
		}

		lines = append(lines, strings.Split(string(content), "\n")...)
		files++
		if files%5 == 0 {
			log.Printf("Processed %d files...", files)
		}
		return nil
	})
	if err != nil {
		return nil, 0, err
	}
	return lines, files, nil
}

// main parses the command-line flags, collects the training lines from the
// input directory, builds the chain with lib.BuildMarkovChain and writes it
// gob-encoded to the output file.
//
// Flags:
//
//	-input   directory containing text files to train on (default "datasets/bard")
//	-output  path of the gob file to write (default "datasets/bard.gob")
//	-order   Markov chain order passed to lib.BuildMarkovChain (default 3)
//
// Any failure while walking the directory or writing the output terminates
// the process via log.Fatalf.
func main() {
	inputDir := flag.String("input", "datasets/bard", "Directory containing text files to train on")
	outputFile := flag.String("output", "datasets/bard.gob", "Output file path for the pre-trained model")
	order := flag.Int("order", 3, "Markov chain order (N-gram size)")
	flag.Parse()

	log.Printf("Scanning directory: %s", *inputDir)
	allLines, fileCount, err := collectLines(*inputDir)
	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	log.Printf("Found %d files with %d total lines. Building Markov chain...", fileCount, len(allLines))
	chain := lib.BuildMarkovChain(allLines, *order)

	log.Printf("Chain built with %d start keys. Saving to %s...", len(chain.Starts), *outputFile)
	f, err := os.Create(*outputFile)
	if err != nil {
		log.Fatalf("Failed to create output file: %v", err)
	}

	// The file is closed explicitly rather than with defer: log.Fatalf exits
	// the process without running deferred calls, and the Close error has to
	// be checked because it can report that the written data never reached
	// the disk.
	if err := gob.NewEncoder(f).Encode(chain); err != nil {
		_ = f.Close() // the encode error is the one worth reporting
		log.Fatalf("Failed to encode chain: %v", err)
	}
	if err := f.Close(); err != nil {
		log.Fatalf("Failed to close output file: %v", err)
	}

	log.Println("Done!")
}