Skip to content

Instantly share code, notes, and snippets.

@teknoraver
Last active November 29, 2021 12:10
Show Gist options
  • Save teknoraver/7363d17a29af0dc0a207f33f87e3e059 to your computer and use it in GitHub Desktop.
Save teknoraver/7363d17a29af0dc0a207f33f87e3e059 to your computer and use it in GitHub Desktop.
Split a file into chunks of a maximum size without ever breaking a line.
package main
import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"os"
)
// main parses the command line and splits the input file into chunks.
//
// Usage: prog [-s chunksize] input outputfmt
//
// outputfmt is a fmt-style pattern with one integer verb (for example
// "part%02d.txt"); it is expanded with the chunk number, starting at 0.
// Any I/O error is reported on stderr and the process exits with status 1.
func main() {
	maxSize := flag.Int("s", 1024*1024, "chunk size")
	flag.Parse()
	if flag.NArg() < 2 {
		fmt.Fprintf(os.Stderr, "usage: %s [-s chunksize] input outputfmt\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "\texample:\n\t%s -s 1024 users.txt newusers%%02d.txt\n", os.Args[0])
		os.Exit(1)
	}

	if err := splitFile(flag.Arg(0), flag.Arg(1), *maxSize); err != nil {
		fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
		os.Exit(1)
	}
}

// splitFile copies srcPath into files named fmt.Sprintf(dstFormat, n) for
// n = 0, 1, 2, ..., starting a new file whenever appending the next line
// would push the current one past maxSize bytes.
//
// Lines are never broken and are copied byte-for-byte (line endings and a
// missing final newline are preserved), so concatenating the chunks yields
// the original input. A single line longer than maxSize is written to a
// chunk of its own, which is the only case in which a chunk exceeds maxSize.
// Chunk 0 is always created, even for empty input.
func splitFile(srcPath, dstFormat string, maxSize int) error {
	in, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer in.Close() // read-only handle: a close error cannot lose data

	out, err := os.Create(fmt.Sprintf(dstFormat, 0))
	if err != nil {
		return err
	}
	w := bufio.NewWriter(out)

	// finish flushes buffered data and closes the current chunk, reporting
	// the first failure so a short write is never silently ignored.
	finish := func() error {
		if err := w.Flush(); err != nil {
			out.Close()
			return err
		}
		return out.Close()
	}

	// bufio.Reader is used instead of bufio.Scanner because Scanner has a
	// fixed maximum token size and strips line terminators.
	r := bufio.NewReader(in)
	var size, fileno int
	for {
		line, readErr := r.ReadString('\n')
		if len(line) > 0 {
			// Rotate only when the current chunk already holds data, so an
			// oversized line never leaves an empty chunk behind.
			if size > 0 && size+len(line) > maxSize {
				if err := finish(); err != nil {
					return err
				}
				fileno++
				size = 0
				out, err = os.Create(fmt.Sprintf(dstFormat, fileno))
				if err != nil {
					return err
				}
				w.Reset(out)
			}
			if _, err := w.WriteString(line); err != nil {
				out.Close()
				return err
			}
			size += len(line)
		}
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			out.Close()
			return readErr
		}
	}
	return finish()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment