Combine sorted files with minimal buffering

I have two log files whose lines are prefixed with a sortable timestamp. I would like to see them combined, in timestamp order, while the processes generating the log files are still running. This is a fairly accurate simulation of the situation:

slow() {
    # print stdout at 30bps
    exec pv -qL 30
}
timestamp() {
    # prefix stdin with a sortable timestamp
    exec tai64n
}

# Simulate two slowly-running batch jobs:
seq 000 099 | slow | timestamp > seq.1 &
seq1=$!
seq 100 199 | slow | timestamp > seq.2 &
seq2=$!

# I'd like to see the combined output of those two logs, in timestamp-sorted order
try1() {
    # this shows me the output as soon as it is available,
    # but it is badly interleaved and not necessarily in order
    tail -f seq.1 --pid=$seq1 &
    tail -f seq.2 --pid=$seq2 &
}
try2() {
    # this gives the correct output,
    # but outputs nothing till both jobs have stopped
    sort -sm <(tail -f seq.1 --pid=$seq1) <(tail -f seq.2 --pid=$seq2)
}


try2
wait
1 answer

The idea is to use tee in the jobs themselves (writing to the log file and to the shared output at the same time) rather than read the files back with tail afterwards: with tail you would be polling (tail -f -s 0.01 checks up to 100 times per second) and would still need something like split --filter='sort -sm' to merge the output in batches.
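
As a rough sketch of that tee idea (reusing the question's slow and timestamp helpers, and assuming the jobs can be modified to do the copying themselves):

# Each job writes its timestamped lines both to its own log file and to the
# shared stdout, so nothing needs to tail the files afterwards.
seq 000 099 | slow | timestamp | tee seq.1 &
seq 100 199 | slow | timestamp | tee seq.2 &
wait

The perl functions below fold the timestamping and the tee into a single process.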

The tai64n timestamping can be approximated in perl:

tai64n() {
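  # prefix each input line with a sortable TAI64N-style timestamp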
  perl -MTime::HiRes=time -pe '
    printf "\@4%015x%x%n", split(/\./,time), $c; print 0 x(25-$c) . " "'
}
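
For example, to see the format it produces (the hex digits depend on when you run it):

echo hello | tai64n
# prints "hello" prefixed with a 25-character timestamp that starts with
# "@4" and is followed by a single space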

And in sh/bash, with the timestamping and the tee done by perl:

slow() {
    # print stdout at 30bps
    pv -qL 30
}

tai64n_and_tee() {
  # prefix stdin with a sortable timestamp and copy to given file
  perl -MTime::HiRes=time -e '
    $_ = shift;
    open(TEE, "> $_") or die $!;
    while (<>) {
      $_ = sprintf("\@4%015x%x%n", split(/\./,time), $c) . 0 x(25-$c) . " $_";
      print TEE $_;
      print $_;
    }
  ' "$1"
}

# Simulate two slowly-running batch jobs:
seq 000 099 | slow | tai64n_and_tee seq.1 &
seq 100 199 | slow | tai64n_and_tee seq.2 &
wait

This way there is only one perl process per job: since perl is already used for the tai64n timestamping, the same perl also does the tee instead of running a separate tee after the tai64n.
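
If you also want a single merged file afterwards, the two on-disk logs can still be combined once the jobs have finished, with the same sort -sm as in the question (seq.merged is just an illustrative name):

# merge the two already-sorted logs by their timestamp prefixes
sort -sm seq.1 seq.2 > seq.merged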
