diff --git a/README.pod b/README.pod deleted file mode 120000 index abe0c14..0000000 --- a/README.pod +++ /dev/null @@ -1 +0,0 @@ -bin/feedgnuplot \ No newline at end of file diff --git a/README.pod b/README.pod new file mode 100755 index 0000000..94c9912 --- /dev/null +++ b/README.pod @@ -0,0 +1,1590 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Getopt::Long; +use Time::HiRes qw( usleep gettimeofday tv_interval ); +use IO::Handle; +use List::Util qw( first ); +use Scalar::Util qw( looks_like_number ); +use Text::ParseWords; +use threads; +use threads::shared; +use Thread::Queue; +use Pod::Usage; +use Time::Piece; + +my $VERSION = 1.26; + +my %options; +interpretCommandline(); + +# list containing the plot data. Each element is a hashref of parameters. +# $curve->{datastring} is a string of all the data in this curve that can be +# sent directly to gnuplot. $curve->{datastring_meta} is an arrayref holding one +# hashref {domain => ..., offset_start => ...} per data point. offset_start +# represents a position in the datastring where this particular data element +# begins. As the data is culled with --xlen, the offsets are preserved by using +# $curve->{datastring_offset} to represent the offset IN THE ORIGINAL STRING of +# the current start of the datastring + + +my @curves = (); + +# hash mapping curve names to their indices in the @curves list +my %curveIndices = (); + +# now start the data acquisition and plotting threads. $dataQueue carries the +# input lines and the replot requests to mainThread when streaming; it stays +# undefined otherwise +my $dataQueue; + +# Whether any new data has arrived since the last replot +my $haveNewData; + +# when the last replot happened +my $last_replot_time = [gettimeofday]; + +# whether the previous replot was timer based +my $last_replot_is_from_timer = 1; + +# set once the input-reading loop below is done; shared so that +# plotUpdateThread can see it +my $streamingFinished : shared = undef; + +if($options{stream}) +{ + $dataQueue = Thread::Queue->new(); + my $addThr = threads->create(\&mainThread); + + # spawn the plot updating thread. 
If I'm replotting from a data trigger, I don't need this + # (declared separately from the conditional assignment, since perl leaves the behavior of + # 'my $x = ... if COND' undefined) + my $plotThr; + $plotThr = threads->create(\&plotUpdateThread) if $options{stream} > 0; + + while(<>) + { + chomp; + + last if /^exit/; + + # place every line of input to the queue, so that the plotting thread can process it. if we are + # using an implicit domain (x = line number), then we send it on the data queue also, since + # $. is not meaningful in the plotting thread + if(!$options{domain}) + { + $_ .= " $."; + } + $dataQueue->enqueue($_); + } + + $streamingFinished = 1; + + # The undef that ends the reading loop in mainThread is normally queued by plotUpdateThread. With + # triggered replotting that thread doesn't exist, so I queue the terminator here. Otherwise + # mainThread would wait in dequeue() forever, and the join() below would never return + $dataQueue->enqueue(undef) unless defined $plotThr; + + $plotThr->join() if defined $plotThr; + $addThr->join(); +} +else +{ mainThread(); } + + + + + +sub interpretCommandline +{ + # if I'm using a self-plotting data file with a #! line, then $ARGV[0] will contain ALL of the + # options and $ARGV[1] will contain the data file to plot. In this case I need to split $ARGV[0] so + # that GetOptions() can parse it correctly. On the other hand, if I'm plotting normally (not with + # #!) a file with spaces in the filename, I don't want to split the filename. Hopefully this logic + # takes care of both those cases. + if (exists $ARGV[0] && !-r $ARGV[0]) + { + unshift @ARGV, shellwords shift @ARGV; + } + + # everything off by default: + # do not stream in the data by default + # point plotting by default. + # no monotonicity checks by default + # normal histograms by default + $options{ maxcurves } = 100; + $options{ histstyle} = 'freq'; + + # Previously I was using 'legend=s%' and 'curvestyle=s%' for curve addressing. This had cleaner + # syntax, but disregarded the order of the given options. This resulted in arbitrarily ordered + # curves. 
I thus parse these into lists, and then also make hashes, for later use + + # needed for these to be parsed into an array-ref + $options{legend} = []; + $options{curvestyle} = []; + $options{histogram} = []; + GetOptions(\%options, 'stream:s', 'domain!', 'dataid!', '3d!', 'colormap!', 'lines!', 'points!', + 'circles', 'legend=s{2}', 'autolegend!', 'xlabel=s', 'ylabel=s', 'y2label=s', 'zlabel=s', + 'title=s', 'xlen=f', 'ymin=f', 'ymax=f', 'xmin=s', 'xmax=s', 'y2min=f', 'y2max=f', + 'zmin=f', 'zmax=f', 'y2=s@', 'curvestyle=s{2}', 'curvestyleall=s', 'extracmds=s@', + 'square!', 'square_xy!', 'hardcopy=s', 'maxcurves=i', 'monotonic!', 'timefmt=s', + 'histogram=s@', 'binwidth=f', 'histstyle=s', + 'terminal=s', + 'extraValuesPerPoint=i', 'help', 'dump', 'exit', 'version', + 'geometry=s') or pod2usage( -exitval => 1, + -verbose => 1, # synopsis and args + -output => \*STDERR ); + + + # handle various cmdline-option errors + if ( $options{help} ) + { + pod2usage( -exitval => 0, + -verbose => 1, # synopsis and args + -output => \*STDOUT ); + } + + if( $options{version} ) + { + print "feedgnuplot version $VERSION\n"; + exit 0; + } + + # no global style if one isn't given + $options{curvestyleall} = '' unless defined $options{curvestyleall}; + + # expand options that are given as comma-separated lists + for my $listkey (qw(histogram y2)) + { + @{$options{$listkey}} = map split('\s*,\s*', $_), @{$options{$listkey}} + if defined $options{$listkey}; + } + + # --legend and --curvestyle options are conceptually hashes, but are parsed as + # arrays in order to preserve the ordering. I parse both of these into hashes + # because those are useful to have later. 
After this I can access individual + # legends with $options{legend_hash}{curveid} + for my $listkey (qw(legend curvestyle)) + { + $options{"${listkey}_hash"} = {}; + + my $n = scalar @{$options{$listkey}}/2; + foreach my $idx (0..$n-1) + { + $options{"${listkey}_hash"}{$options{$listkey}[$idx*2]} = $options{$listkey}[$idx*2 + 1]; + } + } + + if ( defined $options{hardcopy} && defined $options{stream} ) + { + print STDERR "--stream doesn't make sense together with --hardcopy\n"; + exit -1; + } + + # parse stream option. Allowed only numbers >= 0 or 'trigger'. After this code + # $options{stream} is + # -1 for triggered replotting + # >0 for timed replotting + # undef if not streaming + if(defined $options{stream}) + { + # if no streaming period is given, default to 1Hz. + $options{stream} = 1 if $options{stream} eq ''; + + if( !looks_like_number $options{stream} ) + { + if($options{stream} eq 'trigger') + { + # 'trigger' is a synonym for 0. The == 0 check below turns both into -1 + $options{stream} = 0; + } + else + { + print STDERR "--stream can only take in values >=0 or 'trigger'\n"; + exit -1; + } + } + + if ( $options{stream} == 0 ) + { + $options{stream} = -1; + } + elsif ( $options{stream} <= 0) + { + # 0 was handled above, so only negative periods get here + print STDERR "--stream can only take in values >=0 or 'trigger'\n"; + exit -1; + } + } + + if ($options{colormap}) + { + # colormap styles all curves with palette. 
Seems like there should be a way to do this with a + # global setting, but I can't get that to work + $options{curvestyleall} .= ' palette'; + } + + if ( $options{'3d'} ) + { + if ( !$options{domain} ) + { + print STDERR "--3d only makes sense with --domain\n"; + exit -1; + } + + if ( $options{timefmt} ) + { + print STDERR "--3d makes no sense with --timefmt\n"; + exit -1; + } + + if ( defined $options{y2min} || defined $options{y2max} || defined $options{y2} ) + { + print STDERR "--3d does not make sense with --y2...\n"; + exit -1; + } + + if ( defined $options{xlen} ) + { + print STDERR "--3d does not make sense with --xlen\n"; + exit -1; + } + + # 'monotonic!' is a negatable flag, so I test its truth and not its definedness: an explicit + # --nomonotonic stores a defined 0, and that is perfectly compatible with --3d + if ( $options{monotonic} ) + { + print STDERR "--3d does not make sense with --monotonic\n"; + exit -1; + } + + if ( defined $options{binwidth} || @{$options{histogram}} ) + { + print STDERR "--3d does not make sense with histograms\n"; + exit -1; + } + + if ( defined $options{circles} ) + { + print STDERR "--3d does not make sense with circles (gnuplot doesn't support this)\n"; + exit -1; + } + } + else + { + if ( $options{timefmt} && !$options{domain} ) + { + print STDERR "--timefmt makes sense only with --domain\n"; + exit -1; + } + + if(!$options{colormap}) + { + if ( defined $options{zmin} || defined $options{zmax} || defined $options{zlabel} ) + { + print STDERR "--zmin/zmax/zlabel only makes sense with --3d or --colormap\n"; + exit -1; + } + } + + # 'square_xy!' is negatable too: an explicit --nosquare_xy is not an error + if ( $options{square_xy} ) + { + print STDERR "--square_xy only makes sense with --3d\n"; + exit -1; + } + } + + if(defined $options{xlen} && !$options{stream} ) + { + print STDERR "--xlen does not make sense without --stream\n"; + exit -1; + } + + if($options{stream} && defined $options{xlen} && + ( defined $options{xmin} || defined $options{xmax})) + { + print STDERR "With --stream and --xlen the X bounds are set, so neither --xmin nor --xmax make sense\n"; + exit -1; + } + + # --xlen implies an order to the data, so I force monotonicity + 
$options{monotonic} = 1 if defined $options{xlen}; + + if( $options{histstyle} !~ /freq|cum|uniq|cnorm/ ) + { + print STDERR "unknown histstyle. Allowed are 'freq...', 'cum...', 'uniq...', 'cnorm...'\n"; + exit -1; + } + + # deal with timefmt + if ( $options{timefmt} ) + { + # I need to compute a regex to match the time field and I need to count how + # many whitespace-separated fields there are. + + # strip leading and trailing whitespace + $options{timefmt} =~ s/^\s*//; + $options{timefmt} =~ s/\s*$//; + + # split into an array and count that: split() in scalar context is deprecated in older perls + my @timefmtFields = split( ' ', $options{timefmt}); + my $Nfields = scalar @timefmtFields; + $options{timefmt_Ncols} = $Nfields; + my $regex_str = join( '\s+', ('\S+') x $Nfields ); + $options{timefmt_regex} = qr/$regex_str/; + + # make sure --xlen is an integer. With a timefmt xlen goes through strptime + # and strftime, and those are integer-only + if( defined $options{xlen} ) + { + if( $options{xlen} - int($options{xlen}) ) + { + # plain print, like every other message here: 'say' needs 'use feature', which isn't loaded + print STDERR "When streaming --xlen MUST be an integer. Rounding up to the nearest second\n"; + $options{xlen} = 1 + int($options{xlen}); + } + } + } +} + +# Runs 'gnuplot --version' and returns the version number parsed out of the first line of its +# output. If no version can be parsed, a warning is printed and 0 is returned. Dies if gnuplot can't +# be started +sub getGnuplotVersion +{ + open(my $versionPipe, '-|', 'gnuplot --version') or die "Couldn't run gnuplot"; + my $versionLine = <$versionPipe>; + close($versionPipe); + + # the match is evaluated in list context, so that I get the captured number and not a match count + my ($gnuplotVersion) = defined $versionLine ? $versionLine =~ /gnuplot\s*(\d*\.\d*)/ : (); + if (!$gnuplotVersion) + { + print STDERR "Couldn't find the version of gnuplot. Does it work? Trying anyway...\n"; + $gnuplotVersion = 0; + } + + return $gnuplotVersion; +} + +# Timer thread used for timed streaming. Until the input is exhausted it sleeps for the refresh +# period and then queues a 'replot timertick' request. A final undef is queued at the end +sub plotUpdateThread +{ + while(! 
$streamingFinished) + { + usleep( $options{stream} * 1e6 ); + + # indicate that the timer was the replot source + $dataQueue->enqueue('replot timertick'); + } + + $dataQueue->enqueue(undef); +} + +# Sends a 'set <name> [min:max]' command to gnuplot through PIPE. Nothing is sent if neither bound +# is defined. A defined bound is sent in double quotes; an undefined one is left empty +sub sendRangeCommand +{ + my ($name, $min, $max) = @_; + + return unless defined $min || defined $max; + + if( defined $min ) + { $min = "\"$min\""; } + else + { $min = ''; } + + if( defined $max ) + { $max = "\"$max\""; } + else + { $max = ''; } + + my $cmd = "set $name [$min:$max]\n"; + print PIPE $cmd; +} + +# Returns the x-axis domain value as a number. With --timefmt the value is parsed with that format +# and converted to seconds since the epoch; otherwise it is returned as is +sub makeDomainNumeric +{ + my ($domain0) = @_; + + if ( $options{timefmt} ) + { + my $timepiece = Time::Piece->strptime( $domain0, $options{timefmt} ) + or die "Couldn't parse time format. String '$domain0' doesn't fit format '$options{timefmt}'"; + + return $timepiece->epoch(); + } + + return $domain0; +} + +# Does all the plotting work: opens the pipe to gnuplot (or uses STDOUT with --dump), sends the plot +# setup commands, then reads the data lines (from the data queue when streaming, from the input +# directly otherwise), stores the points and plots them +sub mainThread +{ + # how many numbers make up one data point + my $valuesPerPoint = 1; + if($options{extraValuesPerPoint}) { $valuesPerPoint += $options{extraValuesPerPoint}; } + if($options{colormap}) { $valuesPerPoint++; } + if($options{circles} ) { $valuesPerPoint++; } + + local *PIPE; + my $dopersist = ''; + + if( !$options{stream} && getGnuplotVersion() >= 4.3) + { + $dopersist = '--persist'; + } + + if(exists $options{dump}) + { + *PIPE = *STDOUT; + } + else + { + my $geometry = defined $options{geometry} ? 
"-geometry $options{geometry}" : ''; + open PIPE, "|gnuplot $geometry $dopersist" or die "Can't initialize gnuplot\n"; + } + autoflush PIPE 1; + + my $outputfile; + my $outputfileType; + if( defined $options{hardcopy}) + { + $outputfile = $options{hardcopy}; + if( $outputfile =~ /^[^|] # starts with anything other than | + .* # stuff in the middle + \.(eps|ps|pdf|png|svg)$/ix) # ends with a known extension + { + $outputfileType = lc $1; + } + + # the gnuplot terminal to use for each known output file extension. These are used only if no + # --terminal was given + my %terminalOpts = + ( eps => 'postscript solid color enhanced eps', + ps => 'postscript solid color landscape 10', + pdf => 'pdfcairo solid color font ",10" size 11in,8.5in', + png => 'png size 1280,1024', + svg => 'svg'); + + if( !defined $options{terminal} && + defined $outputfileType && + $terminalOpts{$outputfileType} ) + { + $options{terminal} = $terminalOpts{$outputfileType}; + } + + die "Asked to plot to file '$outputfile', but I don't know which terminal to use, and no --terminal given" + unless $options{terminal}; + } + print PIPE "set terminal $options{terminal}\n" if $options{terminal}; + print PIPE "set output \"$outputfile\"\n" if $outputfile; + + # set up plotting style. --lines and --points together concatenate into 'linespoints' + my $style = ''; + if($options{lines}) { $style .= 'lines';} + if($options{points}) { $style .= 'points';} + if($options{circles}) + { + $options{curvestyleall} = "with circles $options{curvestyleall}"; + } + + print PIPE "set style data $style\n" if $style; + print PIPE "set grid\n"; + + print(PIPE "set xlabel \"$options{xlabel }\"\n") if defined $options{xlabel}; + print(PIPE "set ylabel \"$options{ylabel }\"\n") if defined $options{ylabel}; + print(PIPE "set zlabel \"$options{zlabel }\"\n") if defined $options{zlabel}; + print(PIPE "set y2label \"$options{y2label}\"\n") if defined $options{y2label}; + print(PIPE "set title \"$options{title }\"\n") if defined $options{title}; + + if($options{square}) + { + # set a square aspect ratio. Gnuplot does this differently for 2D and 3D plots + if(! 
$options{'3d'}) + { + print(PIPE "set size ratio -1\n"); + } + else + { + print(PIPE "set view equal xyz\n"); + } + } + + if($options{square_xy}) + { + print(PIPE "set view equal xy\n"); + } + +# For the specified values, set the legend entries to 'title "blah blah"' + if(defined $options{legend} && @{$options{legend}}) + { + # @{$options{legend}} is a list where consecutive pairs are (curveID, + # legend). I use $options{legend} here instead of $options{legend_hash} + # because I create a new curve when I see a new one, and the hash is + # unordered, thus messing up the ordering + my $n = scalar @{$options{legend}}/2; + foreach my $idx (0..$n-1) + { + setCurveLabel($options{legend}[$idx*2 ], + $options{legend}[$idx*2 + 1]); + } + } + +# add the extra curve options + if(defined $options{curvestyle} && @{$options{curvestyle}}) + { + # @{$options{curvestyle}} is a list where consecutive pairs are (curveID, + # style). I use $options{curvestyle} here instead of + # $options{curvestyle_hash} because I create a new curve when I see a new + # one, and the hash is unordered, thus messing up the ordering + my $n = scalar @{$options{curvestyle}}/2; + foreach my $idx (0..$n-1) + { + addCurveOption($options{curvestyle}[$idx*2 ], + $options{curvestyle}[$idx*2 + 1]); + } + } + +# For the values requested to be printed on the y2 axis, set that + if( defined $options{y2} ) + { + foreach (@{$options{y2}}) + { + addCurveOption($_, 'axes x1y2'); + } + } + +# timefmt: tell gnuplot how to parse the time/date x values + if( $options{timefmt} ) + { + print(PIPE "set timefmt '$options{timefmt}'\n"); + print(PIPE "set xdata time\n"); + } + +# add the extra global options + if(defined $options{extracmds}) + { + foreach (@{$options{extracmds}}) + { + print(PIPE "$_\n"); + } + } + +# set up histograms. $options{histogram} is always defined here (interpretCommandline initializes it +# to []), so boxwidth and histbin() are set up even if no histograms were requested + if( defined $options{histogram} ) + { + $options{binwidth} ||= 1; # if no binwidth given, set it to 1 + print PIPE + "set boxwidth $options{binwidth}\n" . 
+ "histbin(x) = $options{binwidth} * floor(0.5 + x/$options{binwidth})\n"; + foreach (@{$options{histogram}}) + { + setCurveAsHistogram( $_ ); + } + } + + # regexp for a possibly floating point, possibly scientific notation number + my $numRE = '-?\d*\.?\d+(?:[Ee][-+]?\d+)?'; + my $domainRE = $options{timefmt_regex} || $numRE; + + + # a point may be preceded by an id + my $pointRE = $options{dataid} ? '(\S+)\s+' : '()'; + $pointRE .= '(' . join('\s+', ($numRE) x $valuesPerPoint) . ')'; + $pointRE = qr/$pointRE/; + +# set all the axis ranges + # If a bound isn't given I want to set it to the empty string, so I can communicate it simply to + # gnuplot + print PIPE "set xtics\n"; + + if($options{y2}) + { + print PIPE "set ytics nomirror\n"; + print PIPE "set y2tics\n"; + # if any of the ranges are given, set the range + sendRangeCommand( "y2range", $options{y2min}, $options{y2max} ); + } + + # if any of the ranges are given, set the range + sendRangeCommand( "xrange", $options{xmin}, $options{xmax} ); + sendRangeCommand( "yrange", $options{ymin}, $options{ymax} ); + sendRangeCommand( "zrange", $options{zmin}, $options{zmax} ); + sendRangeCommand( "cbrange", $options{zmin}, $options{zmax} ) if($options{colormap}); + + + + + # latest domain variable present in our data + my $latestX; + + # The domain of the current point + my @domain; + + # The x-axis domain represented as a number. This is exactly the same as + # $domain[0] unless the x-axis domain uses a timefmt. Then this is the + # number of seconds since the UNIX epoch. + my $domain0_numeric; + + # I should be using the // operator, but I'd like to be compatible with perl 5.8 + # + # I test for definedness and not for truth. The lines are chomped before they are queued, so + # an empty line or a lone "0" is false, and it would otherwise end this loop (and with it all the + # plotting) long before the terminating undef arrives + while( defined( $_ = (defined $dataQueue ? 
$dataQueue->dequeue() : <>) ) ) + { + next if /^#/o; + + if( $options{stream} ) + { + if(/^clear/o ) + { + clearCurves(); + next; + } + + if(/^replot/o ) + { + # /timertick/ determines if the timer was the source of the replot + replot( $domain0_numeric, /timertick/ ); + next; + } + + # /exit/ is handled in the data-reading thread + } + + if(! /^replot/o) + { + # parse the incoming data lines. The format is + # x id0 dat0 id1 dat1 .... + # where idX is the ID of the curve that datX corresponds to + # + # $options{domain} indicates whether the initial 'x' is given or not (if not, the line + # number is used) + # $options{dataid} indicates whether idX is given or not (if not, the point order in the + # line is used) + # 3d plots require $options{domain}, and dictate "x y" for the domain instead of just "x" + + if($options{domain}) + { + /($domainRE)/go or next; + $domain[0] = $1; + $domain0_numeric = makeDomainNumeric( $domain[0] ); + + if($options{'3d'}) + { + /($numRE)/go or next; + $domain[1] = $1; + } + elsif( $options{monotonic} ) + { + if( defined $latestX && $domain0_numeric < $latestX ) + { + # the x-coordinate of the new point is in the past, so I wipe out + # all the data and start anew. Before I wipe the old data, I + # replot the old data + replot( $domain0_numeric ); + clearCurves(); + $latestX = undef; + } + else + { $latestX = $domain0_numeric; } + } + + } + else + { + # since $. is not meaningful in the plotting thread if we're using the data queue, we pass + # $. on the data queue in that case + if(defined $dataQueue) + { + s/ ([\d]+)$//o; + $domain[0] = $1; + } + else + { + $domain[0] = $.; + } + $domain0_numeric = makeDomainNumeric( $domain[0] ); + } + + my $id = -1; + while (/$pointRE/go) + { + if($1 ne '') {$id = $1;} + else {$id++; } + + pushPoint(getCurve($id), + "@domain $2\n", $domain0_numeric); + } + } + } + + # if we were streaming, we're now done! + if( $options{stream} ) + { + return; + } + + # finished reading in all. 
Plot what we have + plotStoredData(); + + if ( defined $options{hardcopy}) + { + print PIPE "set output\n"; + + # sleep until the plot file exists, and it is closed. Sometimes the output + # is still being written at this point. If the output filename starts with + # '|', gnuplot pipes the output to that process, instead of writing to a + # file. In that case I don't make sure the file exists, since there IS no + # file + if( $options{hardcopy} !~ /^\|/ ) + { + usleep(100_000) until -e $outputfile; + + # list-form system(): the filename is handed to fuser as is, without a shell looking at + # it, so quotes, '$' or backticks in the filename can't break (or hijack) the command + usleep(100_000) until(system('fuser', '-s', $outputfile)); + } + + print "Wrote output to $outputfile\n"; + return; + } + + # we persist gnuplot, so we shouldn't need this sleep. However, once + # gnuplot exits, but the persistent window sticks around, you can no + # longer interactively zoom the plot. So we still sleep + sleep(100000) unless $options{dump} || $options{exit}; +} + +# Discards the stored data that is older than $oldestx (a numeric domain value) in every curve. One +# point before the window is kept, as described below +sub pruneOldData +{ + my ($oldestx) = @_; + + foreach my $curve (@curves) + { + next unless $curve->{datastring}; + + my $meta = $curve->{datastring_meta}; + + my $firstInWindow = first {$meta->[$_]{domain} >= $oldestx} 0..$#$meta; + if ( !defined $firstInWindow ) + { + # everything is too old. Clear out all the data + $curve->{datastring} = ''; + $curve->{datastring_meta} = []; + $curve->{datastring_offset} = 0; + } + elsif ( $firstInWindow >= 2 ) + { + # clear out everything that's too old, except for one point. This point + # will be off the plot, but if we're plotting lines there will be a + # connecting line to it. 
Some of the line will be visible + substr( $curve->{datastring}, 0, + $meta->[$firstInWindow-1]{offset_start} - $curve->{datastring_offset}, + '' ); + $curve->{datastring_offset} = $meta->[$firstInWindow-1]{offset_start}; + + # drop the metadata of the points I just removed. Without this the list grows for as long + # as data is streaming in, and all of it is rescanned at every prune. offset_start is a + # position in the ORIGINAL string, so the remaining entries stay valid + splice( @$meta, 0, $firstInWindow-1 ); + } + } +} + +# Sends a plot (or splot with --3d) command to gnuplot, followed by the inline data of every curve +# that has any data +sub plotStoredData +{ + # get the options for those curves that have any data + my @nonemptyCurves = grep { $_->{datastring} } @curves; + + # with no data at all I'd be sending a bare 'plot' command, which gnuplot rejects as an error + return unless @nonemptyCurves; + + my @extraopts = map {$_->{options}} @nonemptyCurves; + + my $body = join(', ' , map({ "'-' $_" } @extraopts) ); + if($options{'3d'}) { print PIPE "splot $body\n"; } + else { print PIPE "plot $body\n"; } + + foreach my $curve (@nonemptyCurves) + { + print PIPE $curve->{datastring}; + print PIPE "e\n"; + } +} + +sub updateCurveOptions +{ + # generates the 'options' string for a curve, based on its legend title and its other options + # These could be integrated into a single string, but that raises an issue in the no-title + # case. When no title is specified, gnuplot will still add a legend entry with an unhelpful '-' + # label. Thus I explicitly do 'notitle' for that case + + my ($curve, $id) = @_; + + # use the given title, unless we're generating a legend automatically. Given titles + # override autolegend + my $title; + if(defined $curve->{title}) + { $title = $curve->{title}; } + elsif( $options{autolegend} ) + { $title = $id; } + + my $titleoption = defined $title ? "title \"$title\"" : "notitle"; + + my $curvestyleall = ''; + $curvestyleall = $options{curvestyleall} + if defined $options{curvestyleall} && !defined $options{curvestyle_hash}{$id}; + + my $histoptions = $curve->{histoptions} || ''; + + my $usingoptions = ''; + if( $options{timefmt} ) + { + $usingoptions = "using 1:" . 
($options{timefmt_Ncols}+1); + } + + $curve->{options} = "$histoptions $usingoptions $titleoption $curve->{extraoptions} $curvestyleall"; +} + +sub getCurve +{ + # This function returns the curve corresponding to a particular label, creating a new curve if + # necessary. The program exits if a new curve would exceed the --maxcurves limit + + my ($id) = @_; + + if( !exists $curveIndices{$id} ) + { + # The limit applies only when I'm about to create a curve. Looking up an existing curve must + # keep working even when the maximum number of curves is already there + if(scalar @curves >= $options{maxcurves}) + { + print STDERR "Tried to exceed the --maxcurves setting.\n"; + print STDERR "Invoke with a higher --maxcurves limit if you really want to do this.\n"; + exit -1; + } + + push @curves, {extraoptions => ' ', + datastring => '', + datastring_meta => [], + datastring_offset => 0}; # push a curve with no data and no options + $curveIndices{$id} = $#curves; + + updateCurveOptions($curves[$#curves], $id); + } + return $curves[$curveIndices{$id}]; +} + +# appends $str to the extra gnuplot options of the curve with the given ID +sub addCurveOption +{ + my ($id, $str) = @_; + + my $curve = getCurve($id); + $curve->{extraoptions} .= "$str "; + updateCurveOptions($curve, $id); +} + +# sets the legend title of the curve with the given ID +sub setCurveLabel +{ + my ($id, $str) = @_; + + my $curve = getCurve($id); + $curve->{title} = $str; + updateCurveOptions($curve, $id); +} + +# makes the curve with the given ID plot as a histogram, using the --histstyle smoothing +sub setCurveAsHistogram +{ + my ($id) = @_; + + my $curve = getCurve($id); + $curve->{histoptions} = 'using (histbin($2)):(1.0) smooth ' . $options{histstyle}; + + updateCurveOptions($curve, $id); +} + +# remove all the curve data +sub clearCurves +{ + foreach my $curve(@curves) + { + $curve->{datastring} = ''; + $curve->{datastring_meta} = []; + $curve->{datastring_offset} = 0; + } +} + +sub replot +{ + return unless $haveNewData; + $haveNewData = undef; + + return if !$options{stream}; + + + # The logic involving domain rollover replotting due to --monotonic is a bit + # tricky. 
I want this: + + # if( domain rolls over slowly ) + # { + # should update on a timer; + # when the domain rolls over, --monotonic should force a replot + # } + # if( domain rolls over quickly ) + # { + # should update when the domain rolls over, + # at most as quickly as the timer indicates + # } + + + # $domain0_numeric is the numeric x value of the latest point; it is the upper end of the + # --xlen window. $replot_is_from_timer is true if this replot was requested by a + # 'replot timertick' line, i.e. by plotUpdateThread + my ($domain0_numeric, $replot_is_from_timer) = @_; + + my $now = [gettimeofday]; + + if( # If there is no replot timer at all, replot at any indication + $options{stream} < 0 || + + # if the last replot was timer-based, but this one isn't, force a replot. + # This makes sure that a replot happens for a domain rollover shortly + # after a timer replot + !$replot_is_from_timer && $last_replot_is_from_timer || + + # if enough time has elapsed since the last replot, it's ok to replot + tv_interval ( $last_replot_time, $now ) > 0.8*$options{stream} ) + { + # ok, then. We really need to replot + if ( defined $options{xlen} ) + { + # we have an --xlen, so we need to clean out the old data + pruneOldData( $domain0_numeric - $options{xlen} ); + + my ($xmin, $xmax) = ($domain0_numeric - $options{xlen}, $domain0_numeric); + if ( defined $options{timefmt} ) + { + # if we're using a timefmt, I need to convert my xmin range from + # seconds-since-the-epoch BACK to the timefmt. Sheesh + ($xmin, $xmax) = map {Time::Piece->strptime( $_, '%s' )->strftime( $options{timefmt} ) } ($xmin, $xmax); + } + sendRangeCommand( "xrange", $xmin, $xmax ); + } + + plotStoredData(); + + + # update replot state + $last_replot_time = $now; + $last_replot_is_from_timer = $replot_is_from_timer; + } +} + +# function to add a point to the plot. 
Assumes that $curve is an already-existing curve hashref, such as the one getCurve() returns +sub pushPoint +{ + my ($curve, $datastring, $domain0_numeric) = @_; + + push @{$curve->{datastring_meta}}, { offset_start => length( $curve->{datastring} ) + $curve->{datastring_offset}, + domain => $domain0_numeric }; + $curve->{datastring} .= $datastring; + + $haveNewData = 1; +} + + +=head1 NAME + +feedgnuplot - General purpose pipe-oriented plotting tool + +=head1 SYNOPSIS + +Simple plotting of piped data: + + $ seq 5 | awk '{print 2*$1, $1*$1}' + 2 1 + 4 4 + 6 9 + 8 16 + 10 25 + + $ seq 5 | awk '{print 2*$1, $1*$1}' | + feedgnuplot --lines --points --legend 0 "data 0" --title "Test plot" --y2 1 + +Simple real-time plotting example: plot how much data is received on the wlan0 +network interface in bytes/second (uses bash, awk and Linux): + + $ while true; do sleep 1; cat /proc/net/dev; done | + gawk '/wlan0/ {if(b) {print $2-b; fflush()} b=$2}' | + feedgnuplot --lines --stream --xlen 10 --ylabel 'Bytes/sec' --xlabel seconds + +=head1 DESCRIPTION + +This is a flexible, command-line-oriented frontend to Gnuplot. It creates +plots from data coming in on STDIN or given in a filename passed on the +commandline. Various data representations are supported, as is hardcopy +output and streaming display of live data. A simple example: + + $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot + +You should see a plot with two curves. The C<awk> command generates some data to +plot and the C<feedgnuplot> reads it in from STDIN and generates the plot. The +C<awk> invocation is just an example; more interesting things would be plotted +in normal usage. No commandline-options are required for the most basic +plotting. Input parsing is flexible; every line need not have the same number of +points. New curves will be created as needed. + +The most commonly used functionality of gnuplot is supported directly by the +script. Anything not directly supported can still be done with the +C<--extracmds> and C<--curvestyle> options. 
Arbitrary gnuplot commands can be +passed in with C<--extracmds>. For example, to turn off the grid, pass in +C<--extracmds 'unset grid'>. As many of these options as needed can be passed +in. To add arbitrary curve styles, use C<--curvestyle curveID extrastyle>. Pass +these more than once to affect more than one curve. To apply an extra style to +I<all> the curves that lack an explicit C<--curvestyle>, pass in +C<--curvestyleall extrastyle>. + +=head2 Data formats + +By default, each value present in the incoming data represents a distinct data +point, as demonstrated in the original example above (we had 10 numbers in the +input and 10 points in the plot). If requested, the script supports more +sophisticated interpretation of input data. + +=head3 Domain selection + +If C<--domain> is passed in, the first value on each line of input is +interpreted as the I<X>-value for the rest of the data on that line. Without +C<--domain> the I<X>-value is the line number, and the first value on a line is +a plain data point like the others. Default is C<--nodomain>. Thus the original +example above produces 2 curves, with B<1,2,3,4,5> as the I<X>-values. If we run +the same command with --domain: + + $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --domain + +we get only 1 curve, with B<2,4,6,8,10> as the I<X>-values. As many points as +desired can appear on a single line, but all points on a line are associated +with the I<X>-value at the start of that line. + +=head3 Curve indexing + +By default, each column represents a separate curve. This is fine unless sparse +data is to be plotted. With the C<--dataid> option, each point is represented by +2 values: a string identifying the curve, and the value itself. If we add +C<--dataid> to the original example: + + $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --dataid --autolegend + +we get 5 different curves with one point in each. The first column, as produced +by C<awk>, is B<2,4,6,8,10>. 
These are interpreted as the IDs of the curves to +be plotted. The C<--autolegend> option adds a legend using the given IDs to +label the curves. The IDs need not be numbers; generic strings are accepted. As +many points as desired can appear on a single line. C<--domain> can be used in +conjunction with C<--dataid>. + +=head3 Multi-value style support + +Depending on how gnuplot is plotting the data, more than one value may be needed +to represent a single point. For example, the script has support to plot all the +data with C<--circles>. This requires a radius to be specified for each point in +addition to the position of the point. Thus, when plotting with C<--circles>, 2 +numbers are read for each data point instead of 1. A similar situation exists +with C<--colormap> where each point contains the position I<and> the +color. There are other gnuplot styles that require more data (such as error +bars), but none of these are directly supported by the script. They can still be +used, though, by specifying the specific style with C<--curvestyle>, and +specifying how many extra values are needed for each point with +C<--extraValuesPerPoint extra>. C<--extraValuesPerPoint> is ONLY needed for the +styles not explicitly supported; supported styles set that variable +automatically. + +=head3 3D data + +To plot 3D data, pass in C<--3d>. C<--domain> MUST be given when plotting 3D +data to avoid domain ambiguity. If 3D data is being plotted, there are by +definition 2 domain values instead of one (I<Z> as a function of I<X> and I<Y> +instead of I<Y> as a function of I<X>). Thus the first 2 values on each line are +interpreted as the domain instead of just 1. The rest of the processing happens +the same way as before. + +=head3 Time/date data + +If the input data domain is a time/date, this can be interpreted with +C<--timefmt>. This option takes a single argument: the format to use to parse +the data. 
The format is documented in 'set timefmt' in gnuplot, although the +common flags that C<strftime> understands are generally supported. The backslash +sequences in the format are I<not> supported, so if you want a tab, put in a tab +instead of \t. Whitespace in the format I<is> supported. When this flag is +given, some other options act a little bit differently: + +=over + +=item + +C<--xlen> is an I<integer> in seconds + +=item + +C<--xmin> and C<--xmax> I<must> use the format passed in to C<--timefmt> + +=back + +Using this option changes both the way the input is parsed I<and> the way the +x-axis tics are labelled. Gnuplot tries to be intelligent in this labelling, but +it doesn't always do what the user wants. The labelling can be controlled with +the gnuplot C<set format> command, which takes the same type of format string as +C<--timefmt>. Example: + + $ sar 1 -1 | + awk '$1 ~ /..:..:../ && $8 ~/^[0-9\.]*$/ {print $1,$8; fflush()}' | + feedgnuplot --stream --domain + --lines --timefmt '%H:%M:%S' + --extracmds 'set format x "%H:%M:%S"' + +This plots the 'idle' CPU consumption against time. + +Note that while gnuplot supports the time/date on any axis, I<feedgnuplot> +currently supports it I<only> as the x-axis domain. This may change in the +future. + +=head2 Real-time streaming data + +To plot real-time data, pass in the C<--stream [refreshperiod]> option. Data +will then be plotted as it is received. The plot will be updated every +C<refreshperiod> seconds. If the period isn't specified, a 1Hz refresh rate is +used. To refresh at specific intervals indicated by the data, set the +refreshperiod to 0 or to 'trigger'. The plot will then I<only> be refreshed when +a data line 'replot' is received. This 'replot' command works in both triggered +and timed modes, but in triggered mode, it's the only way to replot. Look in +L</"Special data commands"> for more information. + +To plot only the most recent data (instead of I<all> the data), C<--xlen +windowsize> can be given. This will create a constantly-updating, scrolling +view of the recent past. 
C<windowsize> should be replaced by the desired length +of the domain window to plot, in domain units (passed-in values if C<--domain> +or line numbers otherwise). If the domain is a time/date via C<--timefmt>, then +C<windowsize> is an I<integer> in seconds. + +=head3 Special data commands + +If we are reading streaming data, the input stream can contain special commands +in addition to the raw data. Feedgnuplot looks for these at the start of every +input line. If a command is detected, the rest of the line is discarded. These +commands are + +=over + +=item C<replot> + +This command refreshes the plot right now, instead of waiting for the next +refresh time indicated by the timer. This command works in addition to the timed +refresh, as indicated by C<--stream [refreshperiod]>. + +=item C<clear> + +This command clears out the current data in the plot. The plotting process +continues, however, with any data following the C<clear>. + +=item C<exit> + +This command causes feedgnuplot to exit. + +=back + +=head2 Hardcopy output + +The script is able to produce hardcopy output with C<--hardcopy outputfile>. The +output type can be inferred from the filename, if B<.ps>, B<.eps>, B<.pdf>, +B<.svg> or B<.png> is requested. If any other file type is requested, +C<--terminal> I<must> be passed in to tell gnuplot how to make the plot. + +=head2 Self-plotting data files + +This script can be used to enable self-plotting data files. There are 2 ways of +doing this: with a shebang (#!) or with inline perl data. + +=head3 Self-plotting data with a #! + +A self-plotting, executable data file C<data> is formatted as + + $ cat data + #!/usr/bin/feedgnuplot --lines --points + 2 1 + 4 4 + 6 9 + 8 16 + 10 25 + 12 36 + 14 49 + 16 64 + 18 81 + 20 100 + 22 121 + 24 144 + 26 169 + 28 196 + 30 225 + +This is the shebang (#!) line followed by the data, formatted as before. The +data file can be plotted simply with + + $ ./data + +The caveats here are that on Linux the whole #!
line is limited to 127 characters +and that the full path to feedgnuplot must be given. The 127 character limit is +a serious limitation, but this can likely be resolved with a kernel patch. I +have only tried on Linux 2.6. + +=head3 Self-plotting data with perl inline data + +Perl supports storing data and code in the same file. This can also be used to +create self-plotting files: + + $ cat plotdata.pl + #!/usr/bin/perl + use strict; + use warnings; + + open PLOT, "| feedgnuplot --lines --points" or die "Couldn't open plotting pipe"; + while( <DATA> ) + { + my @xy = split; + print PLOT "@xy\n"; + } + __DATA__ + 2 1 + 4 4 + 6 9 + 8 16 + 10 25 + 12 36 + 14 49 + 16 64 + 18 81 + 20 100 + 22 121 + 24 144 + 26 169 + 28 196 + 30 225 + +This is especially useful if the logged data is not in a format directly +supported by feedgnuplot. Raw data can be stored after the __DATA__ directive, +with a small perl script to manipulate the data into a useable format and send +it to the plotter. + +=head1 ARGUMENTS + +=over + +=item + +--[no]domain + +If enabled, the first element of each line is the domain variable. If not, the +point index is used + +=item + +--[no]dataid + +If enabled, each data point is preceded by the ID of the data set that point +corresponds to. This ID is interpreted as a string, NOT as just a number. If not +enabled, the order of the point is used. + +As an example, if line 3 of the input is "0 9 1 20" then + +=over + +=item + +'--nodomain --nodataid' would parse the 4 numbers as points in 4 different +curves at x=3 + +=item + +'--domain --nodataid' would parse the 4 numbers as points in 3 different +curves at x=0. Here, 0 is the x-variable and 9,1,20 are the data values + +=item + +'--nodomain --dataid' would parse the 4 numbers as points in 2 different +curves at x=3. Here 0 and 1 are the data IDs and 9 and 20 are the +data values + +=item + +'--domain --dataid' would parse the 4 numbers as a single point at +x=0. Here 9 is the data ID and 1 is the data value.
20 is an extra +value, so it is ignored. If another value followed 20, we'd get another +point in curve ID 20 + +=back + +=item + +--[no]3d + +Do [not] plot in 3D. This only makes sense with --domain. Each domain here is an +(x,y) tuple + +=item + +--timefmt [format] + +Interpret the X data as a time/date, parsed with the given format + +=item + +--colormap + +Show a colormapped xy plot. Requires extra data for the color. zmin/zmax can be +used to set the extents of the colors. Automatically increments +C<--extraValuesPerPoint> + +=item + +--stream [period] + +Plot the data as it comes in, in realtime. If period is given, replot every +period seconds. If no period is given, replot at 1Hz. If the period is given as +0 or 'trigger', replot I<only> when the incoming data dictates this. See the +L</"Special data commands"> section of the man page. + +=item + +--[no]lines + +Do [not] draw lines to connect consecutive points + +=item + +--[no]points + +Do [not] draw points + +=item + +--circles + +Plot with circles. This requires a radius be specified for each point. +Automatically increments C<--extraValuesPerPoint>. I<Not> supported for 3d +plots. + +=item + +--title xxx + +Set the title of the plot + +=item + +--legend curveID legend + +Set the label for a curve plot. Use this option multiple times for multiple +curves. With --dataid, curveID is the ID. Otherwise, it's the index of the +curve, starting at 0 + +=item + +--autolegend + +Use the curve IDs for the legend. Titles given with --legend override these + +=item + +--xlen xxx + +When using --stream, sets the size of the x-window to plot. Omit this or set it +to 0 to plot ALL the data. Does not make sense with 3d plots. Implies +--monotonic + +=item + +--xmin/xmax/ymin/ymax/y2min/y2max/zmin/zmax xxx + +Set the range for the given axis. The x-axis bounds are ignored in a streaming +plot. The y2-axis bounds do not apply in 3d plots. The z-axis bounds apply +I<only> to 3d plots or colormaps.
+ +=item + +--xlabel/ylabel/y2label/zlabel xxx + +Label the given axis. The y2-axis label does not apply to 3d plots while the +z-axis label applies I<only> to 3d plots. + +=item + +--y2 xxx + +Plot the data specified by this curve ID on the y2 axis. Without --dataid, the +ID is just an ordered 0-based index. Does not apply to 3d plots. Can be passed +multiple times, or passed a comma-separated list. By default the y2-axis curves +look the same as the y-axis ones. I.e. the viewer of the resulting plot has to +be told which is which via an axis label, legend, etc. Prior to version 1.25 of +feedgnuplot the curves plotted on the y2 axis were drawn with a thicker line. +This is no longer the case, but that behavior can be brought back by passing +something like + + --y2 curveid --curvestyle curveid 'linewidth 3' + +=item + +--histogram curveID + + +Set up this specific curve to plot a histogram. The bin width is given with +the --binwidth option (assumed 1.0 if omitted). --histogram does NOT touch the +drawing style. It is often desired to plot these with boxes, and this MUST be +explicitly requested with --curvestyleall 'with boxes'. This works with --domain +and/or --stream, but in those cases the x-value is used ONLY to cull old data +because of --xlen or --monotonic. I.e. the x-values are NOT drawn in any way. +Can be passed multiple times, or passed a comma-separated list + +=item + +--binwidth width + +The width of bins when making histograms. This setting applies to ALL histograms +in the plot. Defaults to 1.0 if not given. + +=item + +--histstyle style + +Normally, histograms are generated with the 'smooth freq' gnuplot style. +--histstyle can be used to select different 'smooth' settings. Allowed are +'unique', 'cumulative' and 'cnormal'. 'unique' indicates whether a bin has at +least one item in it: instead of counting the items, it'll always report 0 or 1. +'cumulative' is the integral of the "normal" histogram.
'cnormal' is like +'cumulative', but rescaled to end up at 1.0. + +=item + +--curvestyle curveID style + +Additional styles per curve. With --dataid, curveID is the ID. Otherwise, +it's the index of the curve, starting at 0. Use this option multiple times for +multiple curves. --curvestyleall does NOT apply to curves that have a +--curvestyle + +=item + +--curvestyleall xxx + +Additional styles for all curves that have no --curvestyle + +=item + +--extracmds xxx + +Additional commands. These could contain extra global styles for instance. Can +be passed multiple times. + +=item + +--square + +Plot data with aspect ratio 1. For 3D plots, this controls the aspect ratio for +all 3 axes + +=item + +--square_xy + +For 3D plots, set square aspect ratio for ONLY the x,y axes + +=item + +--hardcopy xxx + +If not streaming, output to a file specified here. Format inferred from +filename, unless specified by --terminal + +=item + +--terminal xxx + +String passed to 'set terminal'. No attempts are made to validate this. +--hardcopy sets this to some sensible defaults if --hardcopy is given .png, +.pdf, .ps, .eps or .svg. If any other file type is desired, use both --hardcopy +and --terminal + +=item + +--maxcurves xxx + +The maximum allowed number of curves. This is 100 by default, but can be reset +with this option. This exists purely to prevent perl from allocating all of the +system's memory when reading bogus data + +=item + +--monotonic + +If --domain is given, checks to make sure that the x-coordinate in the input +data is monotonically increasing. If a given x-variable is in the past, all data +currently cached for this curve is purged. Without --monotonic, all data is +kept. Does not make sense with 3d plots. No --monotonic by default. The data is +replotted before being purged + +=item + +--extraValuesPerPoint xxx + +How many extra values are given for each data point.
Normally this is 0, and +does not need to be specified, but sometimes we want extra data, like for colors +or point sizes or error bars, etc. feedgnuplot options that require this +(colormap, circles) automatically set it. This option is ONLY needed if unknown +styles are used, with --curvestyleall for instance + +=item + +--dump + +Instead of printing to gnuplot, print to STDOUT. Very useful for debugging. It +is possible to send the output produced this way to gnuplot directly. + +=item + +--exit + +Terminate the feedgnuplot process after passing data to gnuplot. The window will +persist but will not be interactive. Without this option feedgnuplot keeps +running and must be killed by the user. Note that this option works only with +later versions of gnuplot and only with some gnuplot terminals. + +=item + +--geometry + +If using X11, specifies the size and position of the plot window + +=item + +--version + +Print the version and exit + +=back + +=head1 RECIPES + +=head2 Basic plotting of piped data + + $ seq 5 | awk '{print 2*$1, $1*$1}' + 2 1 + 4 4 + 6 9 + 8 16 + 10 25 + + $ seq 5 | awk '{print 2*$1, $1*$1}' | + feedgnuplot --lines --points --legend 0 "data 0" --title "Test plot" --y2 1 + +=head2 Realtime plot of network throughput + +Looks at wlan0 on Linux. + + $ while true; do sleep 1; cat /proc/net/dev; done | + gawk '/wlan0/ {if(b) {print $2-b; fflush()} b=$2}' | + feedgnuplot --lines --stream --xlen 10 --ylabel 'Bytes/sec' --xlabel seconds + +=head2 Realtime plot of battery charge in respect to time + +Uses the result of the C<acpi> command. + + $ while true; do acpi; sleep 15; done | + perl -nE 'BEGIN{ $| = 1; } /([0-9]*)%/; say join(" ", time(), $1);' | + feedgnuplot --stream --ymin 0 --ymax 100 --lines --domain --xlabel 'Time' --timefmt '%s' --ylabel "Battery charge (%)" + +=head2 Realtime plot of temperatures in an IBM Thinkpad + +Uses C</proc/acpi/ibm/thermal>, which reports temperatures at various locations +in a Thinkpad.
+ + $ while true; do cat /proc/acpi/ibm/thermal | awk '{$1=""; print}' ; sleep 1; done | + feedgnuplot --stream --xlen 100 --lines --autolegend --ymax 100 --ymin 20 --ylabel 'Temperature (deg C)' + +=head2 Plotting a histogram of file sizes in a directory + + $ ls -l | awk '{print $5/1e6}' | + feedgnuplot --histogram 0 --curvestyleall 'with boxes' --ymin 0 --xlabel 'File size (MB)' --ylabel Frequency + +=head1 ACKNOWLEDGEMENT + +This program is originally based on the driveGnuPlots.pl script from +Thanassis Tsiodras. It is available from his site at +L<http://users.softlab.ece.ntua.gr/~ttsiod/gnuplotStreaming.html> + +=head1 REPOSITORY + +L<https://github.com/dkogan/feedgnuplot> + +=head1 AUTHOR + +Dima Kogan, C<< <dima@secretsauce.net> >> + +=head1 LICENSE AND COPYRIGHT + +Copyright 2011-2012 Dima Kogan. + +This program is free software; you can redistribute it and/or modify it +under the terms of either: the GNU General Public License as published +by the Free Software Foundation; or the Artistic License. + +See http://dev.perl.org/licenses/ for more information. + +=cut