mirror of
https://github.com/dkogan/feedgnuplot.git
synced 2025-05-05 22:11:12 +08:00
Tool to plot realtime and stored data from the commandline, using gnuplot.
![]() $options{y2} and $options{extracmds} now default to [], so I never need to check for defined $options{y2}. This patch also changes some foreach() { dosomething(); } blocks into dosomething() foreach(); |
||
---|---|---|
bin | ||
completions | ||
t | ||
Changes | ||
ignore.txt | ||
INSTALL | ||
LICENSE | ||
Makefile.PL | ||
MANIFEST | ||
README.pod |
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Time::HiRes qw( usleep gettimeofday tv_interval );
use IO::Handle;
use List::Util qw( first );
use Scalar::Util qw( looks_like_number );
use Text::ParseWords;
use threads;
use threads::shared;
use Thread::Queue;
use Pod::Usage;
use Time::Piece;

my $VERSION = 1.29;

my %options;
interpretCommandline();

# list containing the plot data. Each element is a hashref of parameters.
# $curve->{datastring} is a string of all the data in this curve that can be
# sent directly to gnuplot. $curve->{datastring_meta} is a list of hashrefs
# {domain => ..., offset_start => ...}. offset_start represents a position in
# the datastring where this particular data element begins. As the data is
# culled with --xlen, the offsets are preserved by using
# $curve->{datastring_offset} to represent the offset IN THE ORIGINAL STRING of
# the current start of the datastring
my @curves = ();

# list mapping curve names to their indices in the @curves list
my %curveIndices = ();

# now start the data acquisition and plotting threads
my $dataQueue;

# Whether any new data has arrived since the last replot
my $haveNewData;

# when the last replot happened
my $last_replot_time = [gettimeofday];

# whether the previous replot was timer based
my $last_replot_is_from_timer = 1;

my $streamingFinished : shared = undef;

if($options{stream})
{
  $dataQueue = Thread::Queue->new();
  my $addThr = threads->create(\&mainThread);

  # spawn the plot updating thread. If I'm replotting from a data trigger, I
  # don't need this. The declaration is kept separate from the conditional
  # assignment: 'my $x = ... if ...' has undefined behavior (see perlsyn)
  my $plotThr;
  $plotThr = threads->create(\&plotUpdateThread) if $options{stream} > 0;

  while(<>)
  {
    chomp;
    last if /^exit/;

    # place every line of input to the queue, so that the plotting thread can
    # process it. if we are using an implicit domain (x = line number), then we
    # send it on the data queue also, since $. is not meaningful in the
    # plotting thread
    if(!$options{domain})
    {
      $_ .= " $.";
    }

    $dataQueue->enqueue($_);
  }

  $streamingFinished = 1;

  # The timer thread enqueues the terminating undef when it notices that
  # streaming is finished. If there is no timer thread (triggered replotting),
  # I have to terminate the data-processing thread myself, or the join() below
  # would block forever
  $dataQueue->enqueue(undef) unless defined $plotThr;

  $plotThr->join() if defined $plotThr;
  $addThr->join();
}
else
{
  mainThread();
}



# Parses @ARGV into the global %options, validates the combination of options
# that were given and fills in the defaults and derived values (legend_hash,
# curvestyle_hash, timefmt_Ncols, timefmt_regex). Prints a message to STDERR
# and exits on any inconsistent set of options
sub interpretCommandline
{
  # if I'm using a self-plotting data file with a #! line, then $ARGV[0] will
  # contain ALL of the options and $ARGV[1] will contain the data file to plot.
  # In this case I need to split $ARGV[0] so that GetOptions() can parse it
  # correctly. On the other hand, if I'm plotting normally (not with #!) a file
  # with spaces in the filename, I don't want to split the filename. Hopefully
  # this logic takes care of both those cases.
  if (exists $ARGV[0] && !-r $ARGV[0])
  {
    unshift @ARGV, shellwords shift @ARGV;
  }

  # everything off by default:
  # do not stream in the data by default
  # point plotting by default.
  # no monotonicity checks by default
  # normal histograms by default
  $options{maxcurves} = 100;
  $options{histstyle} = 'freq';

  # Previously I was using 'legend=s%' and 'curvestyle=s%' for curve
  # addressing. This had cleaner syntax, but disregarded the order of the given
  # options. This resulted in arbitrarily ordered curves. I thus parse these
  # into lists, and then also make hashes, for later use

  # needed for these to be parsed into an array-ref
  $options{legend}     = [];
  $options{curvestyle} = [];
  $options{histogram}  = [];
  $options{y2}         = [];
  $options{extracmds}  = [];

  GetOptions(\%options, 'stream:s', 'domain!', 'dataid!', '3d!', 'colormap!', 'lines!', 'points!',
             'circles', 'legend=s{2}', 'autolegend!', 'xlabel=s', 'ylabel=s', 'y2label=s', 'zlabel=s',
             'title=s', 'xlen=f', 'ymin=f', 'ymax=f', 'xmin=s', 'xmax=s', 'y2min=f', 'y2max=f',
             'zmin=f', 'zmax=f', 'y2=s@', 'curvestyle=s{2}', 'curvestyleall=s', 'extracmds=s@',
             'square!', 'square_xy!', 'hardcopy=s', 'maxcurves=i', 'monotonic!', 'timefmt=s',
             'histogram=s@', 'binwidth=f', 'histstyle=s',
             'terminal=s',
             'extraValuesPerPoint=i', 'help', 'dump', 'exit', 'version',
             'geometry=s')
    or pod2usage( -exitval => 1,
                  -verbose => 1, # synopsis and args
                  -output  => \*STDERR );

  # handle various cmdline-option errors
  if ( $options{help} )
  {
    pod2usage( -exitval => 0,
               -verbose => 1, # synopsis and args
               -output  => \*STDOUT );
  }

  if( $options{version} )
  {
    print "feedgnuplot version $VERSION\n";
    exit 0;
  }

  # no global style if one isn't given
  $options{curvestyleall} = '' unless defined $options{curvestyleall};

  # expand options that are given as comma-separated lists
  for my $listkey (qw(histogram y2))
  {
    @{$options{$listkey}} = map split('\s*,\s*', $_), @{$options{$listkey}}
      if defined $options{$listkey};
  }

  # --legend and --curvestyle options are conceptually hashes, but are parsed
  # as arrays in order to preserve the ordering. I parse both of these into
  # hashes because those are useful to have later. After this I can access
  # individual legends with $options{legend_hash}{curveid}
  for my $listkey (qw(legend curvestyle))
  {
    $options{"${listkey}_hash"} = {};

    my $n = scalar @{$options{$listkey}}/2;
    foreach my $idx (0..$n-1)
    {
      $options{"${listkey}_hash"}{$options{$listkey}[$idx*2]} = $options{$listkey}[$idx*2 + 1];
    }
  }

  if ( defined $options{hardcopy} && defined $options{stream} )
  {
    print STDERR "--stream doesn't make sense together with --hardcopy\n";
    exit -1;
  }

  # parse stream option. Allowed only numbers >= 0 or 'trigger'. After this
  # code $options{stream} is
  #  -1 for triggered replotting
  #  >0 for timed replotting
  #  undef if not streaming
  if(defined $options{stream})
  {
    # if no streaming period is given, default to 1Hz.
    $options{stream} = 1 if $options{stream} eq '';

    if( !looks_like_number $options{stream} )
    {
      if($options{stream} eq 'trigger')
      {
        $options{stream} = 0;
      }
      else
      {
        print STDERR "--stream can only take in values >=0 or 'trigger'\n";
        exit -1;
      }
    }

    if ( $options{stream} == 0 )
    {
      $options{stream} = -1;
    }
    elsif ( $options{stream} <= 0)
    {
      print STDERR "--stream can only take in values >=0 or 'trigger'\n";
      exit -1;
    }
  }

  if ($options{colormap})
  {
    # colormap styles all curves with palette. Seems like there should be a way
    # to do this with a global setting, but I can't get that to work
    $options{curvestyleall} .= ' palette';
  }

  if ( $options{'3d'} )
  {
    if ( !$options{domain} )
    {
      print STDERR "--3d only makes sense with --domain\n";
      exit -1;
    }

    if ( $options{timefmt} )
    {
      print STDERR "--3d makes no sense with --timefmt\n";
      exit -1;
    }

    if ( defined $options{y2min} || defined $options{y2max} || @{$options{y2}} )
    {
      print STDERR "--3d does not make sense with --y2...\n";
      exit -1;
    }

    if ( defined $options{xlen} )
    {
      print STDERR "--3d does not make sense with --xlen\n";
      exit -1;
    }

    if ( defined $options{monotonic} )
    {
      print STDERR "--3d does not make sense with --monotonic\n";
      exit -1;
    }

    if ( defined $options{binwidth} || @{$options{histogram}} )
    {
      print STDERR "--3d does not make sense with histograms\n";
      exit -1;
    }

    if ( defined $options{circles} )
    {
      print STDERR "--3d does not make sense with circles (gnuplot doesn't support this)\n";
      exit -1;
    }
  }
  else
  {
    if ( $options{timefmt} && !$options{domain} )
    {
      print STDERR "--timefmt makes sense only with --domain\n";
      exit -1;
    }

    if(!$options{colormap})
    {
      if ( defined $options{zmin} || defined $options{zmax} || defined $options{zlabel} )
      {
        print STDERR "--zmin/zmax/zlabel only makes sense with --3d or --colormap\n";
        exit -1;
      }
    }

    if ( defined $options{square_xy} )
    {
      print STDERR "--square_xy only makes sense with --3d\n";
      exit -1;
    }
  }

  if(defined $options{xlen} && !$options{stream} )
  {
    print STDERR "--xlen does not make sense without --stream\n";
    exit -1;
  }

  if($options{stream} && defined $options{xlen} &&
     ( defined $options{xmin} || defined $options{xmax}))
  {
    print STDERR "With --stream and --xlen the X bounds are set, so neither --xmin nor --xmax make sense\n";
    exit -1;
  }

  # --xlen implies an order to the data, so I force monotonicity
  $options{monotonic} = 1 if defined $options{xlen};

  # The value is appended to gnuplot's 'smooth' keyword, so any spelling that
  # STARTS with one of the known names is accepted, as the message promises
  if( $options{histstyle} !~ /^(?:freq|cum|uniq|cnorm)/ )
  {
    print STDERR "unknown histstyle. Allowed are 'freq...', 'cum...', 'uniq...', 'cnorm...'\n";
    exit -1;
  }

  # deal with timefmt
  if ( $options{timefmt} )
  {
    # I need to compute a regex to match the time field and I need to count how
    # many whitespace-separated fields there are.

    # strip leading and trailing whitespace
    $options{timefmt} =~ s/^\s*//;
    $options{timefmt} =~ s/\s*$//;

    # split into a real array: split() in scalar context is an implicit split
    # to @_ on older perls
    my @timefmt_fields = split( ' ', $options{timefmt});
    my $Nfields        = scalar @timefmt_fields;
    $options{timefmt_Ncols} = $Nfields;
    my $regex_str = join( '\s+', ('\S+') x $Nfields );
    $options{timefmt_regex} = qr/$regex_str/;

    # make sure --xlen is an integer. With a timefmt xlen goes through strptime
    # and strftime, and those are integer-only
    if( defined $options{xlen} )
    {
      if( $options{xlen} - int($options{xlen}) )
      {
        # plain print: the 'say' feature is never enabled in this file
        print STDERR "When streaming --xlen MUST be an integer. Rounding up to the nearest second\n";
        $options{xlen} = 1 + int($options{xlen});
      }
    }
  }
}

# Runs 'gnuplot --version' and returns the reported major.minor version as a
# number, or 0 (after warning on STDERR) if the version couldn't be parsed.
# Dies if gnuplot can't be run at all
sub getGnuplotVersion
{
  open(my $versionPipe, '-|', 'gnuplot --version') or die "Couldn't run gnuplot";
  my $versionLine = <$versionPipe>;
  close($versionPipe);

  # gnuplot may have produced no output at all; don't match against undef
  my ($gnuplotVersion) =
    defined $versionLine ? ($versionLine =~ /gnuplot\s*(\d*\.\d*)/) : ();

  if (!$gnuplotVersion)
  {
    print STDERR "Couldn't find the version of gnuplot. Does it work? Trying anyway...\n";
    $gnuplotVersion = 0;
  }

  return $gnuplotVersion;
}

# Body of the timer thread used for timed streaming. Every $options{stream}
# seconds it queues a 'replot timertick' pseudo-line. Once the input is
# exhausted it queues undef to tell the data-processing thread to stop
sub plotUpdateThread
{
  while(! $streamingFinished)
  {
    usleep( $options{stream} * 1e6 );

    # indicate that the timer was the replot source
    $dataQueue->enqueue('replot timertick');
  }

  $dataQueue->enqueue(undef);
}

# Sends a 'set NAME [MIN:MAX]' command to gnuplot. An undefined bound is sent
# as an empty string; nothing is sent if neither bound is defined
sub sendRangeCommand
{
  my ($name, $min, $max) = @_;

  return unless defined $min || defined $max;

  $min = defined $min ? "\"$min\"" : '';
  $max = defined $max ? "\"$max\"" : '';

  print PIPE "set $name [$min:$max]\n";
}

# Returns a numeric representation of a domain value. With --timefmt this is
# the epoch time obtained by parsing the value with that format; otherwise the
# value is returned untouched
sub makeDomainNumeric
{
  my ($domain0) = @_;

  if ( $options{timefmt} )
  {
    my $timepiece = Time::Piece->strptime( $domain0, $options{timefmt} )
      or die "Couldn't parse time format. String '$domain0' doesn't fit format '$options{timefmt}'";

    return $timepiece->epoch();
  }

  return $domain0;
}

# Opens the gnuplot pipe (or STDOUT with --dump), sends all the setup commands
# and then consumes the input: from the data queue when streaming, from <>
# otherwise. When not streaming, the plot is made once all the data is read
sub mainThread
{
  my $valuesPerPoint = 1;
  if($options{extraValuesPerPoint}) { $valuesPerPoint += $options{extraValuesPerPoint}; }
  if($options{colormap})            { $valuesPerPoint++; }
  if($options{circles} )            { $valuesPerPoint++; }

  # PIPE is localized here and is used by every function called from this one
  local *PIPE;
  my $dopersist = '';

  if( !$options{stream} && getGnuplotVersion() >= 4.3)
  {
    $dopersist = '--persist';
  }

  if(exists $options{dump})
  {
    *PIPE = *STDOUT;
  }
  else
  {
    my $geometry = defined $options{geometry} ?
      "-geometry $options{geometry}" : '';

    open PIPE, "|gnuplot $geometry $dopersist" or die "Can't initialize gnuplot\n";
  }
  autoflush PIPE 1;

  my $outputfile;
  my $outputfileType;
  if( defined $options{hardcopy})
  {
    $outputfile = $options{hardcopy};
    if( $outputfile =~ /^[^|]                       # starts with anything other than |
                        .*                          # stuff in the middle
                        \.(eps|ps|pdf|png|svg)$/ix) # ends with a known extension
    {
      $outputfileType = lc $1;
    }

    my %terminalOpts =
      ( eps => 'postscript solid color enhanced eps',
        ps  => 'postscript solid color landscape 10',
        pdf => 'pdfcairo solid color font ",10" size 11in,8.5in',
        png => 'png size 1280,1024',
        svg => 'svg');

    if( !defined $options{terminal} &&
        defined $outputfileType &&
        $terminalOpts{$outputfileType} )
    {
      $options{terminal} = $terminalOpts{$outputfileType};
    }

    die "Asked to plot to file '$outputfile', but I don't know which terminal to use, and no --terminal given"
      unless $options{terminal};
  }

  print PIPE "set terminal $options{terminal}\n" if $options{terminal};
  print PIPE "set output \"$outputfile\"\n"      if $outputfile;

  # set up plotting style
  my $style = '';
  if($options{lines})  { $style .= 'lines';}
  if($options{points}) { $style .= 'points';}

  if($options{circles})
  {
    $options{curvestyleall} = "with circles $options{curvestyleall}";
  }

  print PIPE "set style data $style\n" if $style;
  print PIPE "set grid\n";

  print(PIPE "set xlabel \"$options{xlabel}\"\n")   if defined $options{xlabel};
  print(PIPE "set ylabel \"$options{ylabel}\"\n")   if defined $options{ylabel};
  print(PIPE "set zlabel \"$options{zlabel}\"\n")   if defined $options{zlabel};
  print(PIPE "set y2label \"$options{y2label}\"\n") if defined $options{y2label};
  print(PIPE "set title \"$options{title}\"\n")     if defined $options{title};

  if($options{square})
  {
    # set a square aspect ratio. Gnuplot does this differently for 2D and 3D plots
    if(! $options{'3d'})
    {
      print(PIPE "set size ratio -1\n");
    }
    else
    {
      print(PIPE "set view equal xyz\n");
    }
  }

  if($options{square_xy})
  {
    print(PIPE "set view equal xy\n");
  }

  # For the specified values, set the legend entries to 'title "blah blah"'
  if(@{$options{legend}})
  {
    # @{$options{legend}} is a list where consecutive pairs are (curveID,
    # legend). I use $options{legend} here instead of $options{legend_hash}
    # because I create a new curve when I see a new one, and the hash is
    # unordered, thus messing up the ordering
    my $n = scalar @{$options{legend}}/2;
    foreach my $idx (0..$n-1)
    {
      setCurveLabel($options{legend}[$idx*2    ],
                    $options{legend}[$idx*2 + 1]);
    }
  }

  # add the extra curve options
  if(@{$options{curvestyle}})
  {
    # @{$options{curvestyle}} is a list where consecutive pairs are (curveID,
    # style). I use $options{curvestyle} here instead of
    # $options{curvestyle_hash} because I create a new curve when I see a new
    # one, and the hash is unordered, thus messing up the ordering
    my $n = scalar @{$options{curvestyle}}/2;
    foreach my $idx (0..$n-1)
    {
      addCurveOption($options{curvestyle}[$idx*2    ],
                     $options{curvestyle}[$idx*2 + 1]);
    }
  }

  # For the values requested to be printed on the y2 axis, set that
  addCurveOption($_, 'axes x1y2') foreach (@{$options{y2}});

  # timefmt
  if( $options{timefmt} )
  {
    print(PIPE "set timefmt '$options{timefmt}'\n");
    print(PIPE "set xdata time\n");
  }

  # add the extra global options
  print(PIPE "$_\n") foreach (@{$options{extracmds}});

  # set up histograms
  $options{binwidth} ||= 1; # if no binwidth given, set it to 1
  print PIPE
    "set boxwidth $options{binwidth}\n" .
    "histbin(x) = $options{binwidth} * floor(0.5 + x/$options{binwidth})\n";

  setCurveAsHistogram( $_ ) foreach (@{$options{histogram}});

  # regexp for a possibly floating point, possibly scientific notation number
  my $numRE    = '-?\d*\.?\d+(?:[Ee][-+]?\d+)?';
  my $domainRE = $options{timefmt_regex} || $numRE;

  # a point may be preceded by an id
  my $pointRE = $options{dataid} ? '(\S+)\s+' : '()';
  $pointRE .= '(' . join('\s+', ($numRE) x $valuesPerPoint) . ')';
  $pointRE = qr/$pointRE/;

  # set all the axis ranges
  # If a bound isn't given I want to set it to the empty string, so I can
  # communicate it simply to gnuplot
  print PIPE "set xtics\n";

  if(@{$options{y2}})
  {
    print PIPE "set ytics nomirror\n";
    print PIPE "set y2tics\n";

    # if any of the ranges are given, set the range
    sendRangeCommand( "y2range", $options{y2min}, $options{y2max} );
  }

  # if any of the ranges are given, set the range
  sendRangeCommand( "xrange",  $options{xmin}, $options{xmax} );
  sendRangeCommand( "yrange",  $options{ymin}, $options{ymax} );
  sendRangeCommand( "zrange",  $options{zmin}, $options{zmax} );
  sendRangeCommand( "cbrange", $options{zmin}, $options{zmax} ) if($options{colormap});

  # latest domain variable present in our data
  my $latestX;

  # The domain of the current point
  my @domain;

  # The x-axis domain represented as a number. This is exactly the same as
  # $domain[0] unless the x-axis domain uses a timefmt. Then this is the
  # number of seconds since the UNIX epoch.
  my $domain0_numeric;

  # The loop ends only on undef (end of input, or the end-of-stream marker on
  # the queue). Testing for plain truth would also stop on a queued line that
  # is empty or "0": queued lines are chomped, so those ARE false
  while( defined( $_ = (defined $dataQueue ? $dataQueue->dequeue() : <>) ) )
  {
    next if /^#/o;

    if( $options{stream} )
    {
      if(/^clear/o )
      {
        clearCurves();
        next;
      }

      if(/^replot/o )
      {
        # /timertick/ determines if the timer was the source of the replot
        replot( $domain0_numeric, /timertick/ );
        next;
      }

      # /exit/ is handled in the data-reading thread
    }

    if(! /^replot/o)
    {
      # parse the incoming data lines. The format is
      # x id0 dat0 id1 dat1 ....
      # where idX is the ID of the curve that datX corresponds to
      #
      # $options{domain} indicates whether the initial 'x' is given or not (if
      # not, the line number is used)
      # $options{dataid} indicates whether idX is given or not (if not, the
      # point order in the line is used)
      # 3d plots require $options{domain}, and dictate "x y" for the domain
      # instead of just "x"

      if($options{domain})
      {
        /($domainRE)/go or next;
        $domain[0] = $1;
        $domain0_numeric = makeDomainNumeric( $domain[0] );

        if($options{'3d'})
        {
          /($numRE)/go or next;
          $domain[1] = $1;
        }
        elsif( $options{monotonic} )
        {
          if( defined $latestX && $domain0_numeric < $latestX )
          {
            # the x-coordinate of the new point is in the past, so I wipe out
            # all the data and start anew. Before I wipe the old data, I
            # replot the old data
            replot( $domain0_numeric );
            clearCurves();
            $latestX = undef;
          }
          else
          {
            $latestX = $domain0_numeric;
          }
        }
      }
      else
      {
        # since $. is not meaningful in the plotting thread if we're using the
        # data queue, we pass $. on the data queue in that case
        if(defined $dataQueue)
        {
          s/ ([\d]+)$//o;
          $domain[0] = $1;
        }
        else
        {
          $domain[0] = $.;
        }
        $domain0_numeric = makeDomainNumeric( $domain[0] );
      }

      my $id = -1;

      while (/$pointRE/go)
      {
        if($1 ne '') {$id = $1;}
        else         {$id++;   }

        pushPoint(getCurve($id),
                  "@domain $2\n", $domain0_numeric);
      }
    }
  }

  # if we were streaming, we're now done!
  if( $options{stream} )
  {
    return;
  }

  # finished reading in all. Plot what we have
  plotStoredData();

  if ( defined $options{hardcopy})
  {
    print PIPE "set output\n";

    # sleep until the plot file exists, and it is closed. Sometimes the output
    # is still being written at this point. If the output filename starts with
    # '|', gnuplot pipes the output to that process, instead of writing to a
    # file. In that case I don't make sure the file exists, since there IS no
    # file
    if( $options{hardcopy} !~ /^\|/ )
    {
      usleep(100_000) until -e $outputfile;

      # list-form system(): the filename never goes through a shell
      usleep(100_000) until(system('fuser', '-s', $outputfile));
    }

    print "Wrote output to $outputfile\n";
    return;
  }

  # we persist gnuplot, so we shouldn't need this sleep. However, once
  # gnuplot exits, but the persistent window sticks around, you can no
  # longer interactively zoom the plot. So we still sleep
  sleep(100000) unless $options{dump} || $options{exit};
}

# Drops, from every curve, the data whose domain is older than $oldestx. One
# out-of-window point is kept so that a connecting line to it can be drawn
sub pruneOldData
{
  my ($oldestx) = @_;

  foreach my $curve (@curves)
  {
    next unless $curve->{datastring};

    my $meta = $curve->{datastring_meta};

    my $firstInWindow = first {$meta->[$_]{domain} >= $oldestx} 0..$#$meta;
    if ( !defined $firstInWindow )
    {
      # everything is too old. Clear out all the data
      $curve->{datastring}        = '';
      $curve->{datastring_meta}   = [];
      $curve->{datastring_offset} = 0;
    }
    elsif ( $firstInWindow >= 2 )
    {
      # clear out everything that's too old, except for one point. This point
      # will be off the plot, but if we're plotting lines there will be a
      # connecting line to it. Some of the line will be visible
      my $keepIdx   = $firstInWindow - 1;
      my $newOffset = $meta->[$keepIdx]{offset_start};

      substr( $curve->{datastring}, 0,
              $newOffset - $curve->{datastring_offset},
              '' );
      $curve->{datastring_offset} = $newOffset;

      # drop the metadata of the removed points too. The stored offsets are
      # relative to the ORIGINAL string, so the remaining entries stay valid.
      # Without this the metadata grows forever while streaming
      splice @$meta, 0, $keepIdx;
    }
  }
}

# Sends a plot (or splot) command to gnuplot, followed by the inline data of
# every curve that has any data
sub plotStoredData
{
  # get the options for those curves that have any data
  my @nonemptyCurves = grep { $_->{datastring} } @curves;

  # with nothing to plot, a bare 'plot' command would just be a gnuplot error
  return unless @nonemptyCurves;

  my @extraopts = map {$_->{options}} @nonemptyCurves;

  my $body = join(', ' , map({ "'-' $_" } @extraopts) );
  if($options{'3d'}) { print PIPE "splot $body\n"; }
  else               { print PIPE "plot $body\n" ; }

  foreach my $curve (@nonemptyCurves)
  {
    print PIPE $curve->{datastring};
    print PIPE "e\n";
  }
}

# generates the 'options' string for a curve, based on its legend title and its
# other options. These could be integrated into a single string, but that
# raises an issue in the no-title case. When no title is specified, gnuplot
# will still add a legend entry with an unhelpful '-' label. Thus I explicitly
# do 'notitle' for that case
sub updateCurveOptions
{
  my ($curve, $id) = @_;

  # use the given title, unless we're generating a legend automatically. Given
  # titles override autolegend
  my $title;
  if(defined $curve->{title})
  {
    $title = $curve->{title};
  }
  elsif( $options{autolegend} )
  {
    $title = $id;
  }

  my $titleoption = defined $title ? "title \"$title\"" : "notitle";

  # the global style applies only to curves without an explicit --curvestyle
  my $curvestyleall = '';
  $curvestyleall = $options{curvestyleall}
    if defined $options{curvestyleall} && !defined $options{curvestyle_hash}{$id};

  my $histoptions = $curve->{histoptions} || '';

  my $usingoptions = '';
  if( $options{timefmt} )
  {
    $usingoptions = "using 1:" . ($options{timefmt_Ncols}+1);
  }

  $curve->{options} = "$histoptions $usingoptions $titleoption $curve->{extraoptions} $curvestyleall";
}

# This function returns the curve corresponding to a particular label, creating
# a new curve if necessary. Exits if creating the curve would exceed the
# --maxcurves limit
sub getCurve
{
  my ($id) = @_;

  if( !exists $curveIndices{$id} )
  {
    # The limit applies only when a NEW curve is about to be created. Looking
    # up an existing curve must keep working when exactly maxcurves exist
    if(scalar @curves >= $options{maxcurves})
    {
      print STDERR "Tried to exceed the --maxcurves setting.\n";
      print STDERR "Invoke with a higher --maxcurves limit if you really want to do this.\n";
      exit -1;
    }

    # push a curve with no data and no options
    push @curves, {extraoptions      => ' ',
                   datastring        => '',
                   datastring_meta   => [],
                   datastring_offset => 0};

    $curveIndices{$id} = $#curves;

    updateCurveOptions($curves[$#curves], $id);
  }

  return $curves[$curveIndices{$id}];
}

# Appends a gnuplot style string to the extra options of the given curve
sub addCurveOption
{
  my ($id, $str) = @_;

  my $curve = getCurve($id);
  $curve->{extraoptions} .= "$str ";
  updateCurveOptions($curve, $id);
}

# Sets the legend title of the given curve
sub setCurveLabel
{
  my ($id, $str) = @_;

  my $curve = getCurve($id);
  $curve->{title} = $str;
  updateCurveOptions($curve, $id);
}

# Marks the given curve as a histogram, using the --histstyle smoothing
sub setCurveAsHistogram
{
  my ($id) = @_;

  my $curve = getCurve($id);
  $curve->{histoptions} = 'using (histbin($2)):(1.0) smooth ' . $options{histstyle};

  updateCurveOptions($curve, $id);
}

# remove all the curve data
sub clearCurves
{
  foreach my $curve (@curves)
  {
    $curve->{datastring}        = '';
    $curve->{datastring_meta}   = [];
    $curve->{datastring_offset} = 0;
  }
}

# Replots the stored data if we're streaming, new data has arrived and the
# timing logic below allows it. Takes the numeric domain of the latest point
# and a flag that says whether the timer asked for this replot
sub replot
{
  return unless $haveNewData;
  $haveNewData = undef;

  return if !$options{stream};

  # The logic involving domain rollover replotting due to --monotonic is a bit
  # tricky. I want this:
  #
  # if( domain rolls over slowly )
  # {
  #   should update on a timer;
  #   when the domain rolls over, --monotonic should force a replot
  # }
  # if( domain rolls over quickly )
  # {
  #   should update when the domain rolls over,
  #   at most as quickly as the timer indicates
  # }

  my ($domain0_numeric, $replot_is_from_timer) = @_;

  my $now = [gettimeofday];

  if( # If there is no replot timer at all, replot at any indication
      $options{stream} < 0 ||

      # if the last replot was timer-based, but this one isn't, force a replot.
      # This makes sure that a replot happens for a domain rollover shortly
      # after a timer replot
      !$replot_is_from_timer && $last_replot_is_from_timer ||

      # if enough time has elapsed since the last replot, it's ok to replot
      tv_interval ( $last_replot_time, $now ) > 0.8*$options{stream} )
  {
    # ok, then. We really need to replot
    if ( defined $options{xlen} )
    {
      # we have an --xlen, so we need to clean out the old data
      pruneOldData( $domain0_numeric - $options{xlen} );

      my ($xmin, $xmax) = ($domain0_numeric - $options{xlen}, $domain0_numeric);
      if ( defined $options{timefmt} )
      {
        # if we're using a timefmt, I need to convert my xmin range from
        # seconds-since-the-epoch BACK to the timefmt. Sheesh
        ($xmin, $xmax) = map {Time::Piece->strptime( $_, '%s' )->strftime( $options{timefmt} ) } ($xmin, $xmax);
      }
      sendRangeCommand( "xrange", $xmin, $xmax );
    }

    plotStoredData();

    # update replot state
    $last_replot_time          = $now;
    $last_replot_is_from_timer = $replot_is_from_timer;
  }
}

# function to add a point to the plot. Assumes that the curve passed in already
# exists. Records where in the datastring the point begins, so that the point
# can later be culled
sub pushPoint
{
  my ($curve, $datastring, $domain0_numeric) = @_;

  push @{$curve->{datastring_meta}},
    { offset_start => length( $curve->{datastring} ) + $curve->{datastring_offset},
      domain       => $domain0_numeric };
  $curve->{datastring} .= $datastring;

  $haveNewData = 1;
}

=head1 NAME feedgnuplot - General purpose pipe-oriented plotting tool =head1 SYNOPSIS Simple plotting of piped data: $ seq 5 | awk '{print 2*$1, $1*$1}' 2 1 4 4 6 9 8 16 10 25 $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --lines --points --legend 0 "data 0" --title "Test plot" --y2 1 --terminal 'dumb 80,40' --exit Test plot 10 ++------+--------+-------+-------+-------+--------+-------+------*A 25 + + + + + + + + **#+ | : : : : : : data 0+**A*** | | : : : : : : :** # | 9 ++.......................................................**.##....| | : : : : : : ** :# | | : : : : : : ** # | | : : : : : :** ##: ++ 20 8 ++................................................A....#..........| | : : : : : **: # : | | : : : : : ** : ## : | | : : : : : ** :# : | | : : : : :** B : | 7 ++......................................**......##................| | : : : : ** : ## : : ++ 15 | : : : : ** : # : : | | : : : :** : ## : : | 6 ++..............................*A.......##.......................| | : : : ** : ##: : : | | : : : ** : # : : : | | : : :** : ## : : : ++ 10 5 ++......................**........##..............................| | : : ** : #B : : : | | : : ** : ## : : : : | | : :** : ## : : : : | 4 ++...............A.......###......................................| | : **: ##: : : : : | | : ** : ## : : : : : ++ 5 | : ** : ## : : : : : | | :** ##B# : : : : : | 3 ++.....**..####...................................................| | **#### : : : : : : | | **## : : : : : : : | B** + + + + + + + + 2 A+------+--------+-------+-------+-------+--------+-------+------++ 0 1 1.5 2 2.5 3 3.5 4 4.5 5 Simple real-time plotting example: plot how much data is received on the
wlan0 network interface in bytes/second (uses bash, awk and Linux): $ while true; do sleep 1; cat /proc/net/dev; done | gawk '/wlan0/ {if(b) {print $2-b; fflush()} b=$2}' | feedgnuplot --lines --stream --xlen 10 --ylabel 'Bytes/sec' --xlabel seconds =head1 DESCRIPTION This is a flexible, command-line-oriented frontend to Gnuplot. It creates plots from data coming in on STDIN or given in a filename passed on the commandline. Various data representations are supported, as is hardcopy output and streaming display of live data. A simple example: $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot You should see a plot with two curves. The C<awk> command generates some data to plot and the C<feedgnuplot> reads it in from STDIN and generates the plot. The C<awk> invocation is just an example; more interesting things would be plotted in normal usage. No commandline-options are required for the most basic plotting. Input parsing is flexible; every line need not have the same number of points. New curves will be created as needed. The most commonly used functionality of gnuplot is supported directly by the script. Anything not directly supported can still be done with the C<--extracmds> and C<--curvestyle> options. Arbitrary gnuplot commands can be passed in with C<--extracmds>. For example, to turn off the grid, pass in C<--extracmds 'unset grid'>. As many of these options as needed can be passed in. To add arbitrary curve styles, use C<--curvestyle curveID extrastyle>. Pass these more than once to affect more than one curve. To apply an extra style to I<all> the curves that lack an explicit C<--curvestyle>, pass in C<--curvestyleall extrastyle>. =head2 Data formats By default, each value present in the incoming data represents a distinct data point, as demonstrated in the original example above (we had 10 numbers in the input and 10 points in the plot). 
If requested, the script supports more sophisticated interpretation of input data =head3 Domain selection If C<--domain> is passed in, the first value on each line of input is interpreted as the I<X>-value for the rest of the data on that line. Without C<--domain> the I<X>-value is the line number, and the first value on a line is a plain data point like the others. Default is C<--nodomain>. Thus the original example above produces 2 curves, with B<1,2,3,4,5> as the I<X>-values. If we run the same command with --domain: $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --domain we get only 1 curve, with B<2,4,6,8,10> as the I<X>-values. As many points as desired can appear on a single line, but all points on a line are associated with the I<X>-value at the start of that line. =head3 Curve indexing By default, each column represents a separate curve. This is fine unless sparse data is to be plotted. With the C<--dataid> option, each point is represented by 2 values: a string identifying the curve, and the value itself. If we add C<--dataid> to the original example: $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --dataid --autolegend we get 5 different curves with one point in each. The first column, as produced by C<awk>, is B<2,4,6,8,10>. These are interpreted as the IDs of the curves to be plotted. The C<--autolegend> option adds a legend using the given IDs to label the curves. The IDs need not be numbers; generic strings are accepted. As many points as desired can appear on a single line. C<--domain> can be used in conjunction with C<--dataid>. =head3 Multi-value style support Depending on how gnuplot is plotting the data, more than one value may be needed to represent a single point. For example, the script has support to plot all the data with C<--circles>. This requires a radius to be specified for each point in addition to the position of the point. Thus, when plotting with C<--circles>, 2 numbers are read for each data point instead of 1. 
A similar situation exists with C<--colormap> where each point contains the position I<and> the color. There are other gnuplot styles that require more data (such as error bars), but none of these are directly supported by the script. They can still be used, though, by specifying the specific style with C<--curvestyle>, and specifying how many extra values are needed for each point with C<--extraValuesPerPoint extra>. C<--extraValuesPerPoint> is ONLY needed for the styles not explicitly supported; supported styles set that variable automatically. =head3 3D data To plot 3D data, pass in C<--3d>. C<--domain> MUST be given when plotting 3D data to avoid domain ambiguity. If 3D data is being plotted, there are by definition 2 domain values instead of one (I<Z> as a function of I<X> and I<Y> instead of I<Y> as a function of I<X>). Thus the first 2 values on each line are interpreted as the domain instead of just 1. The rest of the processing happens the same way as before. =head3 Time/date data If the input data domain is a time/date, this can be interpreted with C<--timefmt>. This option takes a single argument: the format to use to parse the data. The format is documented in 'set timefmt' in gnuplot, although the common flags that C<strftime> understands are generally supported. The backslash sequences in the format are I<not> supported, so if you want a tab, put in a tab instead of \t. Whitespace in the format I<is> supported. When this flag is given, some other options act a little bit differently: =over =item C<--xlen> is an I<integer> in seconds =item C<--xmin> and C<--xmax> I<must> use the format passed in to C<--timefmt> =back Using this option changes both the way the input is parsed I<and> the way the x-axis tics are labelled. Gnuplot tries to be intelligent in this labelling, but it doesn't always do what the user wants. The labelling can be controlled with the gnuplot C<set format> command, which takes the same type of format string as C<--timefmt>. 
Example: $ sar 1 -1 | awk '$1 ~ /..:..:../ && $8 ~/^[0-9\.]*$/ {print $1,$8; fflush()}' | feedgnuplot --stream --domain --lines --timefmt '%H:%M:%S' --extracmds 'set format x "%H:%M:%S"' This plots the 'idle' CPU consumption against time. Note that while gnuplot supports the time/date on any axis, I<feedgnuplot> currently supports it I<only> as the x-axis domain. This may change in the future. =head2 Real-time streaming data To plot real-time data, pass in the C<--stream [refreshperiod]> option. Data will then be plotted as it is received. The plot will be updated every C<refreshperiod> seconds. If the period isn't specified, a 1Hz refresh rate is used. To refresh at specific intervals indicated by the data, set the refreshperiod to 0 or to 'trigger'. The plot will then I<only> be refreshed when a data line 'replot' is received. This 'replot' command works in both triggered and timed modes, but in triggered mode, it's the only way to replot. Look in L</"Special data commands"> for more information. To plot only the most recent data (instead of I<all> the data), C<--xlen windowsize> can be given. This will create a constantly-updating, scrolling view of the recent past. C<windowsize> should be replaced by the desired length of the domain window to plot, in domain units (passed-in values if C<--domain> or line numbers otherwise). If the domain is a time/date via C<--timefmt>, then C<windowsize> is an I<integer> in seconds. =head3 Special data commands If we are reading streaming data, the input stream can contain special commands in addition to the raw data. Feedgnuplot looks for these at the start of every input line. If a command is detected, the rest of the line is discarded. These commands are =over =item C<replot> This command refreshes the plot right now, instead of waiting for the next refresh time indicated by the timer. This command works in addition to the timed refresh, as indicated by C<--stream [refreshperiod]>. 
=item C<clear> This command clears out the current data in the plot. The plotting process continues, however, to any data following the C<clear>. =item C<exit> This command causes feedgnuplot to exit. =back =head2 Hardcopy output The script is able to produce hardcopy output with C<--hardcopy outputfile>. The output type can be inferred from the filename, if B<.ps>, B<.eps>, B<.pdf>, B<.svg> or B<.png> is requested. If any other file type is requested, C<--terminal> I<must> be passed in to tell gnuplot how to make the plot. =head2 Self-plotting data files This script can be used to enable self-plotting data files. There are 2 ways of doing this: with a shebang (#!) or with inline perl data. =head3 Self-plotting data with a #! A self-plotting, executable data file C<data> is formatted as $ cat data #!/usr/bin/feedgnuplot --lines --points 2 1 4 4 6 9 8 16 10 25 12 36 14 49 16 64 18 81 20 100 22 121 24 144 26 169 28 196 30 225 This is the shebang (#!) line followed by the data, formatted as before. The data file can be plotted simply with $ ./data The caveats here are that on Linux the whole #! line is limited to 127 characters and that the full path to feedgnuplot must be given. The 127 character limit is a serious limitation, but this can likely be resolved with a kernel patch. I have only tried on Linux 2.6. =head3 Self-plotting data with perl inline data Perl supports storing data and code in the same file. This can also be used to create self-plotting files: $ cat plotdata.pl #!/usr/bin/perl use strict; use warnings; open PLOT, "| feedgnuplot --lines --points" or die "Couldn't open plotting pipe"; while( <DATA> ) { my @xy = split; print PLOT "@xy\n"; } __DATA__ 2 1 4 4 6 9 8 16 10 25 12 36 14 49 16 64 18 81 20 100 22 121 24 144 26 169 28 196 30 225 This is especially useful if the logged data is not in a format directly supported by feedgnuplot. 
Raw data can be stored after the __DATA__ directive, with a small perl script to manipulate the data into a useable format and send it to the plotter. =head1 ARGUMENTS =over =item --[no]domain If enabled, the first element of each line is the domain variable. If not, the point index is used =item --[no]dataid If enabled, each data point is preceded by the ID of the data set that point corresponds to. This ID is interpreted as a string, NOT as just a number. If not enabled, the order of the point is used. As an example, if line 3 of the input is "0 9 1 20" then =over =item '--nodomain --nodataid' would parse the 4 numbers as points in 4 different curves at x=3 =item '--domain --nodataid' would parse the 4 numbers as points in 3 different curves at x=0. Here, 0 is the x-variable and 9,1,20 are the data values =item '--nodomain --dataid' would parse the 4 numbers as points in 2 different curves at x=3. Here 0 and 1 are the data IDs and 9 and 20 are the data values =item '--domain --dataid' would parse the 4 numbers as a single point at x=0. Here 9 is the data ID and 1 is the data value. 20 is an extra value, so it is ignored. If another value followed 20, we'd get another point in curve ID 20 =back =item --[no]3d Do [not] plot in 3D. This only makes sense with --domain. Each domain here is an (x,y) tuple =item --timefmt [format] Interpret the X data as a time/date, parsed with the given format =item --colormap Show a colormapped xy plot. Requires extra data for the color. zmin/zmax can be used to set the extents of the colors. Automatically increments C<--extraValuesPerPoint> =item --stream [period] Plot the data as it comes in, in realtime. If period is given, replot every period seconds. If no period is given, replot at 1Hz. If the period is given as 0 or 'trigger', replot I<only> when the incoming data dictates this. See the L</"Real-time streaming data"> section of the man page. 
=item --[no]lines Do [not] draw lines to connect consecutive points =item --[no]points Do [not] draw points =item --circles Plot with circles. This requires a radius be specified for each point. Automatically increments C<--extraValuesPerPoint>. C<Not> supported for 3d plots. =item --title xxx Set the title of the plot =item --legend curveID legend Set the label for a curve plot. Use this option multiple times for multiple curves. With --dataid, curveID is the ID. Otherwise, it's the index of the curve, starting at 0 =item --autolegend Use the curve IDs for the legend. Titles given with --legend override these =item --xlen xxx When using --stream, sets the size of the x-window to plot. Omit this or set it to 0 to plot ALL the data. Does not make sense with 3d plots. Implies --monotonic =item --xmin/xmax/ymin/ymax/y2min/y2max/zmin/zmax xxx Set the range for the given axis. These x-axis bounds are ignored in a streaming plot. The y2-axis bounds do not apply in 3d plots. The z-axis bounds apply I<only> to 3d plots or colormaps. =item --xlabel/ylabel/y2label/zlabel xxx Label the given axis. The y2-axis label does not apply to 3d plots while the z-axis label applies I<only> to 3d plots. =item --y2 xxx Plot the data specified by this curve ID on the y2 axis. Without --dataid, the ID is just an ordered 0-based index. Does not apply to 3d plots. Can be passed multiple times, or passed a comma-separated list. By default the y2-axis curves look the same as the y-axis ones. I.e. the viewer of the resulting plot has to be told which is which via an axes label, legend, etc. Prior to version 1.25 of feedgnuplot the curves plotted on the y2 axis were drawn with a thicker line. This is no longer the case, but that behavior can be brought back by passing something like --y2 curveid --curvestyle curveid 'linewidth 3' =item --histogram curveID Set up this specific curve to plot a histogram. The bin width is given with the --binwidth option (assumed 1.0 if omitted). 
--histogram does NOT touch the drawing style. It is often desired to plot these with boxes, and this MUST be explicitly requested with --curvestyleall 'with boxes'. This works with --domain and/or --stream, but in those cases the x-value is used ONLY to cull old data because of --xlen or --monotonic. I.e. the x-values are NOT drawn in any way. Can be passed multiple times, or passed a comma-separated list =item --binwidth width The width of bins when making histograms. This setting applies to ALL histograms in the plot. Defaults to 1.0 if not given. =item --histstyle style Normally, histograms are generated with the 'smooth freq' gnuplot style. --histstyle can be used to select different 'smooth' settings. Allowed are 'unique', 'cumulative' and 'cnormal'. 'unique' indicates whether a bin has at least one item in it: instead of counting the items, it'll always report 0 or 1. 'cumulative' is the integral of the "normal" histogram. 'cnormal' is like 'cumulative', but rescaled to end up at 1.0. =item --curvestyle curveID style Additional styles per curve. With --dataid, curveID is the ID. Otherwise, it's the index of the curve, starting at 0. Use this option multiple times for multiple curves. --curvestyleall does NOT apply to curves that have a --curvestyle =item --curvestyleall xxx Additional styles for all curves that have no --curvestyle =item --extracmds xxx Additional commands. These could contain extra global styles for instance. Can be passed multiple times. =item --square Plot data with aspect ratio 1. For 3D plots, this controls the aspect ratio for all 3 axes =item --square_xy For 3D plots, set square aspect ratio for ONLY the x,y axes =item --hardcopy xxx If not streaming, output to a file specified here. Format inferred from filename, unless specified by --terminal =item --terminal xxx String passed to 'set terminal'. No attempts are made to validate this. --hardcopy sets this to some sensible defaults if --hardcopy is given .png, .pdf, .ps, .eps or .svg. 
If any other file type is desired, use both --hardcopy and --terminal =item --maxcurves xxx The maximum allowed number of curves. This is 100 by default, but can be reset with this option. This exists purely to prevent perl from allocating all of the system's memory when reading bogus data =item --monotonic If --domain is given, checks to make sure that the x-coordinate in the input data is monotonically increasing. If a given x-variable is in the past, all data currently cached for this curve is purged. Without --monotonic, all data is kept. Does not make sense with 3d plots. No --monotonic by default. The data is replotted before being purged =item --extraValuesPerPoint xxx How many extra values are given for each data point. Normally this is 0, and does not need to be specified, but sometimes we want extra data, like for colors or point sizes or error bars, etc. feedgnuplot options that require this (colormap, circles) automatically set it. This option is ONLY needed if unknown styles are used, with --curvestyleall for instance =item --dump Instead of printing to gnuplot, print to STDOUT. Very useful for debugging. It is possible to send the output produced this way to gnuplot directly. =item --exit Terminate the feedgnuplot process after passing data to gnuplot. The window will persist but will not be interactive. Without this option feedgnuplot keeps running and must be killed by the user. Note that this option works only with later versions of gnuplot and only with some gnuplot terminals. =item --geometry If using X11, specifies the size, position of the plot window =item --version Print the version and exit =back =head1 RECIPES =head2 Basic plotting of piped data $ seq 5 | awk '{print 2*$1, $1*$1}' 2 1 4 4 6 9 8 16 10 25 $ seq 5 | awk '{print 2*$1, $1*$1}' | feedgnuplot --lines --points --legend 0 "data 0" --title "Test plot" --y2 1 =head2 Realtime plot of network throughput Looks at wlan0 on Linux. 
$ while true; do sleep 1; cat /proc/net/dev; done | gawk '/wlan0/ {if(b) {print $2-b; fflush()} b=$2}' | feedgnuplot --lines --stream --xlen 10 --ylabel 'Bytes/sec' --xlabel seconds =head2 Realtime plot of battery charge with respect to time Uses the result of the C<acpi> command. $ while true; do acpi; sleep 15; done | perl -nE 'BEGIN{ $| = 1; } /([0-9]*)%/; say join(" ", time(), $1);' | feedgnuplot --stream --ymin 0 --ymax 100 --lines --domain --xlabel 'Time' --timefmt '%s' --ylabel "Battery charge (%)" =head2 Realtime plot of temperatures in an IBM Thinkpad Uses C</proc/acpi/ibm/thermal>, which reports temperatures at various locations in a Thinkpad. $ while true; do cat /proc/acpi/ibm/thermal | awk '{$1=""; print}' ; sleep 1; done | feedgnuplot --stream --xlen 100 --lines --autolegend --ymax 100 --ymin 20 --ylabel 'Temperature (deg C)' =head2 Plotting a histogram of file sizes in a directory $ ls -l | awk '{print $5/1e6}' | feedgnuplot --histogram 0 --curvestyleall 'with boxes' --ymin 0 --xlabel 'File size (MB)' --ylabel Frequency =head1 ACKNOWLEDGEMENT This program is originally based on the driveGnuPlots.pl script from Thanassis Tsiodras. It is available from his site at L<http://users.softlab.ece.ntua.gr/~ttsiod/gnuplotStreaming.html> =head1 REPOSITORY L<https://github.com/dkogan/feedgnuplot> =head1 AUTHOR Dima Kogan, C<< <dima@secretsauce.net> >> =head1 LICENSE AND COPYRIGHT Copyright 2011-2012 Dima Kogan. This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published by the Free Software Foundation; or the Artistic License. See http://dev.perl.org/licenses/ for more information. =cut