Telephone +44(0)1524 64544
Email: info@shadowcat.co.uk

ukuug-spring-2010 - parallel-crack-pipe

Sat Dec 22 00:30:00 2012

Slides for the talk parallel-crack-pipe at ukuug-spring-2010

Toking on
the crack
pipe

-

... in
parallel

-

Use case:
analyzing
exim logs

-

How many mails
(a) completed
(b) were rejected

-

Functional
crack
addiction

-

  map { ... } ...
  grep { ... } ...

-

  <${target_dir}/mainlog*>

-

  mainlog
  mainlog.1
  mainlog.2.gz
  mainlog.3.gz

-

  open my $fh, '<', $file;

-

  open my $fh, "<$file";

-

  open my $fh, "<$file";
  open my $fh, "zcat ${file}|";

-

  use String::ShellQuote;
  my @safe = shell_quote @args;

-

  map {
    /\.gz$/
      ? 'zcat '.shell_quote($_).'|'
      : '<'.$_
  } <${target_dir}/mainlog*>;

-

  <mainlog
  <mainlog.1
  zcat 'mainlog.2.gz'|
  zcat 'mainlog.3.gz'|

-

I LOVE
MY PIPE

-

  my @result = map {
    /some (matching .*) expression/;
    $1;
  } <$fh>;

-

  my @result = map {
    /some (matching .*) expression/;
    $1;
  } <$fh>; # SLUUUURP

-

  cat file
    | grep expr
    | sed -e '...'
    | ...

-

  use IO::Pipeline; # new!

-

  pmap {
    /some (matching .*) expression/;
    $1;
  }

-

  my $matcher = pmap {
    /some (matching .*) expression/;
    $1;
  };

-

  my @result;
  $fh
    | $matcher
    | psink { push @result, $_ };

-

  $fh
    | pmap {
        /some (matching .*) expression/;
        "$1\n";
      }
    | \*STDOUT;

-

  mainlog:
    2010-03-21 16:15:30
      1NtNoI-000658-6V Completed
    2010-03-21 16:35:59
      blah blah rejected because blah blah

-

  pmap { [ /^(\S+) (\S+) (.*)$/ ] }

-

  [
    '2010-03-21',
    '16:15:30',
    '1NtNoI-000658-6V Completed'
  ]

-

  pgrep {
    $_->[2] =~ /rejected|Completed/
  }

-

  pmap { [
    @{$_}[0, 1],
    $_->[2] =~ /rejected/
      ? 'Rejected' : 'Completed'
  ] }

-

  [
    '2010-03-21',
    '16:15:30',
    'Completed'
  ]

-

  pmap {  join(' ', @$_)."\n" }

  '2010-03-21 16:15:30 Completed'

-

  my $pipe_filter = pmap { [ /^(\S+) (\S+) (.*)$/ ] }
    | pgrep { $_->[2] =~ /rejected|Completed/ }
    | pmap { [
        @{$_}[0, 1],
        $_->[2] =~ /rejected/
          ? 'Rejected' : 'Completed'
      ] }
    | pmap { join(' ', @$_)."\n" }
    | \*STDOUT;

-

  foreach my $log_open (@log_opens) {
    warn "Processing $log_open";
    open my $log, $log_open
      or die "Couldn't open ${log_open}: $!";

    $log | $pipe_filter;
  }

-

  $ perl slurper host1/
  2010-03-23 06:54:03 Rejected
  2010-03-23 06:54:28 Rejected
  2010-03-23 06:55:10 Rejected
  2010-03-23 06:55:21 Completed
  2010-03-23 06:56:23 Completed
  2010-03-23 06:56:45 Rejected
  ...

-

... in
parallel!

-

  use IPC::Command::Multiplex; # also new!

-

  multiplex(
    run => [
      map { [ 'perl', 'slurper', $_ ] } @hosts 
    ],
    callback => sub {
      chomp(my $line = shift);
      my ($day, $time, $result) = split(' ', $line);
      $days{$day}{$result}++;
    }
  );

-

  perl slurper host1
  perl slurper host2

-

  print "Day\tCompleted\tRejected\n";

  foreach my $day (sort keys %days) {
    print join("\t", $day,
      $days{$day}{'Completed'},
      $days{$day}{'Rejected'}
    )."\n";
  }

-

  use HTML::Tags;

  print HTML::Tags::to_html_string(
    <table>, "\n",
      \"  <tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>\n",
      (map {;
        '  ',
        <tr>,
          <td>, $_, </td>,
          <td>, $days{$_}{Completed}, </td>,
          <td>, $days{$_}{Rejected}, </td>,
        </tr>, "\n",
       } sort keys %days),
    </table>, "\n"
  );

-

  $ ./driver
  Day         Completed Rejected
  2010-03-20  1060      6252
  2010-03-21  1800      6156
  2010-03-22  2196      8236
  2010-03-23  1084      6560

-

  $ ./driver html
  <table>
    <tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>
    <tr><td>2010-03-20</td><td>1060</td><td>6252</td></tr>
    <tr><td>2010-03-21</td><td>1800</td><td>6156</td></tr>
    <tr><td>2010-03-22</td><td>2196</td><td>8236</td></tr>
    <tr><td>2010-03-23</td><td>1084</td><td>6560</td></tr>
  </table>

-

Toke
on!

-

http://enlightenedperl.org/
http://lists.scsys.co.uk/
these slides will be on
http://shadowcat.co.uk/