Slides for the talk parallel-crack-pipe at ukuug-spring-2010
Toking on
the crack
pipe
-
... in
parallel
-
Use case:
analyzing
exim logs
-
How many mails
(a) completed
(b) were rejected
-
Functional
crack
addiction
-
  map { ... } ...
  grep { ... } ...
-
  <${target_dir}/mainlog*>
-
  mainlog
  mainlog.1
  mainlog.2.gz
  mainlog.3.gz
-
  open my $fh, '<', $file;
-
  open my $fh, "<$file";
-
  open my $fh, "<$file";
  open my $fh, "zcat ${file}|";
-
  use String::ShellQuote;
  my @safe = shell_quote @args;
-
  map {
    /\.gz$/
      ? 'zcat '.shell_quote($_).'|'
      : '<'.$_
  } <${target_dir}/mainlog*>;
-
  <mainlog
  <mainlog.1
  zcat 'mainlog.2.gz'|
  zcat 'mainlog.3.gz'|
-
I LOVE
MY PIPE
-
  my @result = map {
    /some (matching .*) expression/;
    $1;
  } <$fh>;
-
  my @result = map {
    /some (matching .*) expression/;
    $1;
  } <$fh>; # SLUUUURP
-
  cat file
    | grep expr
    | sed -e '...'
    | ...
-
  use IO::Pipeline; # new!
-
  pmap {
    /some (matching .*) expression/;
    $1;
  }
-
  my $matcher = pmap {
    /some (matching .*) expression/;
    $1;
  };
-
  my @result;
  $fh
    | $matcher
    | psink { push @result, $_ };
-
  $fh
    | pmap {
        /some (matching .*) expression/;
        "$1\n";
      }
    | \*STDOUT;
-
  mainlog:
    2010-03-21 16:15:30
      1NtNoI-000658-6V Completed
    2010-03-21 16:35:59
      blah blah rejected because blah blah
-
  pmap { [ /^(\S+) (\S+) (.*)$/ ] }
-
  [
    '2010-03-21',
    '16:15:30',
    '1NtNoI-000658-6V Completed'
  ]
-
  pgrep {
    $_->[2] =~ /rejected|Completed/
  }
-
  pmap { [
    @{$_}[0, 1],
    $_->[2] =~ /rejected/
      ? 'Rejected' : 'Completed'
  ] }
-
  [
    '2010-03-21',
    '16:15:30',
    'Completed'
  ]
-
  pmap {  join(' ', @$_)."\n" }
  '2010-03-21 16:15:30 Completed'
-
  my $pipe_filter = pmap { [ /^(\S+) (\S+) (.*)$/ ] }
    | pgrep { $_->[2] =~ /rejected|Completed/ }
    | pmap { [
        @{$_}[0, 1],
        $_->[2] =~ /rejected/
          ? 'Rejected' : 'Completed'
      ] }
    | pmap { join(' ', @$_)."\n" }
    | \*STDOUT;
-
  foreach my $log_open (@log_opens) {
    warn "Processing $log_open";
    open my $log, $log_open
      or die "Couldn't open ${log_open}: $!";
    $log | $pipe_filter;
  }
-
  $ perl slurper host1/
  2010-03-23 06:54:03 Rejected
  2010-03-23 06:54:28 Rejected
  2010-03-23 06:55:10 Rejected
  2010-03-23 06:55:21 Completed
  2010-03-23 06:56:23 Completed
  2010-03-23 06:56:45 Rejected
  ...
-
... in
parallel!
-
  use IPC::Command::Multiplex; # also new!
-
  multiplex(
    run => [
      map { [ 'perl', 'slurper', $_ ] } @hosts 
    ],
    callback => sub {
      chomp(my $line = shift);
      my ($day, $time, $result) = split(' ', $line);
      $days{$day}{$result}++;
    }
  );
-
  perl slurper host1
  perl slurper host2
-
  print "Day\tCompleted\tRejected\n";
  foreach my $day (sort keys %days) {
    print join("\t", $day,
      $days{$day}{'Completed'},
      $days{$day}{'Rejected'}
    )."\n";
  }
-
  use HTML::Tags;
  print HTML::Tags::to_html_string(
    <table>, "\n",
      \"  <tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>\n",
      (map {;
        '  ',
        <tr>,
          <td>, $_, </td>,
          <td>, $days{$_}{Completed}, </td>,
          <td>, $days{$_}{Rejected}, </td>,
        </tr>, "\n",
       } sort keys %days),
    </table>, "\n"
  );
-
  $ ./driver
  Day         Completed Rejected
  2010-03-20  1060      6252
  2010-03-21  1800      6156
  2010-03-22  2196      8236
  2010-03-23  1084      6560
-
  $ ./driver html
  <table>
    <tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>
    <tr><td>2010-03-20</td><td>1060</td><td>6252</td></tr>
    <tr><td>2010-03-21</td><td>1800</td><td>6156</td></tr>
    <tr><td>2010-03-22</td><td>2196</td><td>8236</td></tr>
    <tr><td>2010-03-23</td><td>1084</td><td>6560</td></tr>
  </table>
-
Toke
on!
-
http://enlightenedperl.org/
http://lists.scsys.co.uk/
these slides will be on
http://shadowcat.co.uk/