Slides for the talk parallel-crack-pipe at ukuug-spring-2010
Toking on
the crack
pipe
-
... in
parallel
-
Use case:
analyzing
exim logs
-
How many mails
(a) completed
(b) were rejected
-
Functional
crack
addiction
-
map { ... } ...
grep { ... } ...
-
<${target_dir}/mainlog*>
-
mainlog
mainlog.1
mainlog.2.gz
mainlog.3.gz
-
open my $fh, '<', $file;
-
open my $fh, "<$file";
-
open my $fh, "<$file";
open my $fh, "zcat ${file}|";
-
use String::ShellQuote;
my @safe = shell_quote @args;
-
map {
/\.gz$/
? 'zcat '.shell_quote($_).'|'
: '<'.$_
} <${target_dir}/mainlog*>;
-
<mainlog
<mainlog.1
zcat 'mainlog.2.gz'|
zcat 'mainlog.3.gz'|
-
I LOVE
MY PIPE
-
my @result = map {
/some (matching .*) expression/;
$1;
} <$fh>;
-
my @result = map {
/some (matching .*) expression/;
$1;
} <$fh>; # SLUUUURP
-
cat file
| grep expr
| sed -e '...'
| ...
-
use IO::Pipeline; # new!
-
pmap {
/some (matching .*) expression/;
$1;
}
-
my $matcher = pmap {
/some (matching .*) expression/;
$1;
};
-
my @result;
$fh
| $matcher
| psink { push @result, $_ };
-
$fh
| pmap {
/some (matching .*) expression/;
"$1\n";
}
| \*STDOUT;
-
mainlog:
2010-03-21 16:15:30
1NtNoI-000658-6V Completed
2010-03-21 16:35:59
blah blah rejected because blah blah
-
pmap { [ /^(\S+) (\S+) (.*)$/ ] }
-
[
'2010-03-21',
'16:15:30',
'1NtNoI-000658-6V Completed'
]
-
pgrep {
$_->[2] =~ /rejected|Completed/
}
-
pmap { [
@{$_}[0, 1],
$_->[2] =~ /rejected/
? 'Rejected' : 'Completed'
] }
-
[
'2010-03-21',
'16:15:30',
'Completed'
]
-
pmap { join(' ', @$_)."\n" }
'2010-03-21 16:15:30 Completed'
-
my $pipe_filter = pmap { [ /^(\S+) (\S+) (.*)$/ ] }
| pgrep { $_->[2] =~ /rejected|Completed/ }
| pmap { [
@{$_}[0, 1],
$_->[2] =~ /rejected/
? 'Rejected' : 'Completed'
] }
| pmap { join(' ', @$_)."\n" }
| \*STDOUT;
-
foreach my $log_open (@log_opens) {
warn "Processing $log_open";
open my $log, $log_open
or die "Couldn't open ${log_open}: $!";
$log | $pipe_filter;
}
-
$ perl slurper host1/
2010-03-23 06:54:03 Rejected
2010-03-23 06:54:28 Rejected
2010-03-23 06:55:10 Rejected
2010-03-23 06:55:21 Completed
2010-03-23 06:56:23 Completed
2010-03-23 06:56:45 Rejected
...
-
... in
parallel!
-
use IPC::Command::Multiplex; # also new!
-
multiplex(
run => [
map { [ 'perl', 'slurper', $_ ] } @hosts
],
callback => sub {
chomp(my $line = shift);
my ($day, $time, $result) = split(' ', $line);
$days{$day}{$result}++;
}
);
-
perl slurper host1
perl slurper host2
-
print "Day\tCompleted\tRejected\n";
foreach my $day (sort keys %days) {
print join("\t", $day,
$days{$day}{'Completed'},
$days{$day}{'Rejected'}
)."\n";
}
-
use HTML::Tags;
print HTML::Tags::to_html_string(
<table>, "\n",
\" <tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>\n",
(map {;
' ',
<tr>,
<td>, $_, </td>,
<td>, $days{$_}{Completed}, </td>,
<td>, $days{$_}{Rejected}, </td>,
</tr>, "\n",
} sort keys %days),
</table>, "\n"
);
-
$ ./driver
Day Completed Rejected
2010-03-20 1060 6252
2010-03-21 1800 6156
2010-03-22 2196 8236
2010-03-23 1084 6560
-
$ ./driver html
<table>
<tr><th>Day</th><th>Completed</th><th>Rejected</th></tr>
<tr><td>2010-03-20</td><td>1060</td><td>6252</td></tr>
<tr><td>2010-03-21</td><td>1800</td><td>6156</td></tr>
<tr><td>2010-03-22</td><td>2196</td><td>8236</td></tr>
<tr><td>2010-03-23</td><td>1084</td><td>6560</td></tr>
</table>
-
Toke
on!
-
http://enlightenedperl.org/
http://lists.scsys.co.uk/
these slides will be on
http://shadowcat.co.uk/