``````#!/usr/bin/env perl
use warnings;
use strict;
use Math::Prime::Util qw/moebius mertens/;

my \$lim = shift || 2**50;
my \$method = shift || 'mertens';

# See http://arxiv.org/pdf/1107.4890v1.pdf

#  2.9s  mertens
#  9.8s  block
# 10.0s  monolithic
# 33.0s  simple
# lots   brute

my \$sum = 0;

if (\$method eq 'brute') {
# Far too slow
for (1 .. \$lim) { \$sum++ if moebius(\$_) }
} elsif (\$method eq 'simple') {

# Basic application of theorem 1.
for (1 .. int(sqrt(\$lim)+0.001)) {
\$sum += moebius(\$_) * int(\$lim/(\$_*\$_));
}

} elsif (\$method eq 'monolithic') {

# Efficient theorem 1, but lots of memory.
my @mob = moebius(0, int(sqrt(\$lim)+0.001));
for (1 .. \$#mob) { \$sum += \$mob[\$_] * int(\$lim/(\$_*\$_)) if \$mob[\$_]; }

} elsif (\$method eq 'block') {

# Break up into chunks to constrain memory.
my(\$beg,\$end,\$mlim) = (1, 1, int(sqrt(\$lim)+0.001));
while (\$beg < \$mlim) {
\$end = \$beg + 100_000 - 1;
\$end = \$mlim if \$end > \$mlim;
my @mob = moebius(\$beg,\$end);
for (\$beg .. \$end) {
\$sum += \$mob[\$_-\$beg] * int(\$lim/(\$_*\$_)) if \$mob[\$_-\$beg];
}
\$beg = \$end+1;
}
} elsif (\$method eq 'mertens') {

# Pawlewicz's method, using chunked S1, and no optimization for Mertens.

my \$I = 50;   # Tune as desired.
my \$D = int(sqrt(\$lim/\$I)+0.00001);
my (\$S1, \$S2) = (0,0);

# S1
my \$chunk = 100_000;
for (my \$beg = 1; \$beg < \$D; \$beg += \$chunk) {
my \$end = \$beg + \$chunk - 1;
\$end = \$D if \$end > \$D;
my @mob = moebius(\$beg,\$end);
for (\$beg .. \$end) {
\$S1 += \$mob[\$_-\$beg] * int(\$lim/(\$_*\$_)) if \$mob[\$_-\$beg];
}
}
# S2
for (1 .. \$I-1) {
my \$xi = int(sqrt(\$lim/\$_)+0.00001);
\$S2 += mertens(\$xi);
}
\$S2 -= (\$I-1) * mertens(\$D);

\$sum = \$S1 + \$S2;
}

print "\$sum\n";
``````