mirror of
https://github.com/imapsync/imapsync.git
synced 2024-11-17 00:02:29 +01:00
1252 lines
30 KiB
Perl
1252 lines
30 KiB
Perl
#!/usr/bin/perl
|
|
|
|
# $Header: /mhub4/sources/imap-tools/delIMAPdups.pl.files,v 1.3 2015/02/04 23:32:22 rick Exp $
|
|
|
|
#######################################################################
|
|
# Description #
|
|
# #
|
|
# delIMAPdups looks for duplicate messages in an IMAP account, #
|
|
# looking for messages in each mailbox that have the same Message #
|
|
# ID. When a duplicate message is found the DELETED flag is set. #
|
|
# If the -p argument has been supplied then an EXPUNGE operation #
|
|
# is executed against the mailbox in which the message resides, #
|
|
# causing the messages which are marked for DELETE to be removed. #
|
|
# #
|
|
# Note that delIMAPdups does not check for duplicate copies of #
|
|
# messages across multiple mailboxes since it is often useful to #
|
|
# cross-file messages in multiple mailboxes. #
|
|
# #
|
|
# Usage: ./deldups -S host/user/password #
|
|
# [-i list of users and passwords] #
|
|
# [-m mailbox list (comma-delimited)] #
|
|
# [-L logfile] #
|
|
# [-p] purge messages #
|
|
# [-d] debug mode #
|
|
# See usage() for additional arguments. #
|
|
#######################################################################
|
|
|
|
############################################################################
|
|
# Copyright (c) 2008 Rick Sanders <rfs9999@earthlink.net> #
|
|
# #
|
|
# Permission to use, copy, modify, and distribute this software for any #
|
|
# purpose with or without fee is hereby granted, provided that the above #
|
|
# copyright notice and this permission notice appear in all copies. #
|
|
# #
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES #
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF #
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR #
|
|
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES #
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN #
|
|
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF #
|
|
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #
|
|
############################################################################
|
|
|
|
# use Socket;
|
|
use FileHandle;
|
|
use Fcntl;
|
|
use Getopt::Std;
|
|
use IO::Socket;
|
|
use MIME::Base64 qw(encode_base64 decode_base64);
|
|
|
|
#################################################################
|
|
# Main program. #
|
|
#################################################################
|
|
|
|
init();
sigprc();

# Main driver.  For every "user password" entry collected by processArgs():
# connect, log in, build a "key|||uid mailbox" work file for each mailbox,
# and hand that file to delete_duplicate_msgs().  Most variables here are
# deliberately package globals because the subroutines below read them
# ($conn, $total, $sorted_file, $uid, ...).
$grand_total = 0;
foreach my $entry ( @users ) {
    $total = 0;
    $entry =~ s/^\s+|\s+$//g;
    next if $entry =~ /^#/;      # comment line in the user list
    next if $entry eq '';        # blank line in the user list

    # Split at the first run of whitespace or the first colon.  (The old
    # character class [\s+:] also split on a literal '+', which broke
    # user names such as "name+tag@example.com".)
    ($user,$pwd) = split(/\s+|:/, $entry, 2);
    trim( *user );
    trim( *pwd );
    Log("Checking $user");

    # Get list of messages
    #
    connectToHost($host, \$conn);
    unless ( login($user,$pwd, $conn) ) {
        # login() returns 0 on a rejected login; there is nothing useful
        # we can do on this connection, so move on to the next account.
        Log("Login failed for $user, skipping this account");
        close $conn;
        next;
    }
    @mbxs = getMailboxList($user, '', $conn);

    if ( $recursive and $mbxList ) {
        # The user wants all submbxs under the ones he asked for
        $mbxList = '';
        my @mailboxes;
        foreach $mbx ( @mbxs ) {
            @submbxs = getMailboxList($user, $mbx, $conn);
            push( @mailboxes, @submbxs );
        }
        @mbxs = @mailboxes;
    }

    if ( $md5_hash ) {
        Log("Looking for duplicate messages using an MD5-digest hash of the body");
    } else {
        Log("Looking for duplicate messages using the $keyfield");
    }

    $output_file = "/tmp/delIMAPdups.tmp.$$";
    $sorted_file = "/tmp/delIMAPdups.tmp.sorted.$$";

    unlink $output_file if -e $output_file;
    foreach $mbx ( @mbxs ) {
        Log(" Checking mailbox $mbx") if $debug;

        # In global mode all mailboxes accumulate in one work file;
        # otherwise the file is rewritten for every mailbox.
        my $open_mode = $global ? '>>' : '>';
        if ( !open(FILE, $open_mode, $output_file)) {
            Log("Fatal error: Can't open $output_file: $!");
            exit;
        }

        $msgcount = selectMbx( $mbx, $conn);
        Log(" There are $msgcount messages in $mbx");

        @msgs = ();

        # Walk the mailbox in batches of 1000 sequence numbers.
        $i = 1;
        $j = 1000;
        while( 1 ) {
            $range = "$i:$j";
            Log(" range $range") if $debug;
            getMsgList( $keyfield, $mbx, $range, \@msgs, $conn );
            $i += 1000;
            $j += 1000;
            $j = $msgcount if $j > $msgcount;

            foreach $msg ( @msgs ) {
                ($uid,$key,$date) = split(/\|\|\|/, $msg);

                if ( $md5_hash ) {
                    Log("Using md5 hash of msg body as the key") if $debug;
                    # NOTE(review): $msgnum is not assigned anywhere in this
                    # loop (getMsgList only hands back the UID), so this
                    # fetch is issued without a usable message number.
                    # Needs a proper fix before -H can work; confirm.
                    undef $message;
                    fetch_msg_body( $msgnum, $conn, \$message );
                    unless ( defined $message and length $message ) {
                        # Never hash a missing or stale body: every such
                        # message would get the same key and be "duplicated".
                        Log("Could not fetch the body of uid $uid in $mbx, skipping it");
                        next;
                    }
                    $key = hash( \$message );
                    Log("msgnum:$msgnum hash $key") if $debug;
                } else {
                    if ( $use_date ) {
                        Log("Using $keyfield + date as the key") if $debug;
                        $key = "$key $date";
                        Log("key $key") if $debug;
                    } else {
                        Log("Using $keyfield") if $debug;
                    }
                }

                print FILE "$key|||$uid $mbx\n";
            }

            # $i now holds the first message number of the NEXT batch, so
            # we are done only once it lies beyond the last message.  (The
            # old test ">=" dropped the final message whenever the count
            # was 1001, 2001, ...)
            last if $i > $msgcount;
        }

        close FILE;

        # Go through the output file, identify duplicates, and delete them.
        delete_duplicate_msgs( $output_file, \%deletes, $conn ) unless $global;
    }

    if ( $global ) {
        Log("Deleting duplicates across all mailboxes");
        delete_duplicate_msgs( $output_file, \%deletes, $conn );
    }

    logout( $conn );

    Log(" Total messages purged $total");
    $grand_total += $total;
}
Log("");
Log("Grand total messages purged $grand_total");
exit;
|
|
|
|
|
|
sub init {

    # One-time start-up: parse the command line, probe for the optional
    # modules, open the log file and reset the counters.

    $version = 'V1.2';
    $os      = $ENV{'OS'};

    processArgs();

    $timeout = 60 unless $timeout;

    # Determine whether we have SSL support via openSSL and IO::Socket::SSL
    $ssl_installed = 1;
    eval 'use IO::Socket::SSL';
    if ( $@ ) {
        $ssl_installed = 0;
    }

    # Text::Wrap is optional.  Log() tests $text_wrap_installed, so that is
    # the flag to clear here (this used to clear a misspelt variable).
    eval 'use Text::Wrap';
    if ( $@ ) {
        $text_wrap_installed = 0;
    }

    # "use" is executed at compile time, so it was never really conditional
    # on $md5_hash even when it was written inside an if-block.
    use Digest::MD5 qw(md5_hex);

    # Open the logFile
    #
    if ( $logfile ) {
        if ( open(LOG, '>>', $logfile) ) {
            # Unbuffer the log.  LOG also stays the selected default handle,
            # as before.
            select(LOG); $| = 1;
        } else {
            print STDOUT "Can't open $logfile: $!\n";
            # Do not let Log() write to a handle that was never opened.
            $logfile = '';
        }
    }
    Log("\n$0 starting");
    $total=$moved=0;

    Log("Messages will be checked for duplicates across all folders") if $global;
}
|
|
|
|
#
|
|
# sendCommand
|
|
#
|
|
# This subroutine formats and sends an IMAP protocol command to an
|
|
# IMAP server on a specified connection.
|
|
#
|
|
|
|
sub sendCommand
{
    # Write one IMAP command line, terminated with CRLF, to the given
    # connection; echo it through Log() when protocol tracing ($showIMAP)
    # is switched on.
    my ($sock, $command) = @_;

    print {$sock} $command . "\r\n";

    Log (">> $command",2) if $showIMAP;
}
|
|
|
|
#
|
|
# readResponse
|
|
#
|
|
# This subroutine reads and formats an IMAP protocol response from an
|
|
# IMAP server on a specified connection.
|
|
#
|
|
|
|
sub readResponse
{
    # Read one line from the connection into the global $response (line
    # terminator and any carriage returns removed) and append it to the
    # global @response.  Every caller loops until it sees a tagged reply,
    # so a closed connection must be fatal here: <$fd> would otherwise
    # return undef forever and the caller would spin.
    my $fd = shift @_;

    $response = <$fd>;
    unless ( defined $response ) {
        Log("Connection closed unexpectedly while waiting for a server response");
        exit(1);
    }

    # Strip only a real newline; chop() would eat the last character of an
    # unterminated final line.
    $response =~ s/\n\z//;
    $response =~ s/\r//g;
    push (@response,$response);
    if ($showIMAP) { Log ("<< $response",2); }
}
|
|
|
|
#
|
|
# Log
|
|
#
|
|
# This subroutine formats a log message and writes it to STDOUT (and to the logfile, if one was given).
|
|
#
|
|
|
|
sub Log {

    # Write one message to STDOUT and, when a logfile has been specified,
    # also to the LOG handle with a timestamp and the process id in front.
    # Any further arguments are accepted and ignored.

    my $str = shift;

    # Obscure the password of a traced LOGIN command for security's sake.
    # The quoted password is replaced by position; it is never used as a
    # regular expression, so metacharacters in it can neither break the
    # match nor let it slip through, and a user name that happens to
    # contain the password text is left alone.
    $str =~ s/^(>> 1 LOGIN .+) ".+"/$1 "XXXX"/;

    if ( $text_wrap_installed ) {
        $Text::Wrap::columns = $width if $width;
        $str = wrap('', '', $str);
    }

    if ( $logfile ) {
        my ($sec,$min,$hour,$mday,$mon,$year) = localtime;
        # localtime() reports the year as an offset from 1900.
        my $line = sprintf ("%.2d-%.2d-%d.%.2d:%.2d:%.2d %s %s\n",
                       $mon + 1, $mday, $year + 1900, $hour, $min, $sec,$$,$str);
        print LOG "$line";
    }
    print STDOUT "$str\n";

}
|
|
|
|
|
|
# Make a connection to an IMAP host
|
|
|
|
sub connectToHost {

    # Open a TCP connection to "host" or "host:port" (port 143 when none is
    # given) and store the socket through the scalar reference $conn.
    # sslmode() decides whether the connection is wrapped in SSL.  Any
    # failure is logged and ends the program.

    my ($host, $conn) = @_;

    Log("Connecting to $host") if $debug;

    ($host,$port) = split(/:/, $host);
    $port ||= 143;

    # We know whether to use SSL for ports 143 and 993. For any
    # other ones we'll have to figure it out.
    $mode = sslmode( $host, $port );

    if ( $mode ne 'SSL' ) {
        # Non-SSL connection
        Log("Attempting a non-SSL connection") if $debug;
        $$conn = IO::Socket::INET->new(
            Proto    => "tcp",
            PeerAddr => $host,
            PeerPort => $port,
        );

        if ( !$$conn ) {
            Log("Error connecting to $host:$port: $@");
            warn "Error connecting to $host:$port: $@";
            exit;
        }
    }
    else {
        if ( $ssl_installed != 1 ) {
            warn("You must have openSSL and IO::Socket::SSL installed to use an SSL connection");
            Log("You must have openSSL and IO::Socket::SSL installed to use an SSL connection");
            exit;
        }
        Log("Attempting an SSL connection") if $debug;
        $$conn = IO::Socket::SSL->new(
            Proto           => "tcp",
            SSL_verify_mode => 0x00,
            PeerAddr        => $host,
            PeerPort        => $port,
            Domain          => AF_INET,
        );

        if ( !$$conn ) {
            $error = IO::Socket::SSL::errstr();
            Log("Error connecting to $host: $error");
            exit;
        }
    }
    Log("Connected to $host on port $port") if $debug;

}
|
|
|
|
sub sslmode {

    # Decide whether the host/port pair should be contacted over SSL.
    # Returns 'SSL' if so and the empty string otherwise.

    my ($host, $port) = @_;

    return ''    if $port == 143;      # Standard non-SSL port
    return 'SSL' if $port == 993;      # Standard SSL port

    # We don't have SSL installed on this machine
    return '' unless $ssl_installed;

    # For any other port, try an SSL handshake and see whether it succeeds.
    my $probe = IO::Socket::SSL->new(
        Proto           => "tcp",
        SSL_verify_mode => 0x00,
        PeerAddr        => $host,
        PeerPort        => $port,
    );
    return '' unless $probe;

    close( $probe );
    return 'SSL';
}
|
|
|
|
# trim
|
|
#
|
|
# remove leading and trailing spaces from a string
|
|
sub trim {

    # Strip leading and trailing whitespace from a package scalar in place.
    # The caller passes the variable as a typeglob, e.g. trim( *user ).

    local *string = shift;

    $string =~ s/^\s+|\s+$//g;

    return;
}
|
|
|
|
|
|
# login
|
|
#
|
|
# log in at the host with the user's name and password
|
|
#
|
|
sub login {

    # Authenticate $user on the connection.  Three routes:
    #   - an admin "user:password" was supplied (-A): AUTHENTICATE PLAIN,
    #   - the password has the form "oauth2:<token>": AUTHENTICATE XOAUTH2,
    #   - otherwise a plain LOGIN command.
    # Returns 1 on success and 0 when the server rejects the login.

    my $user = shift;
    my $pwd  = shift;
    my $conn = shift;

    if ( $admin_user ) {
        # An AUTHENTICATE = PLAIN login has been requested.  Split at the
        # first colon only, so the admin password may itself contain ':'.
        ($authuser,$authpwd) = split(/:/, $admin_user, 2 );
        login_plain( $user, $authuser, $authpwd, $conn ) or exit;
        return 1;
    }

    if ( $pwd =~ /^oauth2:(.+)/i ) {
        $token = $1;
        Log("password is an OAUTH2 token");
        $status = login_xoauth2( $user, $token, $conn );
        return $status;
    }

    # The password travels as an IMAP quoted string, so embedded quotes
    # and backslashes have to be escaped.
    (my $quoted_pwd = $pwd) =~ s/(["\\])/\\$1/g;

    sendCommand ($conn, "1 LOGIN $user \"$quoted_pwd\"");
    while (1) {
        readResponse ( $conn );
        last if $response =~ /^1 OK/i;

        # Only a tagged NO/BAD or a BYE means failure.  Matching a bare
        # "NO" anywhere also fired on untagged lines that merely contain
        # those letters, and a BAD reply was never recognised at all.
        if ( $response =~ /^1 (?:NO|BAD)|^\* BYE/i ) {
            Log ("unexpected LOGIN response: $response");
            return 0;
        }
    }
    Log("Logged in as $user") if $debug;

    return 1;
}
|
|
|
|
# login_plain
|
|
#
|
|
# log in at the source host with the user's name and password. If provided
|
|
# with administrator credential, use them as this eliminates the need for the
|
|
# user's password.
|
|
#
|
|
sub login_plain {

    # Do an AUTHENTICATE = PLAIN.  If an admin user has been provided its
    # credentials are used to open the mailbox of $user, otherwise $user
    # logs in as itself.  Returns 1 on success; a rejected login is logged
    # and ends the program.

    my $user  = shift;
    my $admin = shift;
    my $pwd   = shift;
    my $conn  = shift;

    if ( !$admin ) {
        # Log in as the user
        $admin = $user;
    }

    my $login_str = encode_base64( sprintf("%s\x00%s\x00%s", $user,$admin,$pwd), "" );

    sendCommand ($conn, "1 AUTHENTICATE PLAIN" );

    # Wait for the continuation request, i.e. a line that STARTS with '+'.
    # (An unanchored test also fired on any line that merely contains a
    # plus sign, such as a capability list advertising LITERAL+.)
    # The former "$last if $loops++ > 5" statements were a typo for "last"
    # and never had any effect; they are dropped rather than activated,
    # because giving up after a few lines would report a login that never
    # completed as successful.
    while (1) {
        readResponse ( $conn );
        last if $response =~ /^\+/;
        if ($response =~ /^1 NO|^1 BAD|^\* BYE/i) {
            Log ("unexpected LOGIN response: $response");
            exit;
        }
    }

    sendCommand ($conn, "$login_str" );
    while (1) {
        readResponse ( $conn );

        # NOTE(review): $dst does not appear to be set anywhere in this
        # script, so this branch looks like a leftover from a copy tool.
        if ( $response =~ /Microsoft Exchange/i and $conn eq $dst ) {
            # The destination is an Exchange server
            $exchange = 1;
            Log("The destination is an Exchange server");
        }

        last if $response =~ /^1 OK/i;
        if ($response =~ /^1 NO|^1 BAD|^\* BYE/i) {
            Log ("unexpected LOGIN response: $response");
            exit;
        }
    }

    return 1;

}
|
|
|
|
# login_xoauth2
|
|
#
|
|
# log in at the source host with the user's name and an XOAUTH2 token.
|
|
#
|
|
sub login_xoauth2 {

    # Do an AUTHENTICATE = XOAUTH2 login with the user's name and an OAuth2
    # bearer token.  Returns 1 on success and 0 on failure.

    my $user  = shift;
    my $token = shift;
    my $conn  = shift;

    $login_str = encode_base64("user=". $user ."\x01auth=Bearer ". $token ."\x01\x01", '');
    sendCommand ($conn, "1 AUTHENTICATE XOAUTH2 $login_str" );

    # Read until the server answers.  The former "$last if $loops++ > 5"
    # was a typo for "last" and never had any effect; it is dropped rather
    # than activated, because leaving the loop early would fall through to
    # "return 1" and report a login that never completed as successful.
    while (1) {
        readResponse ( $conn );
        if ( $response =~ /^\+ (.+)/ ) {
            # A continuation line here carries a base64-encoded error.
            $error = decode_base64( $1 );
            Log("XOAUTH authentication as $user failed: $error");
            return 0;
        }
        last if $response =~ /^1 OK/;
        if ($response =~ /^1 NO|^1 BAD|^\* BYE|failed/i) {
            Log ("unexpected LOGIN response: $response");
            return 0;
        }
    }

    Log("login complete") if $debug;

    return 1;

}
|
|
|
|
|
|
# logout
|
|
#
|
|
# log out from the host
|
|
#
|
|
sub logout {

    # Send LOGOUT under a fresh tag ($lsn), read until the server confirms
    # it or says something unexpected, then close the connection.

    my ($conn) = @_;

    $lsn++;
    undef @response;
    sendCommand ($conn, "$lsn LOGOUT");
    for (;;) {
        readResponse ($conn);
        last if $response =~ /^$lsn OK/i;
        next if $response =~ /^\*/;          # untagged chatter, keep reading
        Log ("unexpected LOGOUT response: $response");
        last;
    }
    close $conn;
    return;
}
|
|
|
|
|
|
# getMailboxList
|
|
#
|
|
# get a list of the user's mailboxes from the host
|
|
#
|
|
sub getMailboxList {

    # Return the list of mailboxes to examine.
    #   - If the user supplied a comma-delimited list (-m) it is returned
    #     as given, with blanks around each name removed.
    #   - Otherwise the server is asked (LIST) for every mailbox whose name
    #     starts with $mbx; \Noselect folders, '#' folders (unless the user
    #     is 'anonymous') and names starting with a dot are left out.
    # Returns an empty list when the server rejects the LIST command.

    my $user = shift;
    my $mbx  = shift;
    my $conn = shift;
    my @mbxs;

    if ( $mbxList ) {
        # The user has supplied a list of mailboxes.  The usage text shows
        # the list written as "Inbox, Drafts, Notes", so trim each name.
        foreach my $name ( split(/,/, $mbxList) ) {
            $name =~ s/^\s+|\s+$//g;
            push( @mbxs, $name ) if $name ne '';
        }
        return @mbxs;
    }

    namespace( $conn, \$srcPrefix, \$srcDelim, $opt_x );

    if ($debug) { Log("Get list of user's mailboxes",2); }

    my $target = $mbx . '*';

    # Quote the pattern so that mailbox names containing spaces survive.
    sendCommand ($conn, "1 LIST \"\" \"$target\"");
    undef @response;
    while ( 1 ) {
        readResponse ($conn);
        if ( $response =~ /^1 OK/i ) {
            last;
        }
        elsif ( $response !~ /^\*/ ) {
            Log ("unexpected response: $response");
            # An empty list, not (0): callers assign the result to an array.
            return;
        }
    }

    foreach my $line ( @response ) {
        $line =~ s/\s+/ /;
        # The name follows the quoted hierarchy delimiter.  The delimiter
        # is matched literally; it may be a regex metacharacter such as '.'.
        my ($dmy, $name) = split(/"\Q$srcDelim\E" /, $line);
        next unless defined $name;
        $name =~ s/"//g;

        next if $line =~ /NOSELECT/i;

        # Skip public mbxs unless we are migrating them
        next if ($name =~ /^\#/) && ($user ne 'anonymous');

        # Skip mailboxes starting with a dot
        next if $name =~ /^\./;

        push ( @mbxs, $name ) if $name ne '';
    }

    return @mbxs;
}
|
|
|
|
# getMsgList
|
|
#
|
|
# Get a list of the user's messages in the indicated mailbox on
|
|
# the host
|
|
#
|
|
sub getMsgList {

    # Fetch UID, flags, internal date and the header $field for the
    # sequence range $range of the currently selected mailbox.  For every
    # message that has a non-empty $field, one string
    #     "uid|||lower-cased field value|||internal date"
    # is stored in the array referenced by $msgs (which is emptied first).
    # Returns 1 on success and 0 when the server rejects the FETCH.

    my $field   = shift;
    my $mailbox = shift;
    my $range   = shift;
    my $msgs    = shift;
    my $conn    = shift;

    @$msgs = ();
    sendCommand ( $conn, "1 FETCH $range (uid flags internaldate body[header.fields ($field)])");
    undef @response;
    while ( 1 ) {
        readResponse ( $conn );
        if ( $response =~ /^1 OK/i ) {
            last;
        }
        elsif ( $response =~ /Broken pipe|Connection reset by peer/i ) {
            print STDOUT "Fetch from $mailbox: $response\n";
            exit;
        }
        elsif ( $response =~ /^1 BAD|^1 NO|^\* BYE/i ) {
            Log("Unexpected response $response");
            return 0;
        }
    }

    # Get a list of the msgs in the mailbox
    #
    my ($uid, $flags, $date, $subject);
    for my $i (0 .. $#response) {
        my $line = $response[$i];

        last if $line =~ /OK FETCH complete/;

        if ( $line =~ /^\* \d+ FETCH \(/ ) {
            # Start of a new message.  The server may list the data items
            # in any order, so do not insist on UID being the first one.
            $uid = $1 if $line =~ /\bUID (\d+)/;

            if ( $line =~ /FLAGS \(([^\)]*)/ ) {
                # Get the list of flags
                $flags = $1;
                $flags =~ s/\\Recent//;
            }
            if ( $line =~ /INTERNALDATE "([^"]*)"/ ) {
                $date = $1;
            }
        }

        if ( $line =~ /^Subject: (.+)/ ) {
            $subject = $1;
        }

        if ( $line =~ /^\Q$field\E:/i ) {
            my (undef, $value) = split(/:\s*/, $line, 2);
            $value = '' unless defined $value;
            $value =~ s/^\s+|\s+$//g;

            if ( $value eq ''
                 and defined $response[$i+1]
                 and $response[$i+1] =~ /^[ \t]+\S/ ) {
                # Line-wrap, get it from the next line.  Only a line that
                # starts with white space is a folded continuation.
                ($value = $response[$i+1]) =~ s/^\s+|\s+$//g;
            }

            # A message without a usable key must not take part in the
            # comparison: all such messages would share the empty key and
            # be treated as duplicates of one another.
            next if $value eq '';

            if ( $debug ) {
                Log("$uid $value $date $subject");
            }
            $value = lc( $value );
            push (@$msgs,"$uid|||$value|||$date");
        }
    }

    return 1;
}
|
|
|
|
|
|
sub fetch_msg_body {

    # Fetch message number $msgnum (RFC822) from the selected mailbox into
    # the scalar referenced by $message.  Returns 1 when the message was
    # read and 0 otherwise; on failure the referenced scalar is left empty
    # so a stale body from an earlier call can never be mistaken for this
    # message.

    my $msgnum  = shift;
    my $conn    = shift;
    my $message = shift;
    my $ok      = 0;

    Log(" Fetching msg $msgnum...") if $debug;

    $$message = '';

    sendCommand( $conn, "1 FETCH $msgnum (rfc822)");
    while (1) {
        readResponse ($conn);
        if ( $response =~ /^1 OK/i ) {
            # Measure the message itself, not the reference to it.
            $size = length($$message);
            $ok = 1 if $size;
            last;
        }
        elsif ($response =~ /message number out of range/i) {
            Log ("Error fetching uid $uid: out of range",2);
            $stat=0;
            last;
        }
        elsif ($response =~ /Bogus sequence in FETCH/i) {
            Log ("Error fetching uid $uid: Bogus sequence in FETCH",2);
            $stat=0;
            last;
        }
        elsif ( $response =~ /message could not be processed/i ) {
            Log("Message could not be processed, skipping it ($user,msgnum $msgnum,$destMbx)");
            push(@errors,"Message could not be processed, skipping it ($user,msgnum $msgnum,$destMbx)");
            $stat=0;
            last;
        }
        elsif ( $response =~ /^1 NO|^1 BAD|^\* BYE/i ) {
            # Any other rejection used to be ignored, which left this loop
            # waiting forever for a "1 OK".  Stop the run instead of
            # carrying on with a body we do not have: it is hashed to
            # decide which messages get deleted.
            Log("Fatal error: unexpected response to FETCH $msgnum: $response");
            exit;
        }
        elsif
            ($response =~ /^\*\s+$msgnum\s+FETCH\s+\(.*RFC822\s+\{([0-9]+)\}/i) {
            # A literal of $len bytes follows; read exactly that many.
            my $len = $1;
            my $cc  = 0;
            $$message = "";
            while ( $cc < $len ) {
                my $segment;
                my $n = read ($conn, $segment, $len - $cc);
                if ( !$n ) {
                    Log ("unable to read $len bytes");
                    $$message = '';
                    return 0;
                }
                $$message .= $segment;
                $cc += $n;
            }
        }
    }

    $$message = '' unless $ok;
    return $ok;
}
|
|
|
|
|
|
sub usage {

    # Print the command-line summary and exit.  Every option accepted by
    # processArgs() is listed here.

    print STDOUT "usage:\n";
    print STDOUT " deldups -S host/user/password\n";
    print STDOUT " Optional arguments:\n";
    print STDOUT " -p purge duplicate messages\n";
    print STDOUT " -M <mailbox to put duplicates into>\n";
    print STDOUT " -d debug\n";
    print STDOUT " -I show the IMAP protocol exchange\n";
    print STDOUT " -L logfile\n";
    print STDOUT " -i <file of users and passwords, one \"user password\" per line>\n";
    print STDOUT " -m mailbox list (eg \"Inbox, Drafts, Notes\". Default is all mailboxes)\n";
    print STDOUT " -R recursive (used with -m argument)\n";
    print STDOUT " -u include the date in the key field to determine uniqueness\n";
    print STDOUT " -H use an MD5 hash of the message body to determine uniqueness\n";
    print STDOUT " -F <field> Use <field> to determine duplicate messages\n";
    print STDOUT " -A <admin_user:admin_pwd>\n";
    print STDOUT " -r <range> Range of messages to examine, eg 1:1000\n";
    print STDOUT " -g check across all folders for uniqueness\n";
    print STDOUT " -h show this help text\n";
    exit;

}
|
|
|
|
sub processArgs {

    # Parse the command line into the global settings used by the rest of
    # the script and build @users, the list of "user password" entries to
    # process (from the -i file, or from the -S argument).

    if ( !getopts( "dS:L:Im:hpuM:HF:i:RA:r:g" ) ) {
        usage();
    }

    # Help, or no server given: show the usage text before touching files.
    usage() if $opt_h;
    usage() unless $opt_S;

    # host/user/password - split at the first two slashes only, so that a
    # password may itself contain '/'.
    ($host,$user,$pwd) = split(/\//, $opt_S, 3);
    $userList   = $opt_i;
    $mbxList    = $opt_m;
    $logfile    = $opt_L;
    $move2mbx   = $opt_M;
    $purge      = 1 if $opt_p;
    $debug      = 1 if $opt_d;
    $showIMAP   = 1 if $opt_I;
    $use_date   = 1 if $opt_u;
    $md5_hash   = 1 if $opt_H;
    $recursive  = 1 if $opt_R;
    $global     = 1 if $opt_g;
    $keyfield   = $opt_F;
    $admin_user = $opt_A;
    $msgs_per_folder = $opt_F;   # NOTE(review): same option as $keyfield; looks like a leftover
    $range      = $opt_r;

    $keyfield = 'Message-ID' if !$keyfield;

    if ( $userList ) {
        my $list_fh;
        if ( !open($list_fh, '<', $userList) ) {
            print STDERR "Error opening userlist $userList: $!\n";
            exit;
        }
        while ( my $entry = <$list_fh> ) {
            chomp $entry;
            push( @users, $entry );
        }
        close $list_fh;
    } else {
        push( @users, "$user $pwd" );
    }

}
|
|
|
|
sub findMsg {

    # Select $mbx and SEARCH it for a message with the given Message-ID.
    # Returns the first message number reported by the server, or undef
    # when the search reports none.

    my ($conn, $msgid, $mbx) = @_;
    my $msgnum;

    Log("SELECT $mbx") if $debug;
    sendCommand ( $conn, "1 SELECT \"$mbx\"");
    do { readResponse ($conn) } until $response =~ /^1 OK/;

    Log("Search for $msgid") if $debug;
    sendCommand ( $conn, "1 SEARCH header Message-ID \"$msgid\"");
    for (;;) {
        readResponse ($conn);
        if ( $response =~ /\* SEARCH /i ) {
            # Keep what follows "* SEARCH " and take its first number.
            my $hits = ( split(/\* SEARCH /i, $response) )[1];
            ($msgnum) = split(/ /, $hits);
        }

        last if $response =~ /^1 OK/ or $response =~ /complete/i;
    }

    return $msgnum;
}
|
|
|
|
sub delete_duplicates {

    # Set the \Deleted flag on the comma-separated UID list $msglist in the
    # currently selected mailbox and, if that worked, expunge the mailbox.
    # Returns 1 when the flags were set and 0 when the server refused.

    my $msglist = shift;
    my $mbx     = shift;
    my $conn    = shift;
    my $rc;

    Log(" Deleting message list $msglist") if $debug;

    sendCommand ( $conn, "1 UID STORE $msglist +FLAGS (\\Deleted)");
    while (1) {
        readResponse ($conn);
        if ( $response =~ /^1 OK/i ) {
            $rc = 1;
            Log(" Marked msgs for delete") if $debug;
            last;
        }

        if ( $response =~ /^1 BAD|^1 NO/i ) {
            # "\\Deleted": a single backslash in a double-quoted string
            # would be swallowed as an (unknown) escape.
            Log("Error setting \\Deleted flag for the msglist: $response");
            $rc = 0;
            last;
        }
    }

    # Expunge only when the flags were actually set; after a failed STORE
    # an EXPUNGE could remove nothing but messages somebody else had
    # marked.
    expunge_mbx( $mbx, $conn ) if $rc;

    return $rc;

}
|
|
|
|
sub expunge_mbx {

    # Issue EXPUNGE on the selected mailbox and count the untagged
    # "Expunge" lines the server sends back (kept in the global $expunged).

    my ($mbx, $conn) = @_;

    Log("Expunging mailbox $mbx") if $debug;

    sendCommand ( $conn, "1 EXPUNGE");
    $expunged = 0;
    for (;;) {
        readResponse ($conn);
        $expunged += 1 if $response =~ /\* (.+) Expunge/i;
        last if $response =~ /^1 OK/;

        next unless $response =~ /^1 BAD|^1 NO/i;
        print "Error purging messages: $response\n";
        last;
    }

    Log(" $expunged messages expunged") if $debug;

}
|
|
|
|
sub updateFlags {

    # Locate the message with the given Message-ID in $mbx (via findMsg)
    # and add $flags to it.  Returns 1 when the server accepted the STORE
    # and 0 when it refused.

    my ($conn, $msgid, $mbx, $flags) = @_;
    my $rc;

    if ( $debug ) {
        Log("Find $msgid");
        Log("flags $flags");
    }

    $msgnum = findMsg( $conn, $msgid, $mbx );
    Log("msgnum is $msgnum") if $debug;

    sendCommand ( $conn, "1 STORE $msgnum +FLAGS ($flags)");
    for (;;) {
        readResponse ($conn);
        if ( $response =~ /^1 OK/i ) {
            Log(" Updated flags for $msgid");
            $rc = 1;
            last;
        }

        next unless $response =~ /^1 BAD|^1 NO/i;
        Log("Error setting flags for $msgid: $response");
        $rc = 0;
        last;
    }
    return $rc;
}
|
|
|
|
sub dieright {
    # Signal handler: report the signal, log out of the server on the
    # global connection, and leave with exit(-1).
    my ($signame) = @_;
    print STDOUT "caught signal $signame\n";
    logout( $conn );
    exit(-1);
}
|
|
|
|
sub sigprc {

    # Route every signal in the list below to dieright().
    my @handled = qw(
        HUP INT QUIT ILL TRAP IOT EMT FPE
        BUS SEGV SYS PIPE ALRM TERM URG
    );

    foreach my $signame ( @handled ) {
        $SIG{$signame} = 'dieright';
    }
}
|
|
|
|
sub moveMsg {

    # Copy message $msgnum from the selected mailbox into $dstmbx, creating
    # $dstmbx first.  Returns 1 when the COPY succeeded and 0 when no
    # message number was given; a rejected COPY is logged and ends the
    # program.

    my ($mbx, $msgnum, $dstmbx, $conn) = @_;
    my $moved = 0;

    return 0 unless $msgnum;

    Log(" Moving msgnum $msgnum to $dstmbx");

    # Create the mailbox if it doesn't already exist.  Any tagged reply
    # ends the wait; a refusal (for instance "already exists") is ignored.
    sendCommand ($conn, "1 CREATE \"$dstmbx\"");
    for (;;) {
        readResponse ($conn);
        last if $response =~ /^1 OK/i or $response !~ /^\*/;
    }

    sendCommand ($conn, "1 COPY $msgnum \"$dstmbx\"");
    for (;;) {
        readResponse ( $conn );
        if ( $response =~ /^1 OK/i ) {
            $moved = 1;
            last;
        }
        next unless $response =~ /^1 NO|^1 BAD/;
        Log("unexpected COPY response: $response");
        Log("Please verify that mailbox $dstmbx exists");
        exit;
    }

    return $moved;
}
|
|
|
|
sub hash {

    # Generate an MD5 hex digest of the body of the message referenced by
    # $msg.  The header and the MIME boundary marker lines are left out, so
    # two copies of a message that differ only in those still hash alike.

    my $msg  = shift;
    my $body = '';
    my $boundary;

    # Strip the header and the MIME boundary markers
    my $header = 1;
    foreach my $line ( split(/\n/, $$msg ) ) {
        if ( $header ) {
            if ( $line =~ /boundary\s*=\s*"([^"]+)"/i
                 or $line =~ /boundary\s*=\s*([^\s;"]+)/i ) {
                $boundary = $1;
            }
            # The header ends at the first empty line, whether the message
            # uses CRLF or bare LF line ends.  (Testing for a length of 1
            # only recognised CRLF; with bare LF the header never ended and
            # every message hashed to the same value.)
            $header = 0 if $line =~ /^\r?$/;
        }
        next if $header;

        # Leave out the boundary marker lines.  The boundary is matched
        # literally; it regularly contains regex metacharacters.
        next if defined $boundary and $line =~ /\Q$boundary\E/;

        $body .= "$line\n";
    }

    my $md5 = md5_hex($body);
    Log("md5 hash $md5") if $debug;

    return $md5;
}
|
|
|
|
sub fetchMsg {

    # Fetch the text (body without the header) of message number $msgnum
    # into the scalar referenced by $message.  Returns 1 when a body was
    # read and 0 otherwise.

    my $msgnum  = shift;
    my $conn    = shift;
    my $message = shift;
    my $ok      = 0;

    Log(" Fetching msg $msgnum...") if $debug;

    sendCommand( $conn, "1 FETCH $msgnum body[text]");
    while (1) {
        readResponse ($conn);
        last if $response =~ /^1 NO|^1 BAD|^\* BYE/;
        if ( $response =~ /^1 OK/i ) {
            # Measure the message itself, not the reference to it.
            $size = length($$message);
            last;
        }
        elsif ($response =~ /message number out of range/i) {
            Log ("Error fetching uid $uid: out of range",2);
            $stat=0;
            last;
        }
        elsif ($response =~ /Bogus sequence in FETCH/i) {
            Log ("Error fetching uid $uid: Bogus sequence in FETCH",2);
            $stat=0;
            last;
        }
        elsif ( $response =~ /message could not be processed/i ) {
            Log("Message could not be processed, skipping it ($user,msgnum $msgnum,$destMbx)");
            push(@errors,"Message could not be processed, skipping it ($user,msgnum $msgnum,$destMbx)");
            $stat=0;
            last;
        }
        elsif
            ($response =~ /^\*\s+$msgnum\s+FETCH\s+\(.*(?:BODY\[TEXT\]|RFC822)\s+\{([0-9]+)\}/i) {
            # The command asks for body[text], so the literal is announced
            # as BODY[TEXT] {n}.  Waiting for "RFC822 {n}" alone meant the
            # data was never picked up; that spelling is still accepted.
            my $len = $1;
            my $cc  = 0;
            $$message = "";
            while ( $cc < $len ) {
                my $segment;
                my $n = read ($conn, $segment, $len - $cc);
                if ( !$n ) {
                    Log ("unable to read $len bytes");
                    return 0;
                }
                $$message .= $segment;
                $cc += $n;
            }
            $ok = 1;
        }
    }

    return $ok;
}
|
|
|
|
sub selectMbx {

    # SELECT the mailbox and return the message count taken from the
    # server's EXISTS line (undef when no such line was seen).

    my ($mbx, $conn) = @_;
    my $msgcount;

    sendCommand( $conn, "1 SELECT \"$mbx\"");
    for (;;) {
        readResponse( $conn );
        last if $response =~ /^1 OK/i;

        if ( $response =~ /\* (.+) EXISTS/ ) {
            $msgcount = $1;
            next;
        }

        next unless $response =~ /^1 NO|^1 BAD|^\* BYE/i;
        Log("Unexpected response to SELECT $mbx command: $response");
        last;
    }

    return $msgcount;
}
|
|
|
|
sub namespace {

    # Determine the server's mailbox prefix (if any) and hierarchy
    # delimiter and store them through the references $prefix and
    # $delimiter.  A user-supplied "delimiter [prefix]" string ($mbx_delim)
    # takes precedence; otherwise the server is asked with NAMESPACE, and
    # getDelimiter() is the fallback when that yields no delimiter.

    my ($conn, $prefix, $delimiter, $mbx_delim) = @_;

    if ( $mbx_delim ) {
        # The user has supplied a mbx delimiter and optionally a prefix.
        Log("Using user-supplied mailbox hierarchy delimiter $mbx_delim");
        ($$delimiter,$$prefix) = split(/\s+/, $mbx_delim);
        return;
    }

    @response = ();
    sendCommand( $conn, "1 NAMESPACE");
    for (;;) {
        readResponse( $conn );
        last if $response =~ /^1 OK/i;
        next unless $response =~ /^1 NO|^1 BAD|^\* BYE/i;
        Log("Unexpected response to NAMESPACE command: $response");
        last;
    }

    foreach $_ ( @response ) {
        unless ( /NAMESPACE/i ) {
            last if /^1 NO|^1 BAD|^\* BYE/;
            next;
        }

        # Take the text between the first "((" and the first "))" and
        # split it into prefix and delimiter; the quotes are removed below.
        my $open  = index( $_, '((' );
        my $close = index( $_, '))' );
        my ($val) = split(/\)/, substr($_,$open+2,$close-$open-3));
        ($$prefix,$$delimiter) = split( / /, $val );
        $$prefix    =~ s/"//g;
        $$delimiter =~ s/"//g;

        # Experimental
        if ( $public_mbxs ) {
            # Figure out the public mailbox settings
            /\(\((.+)\)\)\s+\(\((.+)\s+\(\((.+)\)\)/;
            $public = $3;
            $public =~ /"(.+)"\s+"(.+)"/;
            $src_public_prefix = $1 if $conn eq $src;
            $src_public_delim = $2 if $conn eq $src;
            $dst_public_prefix = $1 if $conn eq $dst;
            $dst_public_delim = $2 if $conn eq $dst;
        }
        last;
    }

    if ( !$$delimiter ) {
        # NAMESPACE command is not supported by the server
        # so we will have to figure it out another way.
        $delim = getDelimiter( $conn );
        $$delimiter = $delim;
        $$prefix = '';
    }

    if ( $debug ) {
        Log("prefix >$$prefix<");
        Log("delim >$$delimiter<");
    }
}
|
|
|
|
sub delete_duplicate_msgs {

    # Read the "key|||uid mailbox" work file, sort it so that equal keys
    # become neighbours, and collect every message whose key repeats the
    # one before it into %$deletes (mailbox => "uid,uid,...").  Without -p
    # only the number of duplicates is reported; with -p they are flagged
    # \Deleted and expunged in batches of 500, and the global $total is
    # advanced by the number of messages handed over for deletion.

    my $output_file = shift;
    my $deletes     = shift;
    my $conn        = shift;

    # Sort in Perl instead of shelling out to sort(1): no shell, no second
    # temporary file, and the order does not depend on the locale.
    my @lines;
    if ( open(my $in, '<', $output_file) ) {
        chomp( @lines = <$in> );
        close $in;
        @lines = sort @lines;
    } else {
        Log(" Nothing to check, can't open $output_file: $!");
    }

    # Always start from an empty list.  In global mode this routine runs
    # once per user, and UIDs collected for one account must never be
    # deleted from the next one.
    %$deletes = ();

    my $previous;
    foreach my $line ( @lines ) {
        # key ||| uid <space> mailbox; the key is free text, so anchor on
        # the numeric uid that follows the last separator.
        my ($key, $uid, $mbx) = $line =~ /^(.*)\|\|\|(\d+) (.*)$/s;
        next unless defined $uid;

        if ( $debug ) {
            Log(" key = $key uid = $uid $mbx");
            Log(" prev = $previous uid = $uid $mbx");
        }

        # "defined": an empty key on the very first line must not be
        # taken for a repeat of the still-undefined $previous.
        if ( defined $previous and $key eq $previous ) {
            Log(" Found a duplicate, delete uid $uid $mbx") if $debug;
            $$deletes{"$mbx"} .= "$uid,";
        }
        $previous = $key;
    }

    unlink $output_file if -e $output_file;
    unlink $sorted_file if defined $sorted_file and -e $sorted_file;

    if ( !$purge ) {
        # Count the messages, not the mailboxes: every uid in a list is
        # followed by exactly one comma.
        my $n = 0;
        $n += ( $_ =~ tr/,// ) for values %$deletes;
        Log(" Would have deleted $n duplicates") if $n != 0;
        return;
    }

    foreach my $mbx ( keys %$deletes ) {
        ( my $msglist = $$deletes{$mbx} ) =~ s/,$//;
        Log(" Duplicate messages $msglist");
        selectMbx( $mbx, $conn);
        my @uids = split(/,/, $msglist);
        my $n = scalar @uids;
        Log(" Deleting $n duplicates");
        Log(" Deleting in batches of 500") if $n > 499;

        # splice() hands out full batches and then whatever is left over.
        # (The old counter was reset after each batch and then compared
        # with the overall total, so a final partial batch after a full
        # one was never sent.)
        while ( my @batch = splice(@uids, 0, 500) ) {
            my $count = scalar @batch;
            $total += $count;
            Log(" Expunging $count messages") if $debug;
            delete_duplicates( join(',', @batch), $mbx, $conn );
        }
    }
}
|