diff --git a/ChangeLog b/ChangeLog index 25483ee..e85d190 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,17 +1,24 @@ RCS file: RCS/imapsync,v Working file: imapsync -head: 1.343 +head: 1.344 branch: locks: strict - gilles: 1.343 + gilles: 1.344 access list: symbolic names: keyword substitution: kv -total revisions: 343; selected revisions: 343 +total revisions: 344; selected revisions: 344 description: ---------------------------- -revision 1.343 locked by: gilles; +revision 1.344 locked by: gilles; +date: 2010/08/20 02:06:13; author: gilles; state: Exp; lines: +123 -49 +Added function fetch_hash_2() allowing a list of uid to be fetched. +Changed fetch_hash() calls by fetch_hash_2() calls +This fixed the bug "Be more effiscient with large mailboxes", +search ALL was useless with --maxage or --minage in fetch_hash() +---------------------------- +revision 1.343 date: 2010/08/15 18:02:11; author: gilles; state: Exp; lines: +9 -7 Fix. bug in _read_line sysread can return undef; ---------------------------- diff --git a/README b/README index 8f9cbb0..e31ce97 100644 --- a/README +++ b/README @@ -3,7 +3,7 @@ NAME Synchronise mailboxes between two imap servers. Good at IMAP migration. More than 36 different IMAP server softwares supported with success. - $Revision: 1.343 $ + $Revision: 1.344 $ SYNOPSIS To synchronise imap account "foo" on "imap.truc.org" to imap account @@ -87,7 +87,7 @@ USAGE [--pidfile ] [--tmpdir ] [--version] [--help] - + DESCRIPTION The command imapsync is a tool allowing incremental and recursive imap transfer from one mailbox to another. @@ -400,5 +400,5 @@ SIMILAR SOFTWARES Feedback (good or bad) will often be welcome. - $Id: imapsync,v 1.343 2010/08/15 18:02:11 gilles Exp gilles $ + $Id: imapsync,v 1.344 2010/08/20 02:06:13 gilles Exp gilles $ diff --git a/TIME b/TIME index 45033e6..5ee82a9 100644 --- a/TIME +++ b/TIME @@ -1,3 +1,4 @@ +150 Various. Upload 1.342 75 Fix. capability issue after authenticate. 240 Various 50 3 replies on list. diff --git a/TODO b/TODO index 9e9e4d2..d94e1a1 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,5 @@ #!/bin/cat -# $Id: TODO,v 1.81 2010/08/15 11:15:42 gilles Exp gilles $ +# $Id: TODO,v 1.83 2010/08/20 02:12:05 gilles Exp gilles $ TODO file for imapsync ---------------------- @@ -22,9 +22,7 @@ http://www.rackspace.com/apps/email_hosting/migrations http://www.yippiemove.com/ -Be more effiscient with large mailboxes -Write a Mail::IMAPClient::fetch_hash allowing selecting messages to fetch -4 hours estimated time coding. +Add --noauthmd51 --noauthmd52 to permit noauthmd5 by host Add a well described problem for each problem detected and counted in error counter statistics. @@ -44,6 +42,9 @@ Not everything but flag synchronization will be nice" Add sync imap keywords. Sync gmail labels to imap keyword http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=503159 +http://www.linux-france.org/prj/imapsync_list/msg00022.html +http://mail.google.com/support/bin/answer.py?hl=en&answer=77657 +http://mail.google.com/support/bin/answer.py?answer=78892 Add and option to sync to & from files. @@ -157,6 +158,10 @@ Explain expunge behavior. =========================================================================== +DONE.Be more effiscient with large mailboxes +Write a Mail::IMAPClient::fetch_hash allowing selecting messages to fetch +4 hours estimated time coding. Time spent 4h30 (with public release and emails) + DONE. Bugfix. Duplicate messages on host2 are not deleted with --delete2 Reason: "Skipping msg #120:508 in host2 folder INBOX.2005-INBOX (duplicate so we ignore this message)" diff --git a/VERSION b/VERSION index 6533362..25f7daf 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.343 +1.344 diff --git a/imapsync b/imapsync index eb7e181..4710342 100755 --- a/imapsync +++ b/imapsync @@ -19,7 +19,7 @@ tool. Synchronise mailboxes between two imap servers. Good at IMAP migration. More than 36 different IMAP server softwares supported with success. -$Revision: 1.343 $ +$Revision: 1.344 $ =head1 SYNOPSIS @@ -469,7 +469,7 @@ Entries for imapsync: Feedback (good or bad) will often be welcome. -$Id: imapsync,v 1.343 2010/08/15 18:02:11 gilles Exp gilles $ +$Id: imapsync,v 1.344 2010/08/20 02:06:13 gilles Exp gilles $ =cut @@ -557,7 +557,7 @@ my( # global variables initialisation -$rcs = '$Id: imapsync,v 1.343 2010/08/15 18:02:11 gilles Exp gilles $ '; +$rcs = '$Id: imapsync,v 1.344 2010/08/20 02:06:13 gilles Exp gilles $ '; $total_bytes_transferred = 0; $total_bytes_skipped = 0; @@ -1176,34 +1176,24 @@ sub foldersizes { print("does not exist yet\n"); next; } - unless ($imap->select($folder)) { + unless ($imap->examine($folder)) { warn - "$side Folder $folder: Could not select: ", + "$side Folder $folder: Could not examine: ", $imap->LastError, "\n"; $nb_errors++; next; } - if (defined($maxage) or defined($minage)) { - # The pb is fetch_hash() can only be applied on ALL messages - my @msgs = select_msgs($imap); - $smess = scalar(@msgs); - foreach my $m (@msgs) { - my $s = $imap->size($m) - or warn "Could not find size of message $m: $@\n"; - $stot += $s; - } - } - else{ - my $hashref = {}; - $smess = $imap->message_count(); - unless ($smess == 0) { - #$imap->Ranges(1); - $imap->fetch_hash("RFC822.SIZE",$hashref) or die_clean("$@"); - #$imap->Ranges(0); - #print map {$hashref->{$_}->{"RFC822.SIZE"}, " "} keys %$hashref; - map {$stot += $hashref->{$_}->{"RFC822.SIZE"}} keys %$hashref; - } + + my $hash_ref = {}; + my @msgs = select_msgs($imap); + $smess = scalar(@msgs); + @$hash_ref{@msgs} = (undef); + unless ($smess == 0) { + $imap->fetch_hash_2("RFC822.SIZE",$hash_ref) or die_clean("$@"); + #print map {$hash_ref->{$_}->{"RFC822.SIZE"}, " "} keys %$hash_ref; + map {$stot += $hash_ref->{$_}->{"RFC822.SIZE"}} keys %$hash_ref; } + printf(" Size: %9s", $stot); printf(" Messages: %5s\n", $smess); $tot += $stot; @@ -1607,23 +1597,22 @@ FOLDER: foreach my $h1_fold (@h1_folders) { my %h1_hash = (); my %h2_hash = (); - #print "++++ Using cache ++++\n"; - print "++++ Host1 [$h1_fold] parsing headers ++++\n"; last FOLDER if $imap1->IsUnconnected(); last FOLDER if $imap2->IsUnconnected(); - my ($h1_heads, $h1_fir) = ({}, {}); - $h1_heads = $imap1->parse_headers([@h1_msgs], @useheader) if (@h1_msgs); + my ($h1_heads_ref, $h1_fir_ref) = ({}, {}); + $h1_heads_ref = $imap1->parse_headers([@h1_msgs], @useheader) if (@h1_msgs); $debug and print "Time headers: ", timenext(), " s\n"; last FOLDER if $imap1->IsUnconnected(); - $h1_fir = $imap1->fetch_hash("FLAGS", "INTERNALDATE", "RFC822.SIZE") + @$h1_fir_ref{@h1_msgs} = (undef); + $h1_fir_ref = $imap1->fetch_hash_2("FLAGS", "INTERNALDATE", "RFC822.SIZE", $h1_fir_ref) if (@h1_msgs); $debug and print "Time fir: ", timenext(), " s\n"; - unless ($h1_fir) { + unless ($h1_fir_ref) { warn - "Host1 Folder $h1_fold: Could not fetch_hash ", + "Host1 Folder $h1_fold: Could not fetch_hash_2 ", scalar(@h1_msgs), " msgs: ", $imap1->LastError, "\n"; $nb_errors++; next FOLDER; @@ -1633,9 +1622,9 @@ FOLDER: foreach my $h1_fold (@h1_folders) { my @h1_msgs_duplicate; foreach my $m (@h1_msgs) { - my $rc = parse_header_msg($imap1, $m, $h1_heads, $h1_fir, "F", \%h1_hash); + my $rc = parse_header_msg($imap1, $m, $h1_heads_ref, $h1_fir_ref, "F", \%h1_hash); if (! defined($rc)) { - my $h1_size = $h1_fir->{$m}->{"RFC822.SIZE"} || 0; + my $h1_size = $h1_fir_ref->{$m}->{"RFC822.SIZE"} || 0; print "+ Skipping msg #$m:$h1_size on host1 folder $h1_fold (no header so we ignore this message)\n"; $total_bytes_skipped += $h1_size; $nb_msg_skipped += 1; @@ -1644,7 +1633,7 @@ FOLDER: foreach my $h1_fold (@h1_folders) { # duplicate push(@h1_msgs_duplicate, $m); # duplicate, same id same size? - my $h1_size = $h1_fir->{$m}->{"RFC822.SIZE"} || 0; + my $h1_size = $h1_fir_ref->{$m}->{"RFC822.SIZE"} || 0; $nb_msg_skipped += 1; $h1_total_bytes_duplicate += $h1_size; $h1_nb_msg_duplicate += 1; @@ -1654,26 +1643,27 @@ FOLDER: foreach my $h1_fold (@h1_folders) { print "++++ Host2 [$h2_fold] parsing headers ++++\n"; - my ($h2_heads, $h2_fir) = ({}, {}); - $h2_heads = $imap2->parse_headers([@h2_msgs], @useheader) if (@h2_msgs); + my ($h2_heads_ref, $h2_fir_ref) = ({}, {}); + $h2_heads_ref = $imap2->parse_headers([@h2_msgs], @useheader) if (@h2_msgs); $debug and print "Time headers: ", timenext(), " s\n"; last FOLDER if $imap2->IsUnconnected(); - $h2_fir = $imap2->fetch_hash("FLAGS", "INTERNALDATE", "RFC822.SIZE") + @$h2_fir_ref{@h2_msgs} = (undef); # fetch_hash_2 can select by uid with last arg as ref + $h2_fir_ref = $imap2->fetch_hash_2("FLAGS", "INTERNALDATE", "RFC822.SIZE", $h2_fir_ref) if (@h2_msgs); $debug and print "Time fir: ", timenext(), " s\n"; last FOLDER if $imap2->IsUnconnected(); my @h2_msgs_duplicate; foreach my $m (@h2_msgs) { - my $rc = parse_header_msg($imap2, $m, $h2_heads, $h2_fir, "T", \%h2_hash); + my $rc = parse_header_msg($imap2, $m, $h2_heads_ref, $h2_fir_ref, "T", \%h2_hash); if (! defined($rc)) { - my $h2_size = $h2_fir->{$m}->{"RFC822.SIZE"} || 0; + my $h2_size = $h2_fir_ref->{$m}->{"RFC822.SIZE"} || 0; print "+ Skipping msg #$m:$h2_size in host2 folder $h2_fold (no header so we ignore this message)\n"; $h2_nb_msg_noheader += 1 ; } elsif(0 == $rc) { # duplicate - my $h2_size = $h2_fir->{$m}->{"RFC822.SIZE"} || 0; + my $h2_size = $h2_fir_ref->{$m}->{"RFC822.SIZE"} || 0; $h2_nb_msg_duplicate += 1; $h2_total_bytes_duplicate += $h2_size; push(@h2_msgs_duplicate, $m); @@ -2096,7 +2086,7 @@ exit_clean(0); # subroutines sub imapsync_version { - my $rcs = '$Id: imapsync,v 1.343 2010/08/15 18:02:11 gilles Exp gilles $ '; + my $rcs = '$Id: imapsync,v 1.344 2010/08/20 02:06:13 gilles Exp gilles $ '; $rcs =~ m/,v (\d+\.\d+)/; my $VERSION = ($1) ? $1: "UNKNOWN"; return($VERSION); @@ -2180,8 +2170,8 @@ sub banner_imapsync { my @argv_copy = @_; my $banner_imapsync = join("", '$RCSfile: imapsync,v $ ', - '$Revision: 1.343 $ ', - '$Date: 2010/08/15 18:02:11 $ ', + '$Revision: 1.344 $ ', + '$Date: 2010/08/20 02:06:13 $ ', "\n",localhost_info(), "\n", "Command line used:\n", "$0 ", command_line_nopassword(@argv_copy), "\n", @@ -2927,12 +2917,12 @@ use constant NonFolderArg => 1; # Value to pass to Massage to s/([\( ])FAST([\) ])/${1}FLAGS INTERNALDATE RFC822\.SIZE$2/i ; s/([\( ])FULL([\) ])/${1}FLAGS INTERNALDATE RFC822\.SIZE ENVELOPE BODY$2/i ; } - my $msgref_all = scalar($self->messages); - my $split = $self->Split() || scalar(@$msgref_all); - while(my @msgs = splice(@$msgref_all, 0, $split)) { + my $msgs_ref_all = scalar($self->messages); + my $split = $self->Split() || scalar(@$msgs_ref_all); + while(my @msgs = splice(@$msgs_ref_all, 0, $split)) { #print "SPLIT: @msgs\n"; - my $msgref = \@msgs; - my $output = scalar($self->fetch($msgref,"(" . join(" ",@_) . ")")) + my $msgs_ref = \@msgs; + my $output = scalar($self->fetch($msgs_ref,"(" . join(" ",@_) . ")")) ; # unless grep(/\b(?:FAST|FULL)\b/i,@words); my $x; for ($x = 0; $x <= $#$output ; $x++) { @@ -3920,3 +3910,87 @@ sub capability_update { $self->capability; } +sub fetch_hash_2 { + # taken from above *Mail::IMAPClient::fetch_hash + # if last arg is a ref then the fetch is done only + # on the messages listed as the keys of this hash. + # Init an "empty" $hash_ref by value can be done this way: + # @$hash_ref{2, 3, 4, 55} = (undef); + + my $self = shift; + my $hash_ref = ref($_[-1]) ? pop @_ : {}; + my @words = @_; + for (@words) { + s/([\( ])FAST([\) ])/${1}FLAGS INTERNALDATE RFC822\.SIZE$2/i ; + s/([\( ])FULL([\) ])/${1}FLAGS INTERNALDATE RFC822\.SIZE ENVELOPE BODY$2/i ; + } + + my $msgs_ref_all; + if (scalar %$hash_ref) { + $msgs_ref_all = [ sort { $a <=> $b } keys (%$hash_ref) ]; + #print "ZZZZ 1 [@$msgs_ref_all]\n"; + }else{ + $msgs_ref_all = scalar($self->messages); + #print "ZZZZ 2 [@$msgs_ref_all]\n"; + } + + my $split = $self->Split() || scalar(@$msgs_ref_all); + while(my @msgs = splice(@$msgs_ref_all, 0, $split)) { + #print "SPLIT: @msgs\n"; + my $msgs_ref = \@msgs; + my $output = scalar($self->fetch($msgs_ref,"(" . join(" ",@_) . ")")) + ; # unless grep(/\b(?:FAST|FULL)\b/i,@words); + my $x; + for ($x = 0; $x <= $#$output ; $x++) { + my $entry = {}; + my $l = $output->[$x]; + if ($self->Uid) { + my($uid) = $l =~ /\((?:.* )?UID (\d+).*\)/i; + next unless $uid; + if ( defined $hash_ref->{$uid} ) { + $entry = $hash_ref->{$uid} ; + } + else { + $hash_ref->{$uid} ||= $entry; + } + } + else { + my($mid) = $l =~ /^\* (\d+) FETCH/i; + next unless $mid; + if ( defined $hash_ref->{$mid} ) { + $entry = $hash_ref->{$mid} ; + } + else { + $hash_ref->{$mid} ||= $entry; + } + } + + foreach my $w (@words) { + if ( $l =~ /\Q$w\E\s*$/i ) { + $entry->{$w} = $output->[$x+1]; + $entry->{$w} =~ s/(?:\x0a?\x0d)+$//g; + chomp $entry->{$w}; + } + else { + $l =~ /\( # open paren followed by ... + (?:.*\s)? # ...optional stuff and a space + \Q$w\E\s # escaped fetch field + (?:" # then: a dbl-quote + (\\.| # then bslashed anychar(s) or ... + [^"]+) # ... nonquote char(s) + "| # then closing quote; or ... + \( # ...an open paren + (\\.| # then bslashed anychar or ... + [^\)]*) # ... non-close-paren char + \)| # then closing paren; or ... + (\S+)) # unquoted string + (?:\s.*)? # possibly followed by space-stuff + \) # close paren + /xi; + $entry->{$w}=defined($1)?$1:defined($2)?$2:$3; + } + } + } +} + return wantarray ? %$hash_ref : $hash_ref; +} diff --git a/index.shtml b/index.shtml index d4af36c..923bda6 100644 --- a/index.shtml +++ b/index.shtml @@ -5,7 +5,7 @@ imapsync <!--#exec cmd="cat VERSION" --> - + @@ -22,11 +22,14 @@

imapsync software is a command line tool allowing incremental and recursive imap transfers from one mailbox to another, both anywhere on the internet. +

-imapsync is useful for imap account migration or imap account backup. +

imapsync is useful for imap account migration or imap account backup. +

-imapsync is not adequat for maintening two active imap accounts in synchronization -(where user plays independently on both sides). +

imapsync is not adequat for maintening two active imap accounts in synchronization +where the user plays independently on both sides. Use offlineimap +(written by John Goerzen) for this purpose.

@@ -140,13 +143,14 @@ If you really want a feature you can donate money and I'll code it.

Some features and their time/money to be done evaluation

- - - - - - - + + + + + + + +
Feature Time guessedTime spentMoney receivedMoney needed
Speedup 50% 10 hours 80 min 10 $ 300 $
Backup to files 8 hours 60 min 0 $ 240 $
--deletefolder2 3 hours 30 min 0 $ 90 $
NTLM auth 3 hours 60 min 0 $ 90 $
Win32 imapsync.exe 8 hours 520 min 0 $ 240 $
Fix capability changes 1 hour 80 min 0 $ 240 $
DONEFeature Time guessedTime spentMoney receivedMoney needed
NoSpeedup 50% 10 hours 80 min 10 $ 300 $
NoBackup to files 8 hours 60 min 0 $ 240 $
No--deletefolder2 3 hours 30 min 0 $ 90 $
NoNTLM auth 3 hours 60 min 0 $ 90 $
YesWin32 imapsync.exe 8 hours 520 min 0 $ 240 $
YesFix capability changes 1 hour 80 min 0 $ 30 $
YesLarge mailbox --maxage 4 hours 270 min 0 $ 120 $

COPYING

@@ -164,7 +168,7 @@ If you really want a feature you can donate money and I'll code it.
This document last modified
-$Id: index.shtml,v 1.18 2010/08/15 11:33:12 gilles Exp gilles $ +$Id: index.shtml,v 1.20 2010/08/20 03:14:54 gilles Exp gilles $

diff --git a/tests.sh b/tests.sh index 037e502..0c11f6a 100644 --- a/tests.sh +++ b/tests.sh @@ -1,10 +1,15 @@ #!/bin/sh -# $Id: tests.sh,v 1.111 2010/08/15 17:35:37 gilles Exp gilles $ +# $Id: tests.sh,v 1.114 2010/08/20 02:05:26 gilles Exp gilles $ -# Example: +# Example 1: # CMD_PERL='perl -I./Mail-IMAPClient-3.25/lib' sh -x tests.sh +# Example 2: +# To select which Mail-IMAPClient within arguments: +# sh -x tests.sh 2 locallocal 3 locallocal +# run locallocal() with Mail-IMAPClient-2.2.9 then +# again with Mail-IMAPClient-3.xx HOST1=${HOST1:-'localhost'} echo HOST1=$HOST1 @@ -43,6 +48,8 @@ run_test() { run_tests() { for t in "$@"; do + test X"$t" = X2 && CMD_PERL=$CMD_PERL_2xx && continue + test X"$t" = X3 && CMD_PERL=$CMD_PERL_3xx && continue test_count=`expr 1 + $test_count` run_test "$t" #sleep 1 @@ -97,6 +104,7 @@ sendtestmessage() { can_send() { test X`hostname` = X"plume" && return 0; test X`hostname` = X"vadrouille" && return 0; + test X`hostname` = X"petite" && return 0; return 1 } @@ -219,10 +227,17 @@ ll_oneemail() { --passfile1 ../../var/pass/secret.tata \ --host2 $HOST2 --user2 titi \ --passfile2 ../../var/pass/secret.titi \ - --folder INBOX.oneemail \ - --allow3xx + --folder INBOX.oneemail } +ll_few_emails() { + $CMD_PERL ./imapsync \ + --host1 $HOST1 --user1 tata \ + --passfile1 ../../var/pass/secret.tata \ + --host2 $HOST2 --user2 titi \ + --passfile2 ../../var/pass/secret.titi \ + --folder INBOX.few_emails +} ll_folderrec() { @@ -435,19 +450,25 @@ ll_noauthmd5() ll_maxage() { - if can_send; then - #echo3 Here is plume - sendtestmessage - else - : - fi - $CMD_PERL ./imapsync \ - --host1 $HOST1 --user1 tata \ - --passfile1 ../../var/pass/secret.tata \ - --host2 $HOST2 --user2 titi \ - --passfile2 ../../var/pass/secret.titi \ - --maxage 1 \ - --allow3xx + can_send && sendtestmessage + $CMD_PERL ./imapsync \ + --host1 $HOST1 --user1 tata \ + --passfile1 ../../var/pass/secret.tata \ + --host2 $HOST2 --user2 titi \ + --passfile2 ../../var/pass/secret.titi \ + --maxage 1 +} + +ll_maxage_9999() +{ +# can_send && sendtestmessage + $CMD_PERL ./imapsync \ + --host1 $HOST1 --user1 tata \ + --passfile1 ../../var/pass/secret.tata \ + --host2 $HOST2 --user2 titi \ + --passfile2 ../../var/pass/secret.titi \ + --justfoldersizes --folder INBOX \ + --maxage 9999 } @@ -1009,7 +1030,7 @@ gmail_gmail() { --ssl2 \ --user2 gilles.lamiral@gmail.com \ --passfile2 ../../var/pass/secret.gilles_gmail \ - --useheader 'Message-Id' --skipsize \ + --useheader 'Message-Id' --useheader="X-Gmail-Received" \ --regextrans2 's¤INBOX¤inbox_copy¤' \ --folder INBOX \ --authmech1 LOGIN --authmech2 LOGIN \ @@ -1361,6 +1382,7 @@ ll_ask_password ll_bug_folder_name_with_blank ll_timeout ll_folder +ll_oneemail ll_buffersize ll_justfolders ll_prefix12 @@ -1425,6 +1447,7 @@ l() { run_tests perl_syntax + # All tests test $# -eq 0 && run_tests $mandatory_tests