1
0
mirror of https://github.com/imapsync/imapsync.git synced 2024-11-17 08:12:48 +01:00
imapsync/W/Mail-IMAPClient-3.40/lib/Mail/IMAPClient/BodyStructure.pm

577 lines
17 KiB
Perl
Raw Normal View History

2019-07-03 01:20:46 +02:00
use warnings;
use strict;
package Mail::IMAPClient::BodyStructure;
use Mail::IMAPClient::BodyStructure::Parse;
# BUG?: old code used name "HEAD" instead of "HEADER", change?
my $HEAD = "HEAD";
# my has file scope, not limited to package!
my $parser = Mail::IMAPClient::BodyStructure::Parse->new
or die "Cannot parse rules: $@\n"
. "Try remaking Mail::IMAPClient::BodyStructure::Parse.\n";
sub new {
my $class = shift;
my $bodystructure = shift;
my $self = $parser->start($bodystructure)
or return undef;
$self->{_prefix} = "";
$self->{_id} = exists $self->{bodystructure} ? $HEAD : 1;
$self->{_top} = 1;
bless $self, ref($class) || $class;
}
sub _get_thingy {
my $thingy = shift;
my $object = shift || ( ref $thingy ? $thingy : undef );
unless ( $object && ref $object ) {
warn $@ = "No argument passed to $thingy method.";
return undef;
}
unless ( UNIVERSAL::isa( $object, 'HASH' ) && exists $object->{$thingy} ) {
my $a = $thingy =~ /^[aeiou]/i ? 'an' : 'a';
my $has = ref $object eq 'HASH' ? join( ", ", keys %$object ) : '';
warn $@ =
ref($object)
. " $object does not have $a $thingy. "
. ( $has ? "It has $has" : '' );
return undef;
}
my $value = $object->{$thingy};
$value =~ s/\\ ( [\\\(\)"\x0d\x0a] )/$1/gx;
$value =~ s/^"(.*)"$/$1/;
$value;
}
BEGIN {
no strict 'refs';
foreach my $datum (
qw/ bodytype bodysubtype bodyparms bodydisp bodyid bodydesc bodyenc
bodysize bodylang envelopestruct textlines /
)
{
*$datum = sub { _get_thingy( $datum, @_ ) };
}
}
sub parts {
my $self = shift;
return wantarray ? @{ $self->{PartsList} } : $self->{PartsList}
if exists $self->{PartsList};
my @parts;
$self->{PartsList} = \@parts;
# BUG?: should this default to ($HEAD, TEXT)
unless ( exists $self->{bodystructure} ) {
$self->{PartsIndex}{1} = $self;
@parts = ( $HEAD, 1 );
return wantarray ? @parts : \@parts;
}
foreach my $p ( $self->bodystructure ) {
my $id = $p->id;
push @parts, $id;
$self->{PartsIndex}{$id} = $p;
my $type = uc $p->bodytype || '';
push @parts, "$id.$HEAD"
if $type eq 'MESSAGE';
}
wantarray ? @parts : \@parts;
}
sub bodystructure {
my $self = shift;
my $partno = 0;
my @parts;
if ( $self->{_top} ) {
$self->{_id} ||= $HEAD;
$self->{_prefix} ||= $HEAD;
$partno = 0;
foreach my $b ( @{ $self->{bodystructure} } ) {
$b->{_id} = ++$partno;
$b->{_prefix} = $partno;
push @parts, $b, $b->bodystructure;
}
return wantarray ? @parts : \@parts;
}
my $prefix = $self->{_prefix} || "";
$prefix =~ s/\.?$/./;
foreach my $p ( @{ $self->{bodystructure} } ) {
$partno++;
# BUG?: old code didn't add .TEXT sections, should we skip these?
# - This code needs to be generalised (maybe it belongs in parts()?)
# - Should every message should have HEAD (actually MIME) and TEXT?
# at least dovecot and iplanet appear to allow this even for
# non-multipart sections
my $pno = $partno;
my $stype = $self->{bodytype} || "";
my $ptype = $p->{bodytype} || "";
# a message and the multipart inside of it "collapse together"
if ( $partno == 1 and $stype eq 'MESSAGE' and $ptype eq 'MULTIPART' ) {
$pno = "TEXT";
$p->{_prefix} = "$prefix";
}
else {
$p->{_prefix} = "$prefix$partno";
}
$p->{_id} ||= "$prefix$pno";
push @parts, $p, $p->{bodystructure} ? $p->bodystructure : ();
}
wantarray ? @parts : \@parts;
}
sub id {
my $self = shift;
return $self->{_id}
if exists $self->{_id};
return $HEAD
if $self->{_top};
# BUG?: can this be removed? ... seems wrong
if ( $self->{bodytype} eq 'MULTIPART' ) {
my $p = $self->{_id} || $self->{_prefix};
$p =~ s/\.$//;
return $p;
}
else {
return $self->{_id} ||= 1;
}
}
package Mail::IMAPClient::BodyStructure::Part;
our @ISA = qw/Mail::IMAPClient::BodyStructure/;
package Mail::IMAPClient::BodyStructure::Envelope;
our @ISA = qw/Mail::IMAPClient::BodyStructure/;
sub new {
my ( $class, $envelope ) = @_;
$parser->envelope($envelope);
}
sub parse_string {
my ( $class, $envelope ) = @_;
$envelope = "(" . $envelope . ")" unless ( $envelope =~ /^\(/ );
$parser->envelopestruct($envelope);
}
sub from_addresses { shift->_addresses( from => 1 ) }
sub sender_addresses { shift->_addresses( sender => 1 ) }
sub replyto_addresses { shift->_addresses( replyto => 1 ) }
sub to_addresses { shift->_addresses( to => 0 ) }
sub cc_addresses { shift->_addresses( cc => 0 ) }
sub bcc_addresses { shift->_addresses( bcc => 0 ) }
sub _addresses($$$) {
my ( $self, $name, $isSender ) = @_;
ref $self->{$name} eq 'ARRAY'
or return ();
my @list;
foreach ( @{ $self->{$name} } ) {
my $pn = $_->personalname;
my $name = $pn && $pn ne 'NIL' ? "$pn " : '';
push @list, $name . '<' . $_->mailboxname . '@' . $_->hostname . '>';
}
wantarray ? @list
: $isSender ? $list[0]
: \@list;
}
BEGIN {
no strict 'refs';
for my $datum (
qw(subject inreplyto from messageid bcc date
replyto to sender cc)
)
{
*$datum = sub { @_ > 1 ? $_[0]->{$datum} = $_[1] : $_[0]->{$datum} }
}
}
package Mail::IMAPClient::BodyStructure::Address;
our @ISA = qw/Mail::IMAPClient::BodyStructure/;
for my $datum (qw(personalname mailboxname hostname sourcename)) {
no strict 'refs';
*$datum = sub { shift->{$datum}; };
}
1;
__END__
=head1 NAME
Mail::IMAPClient::BodyStructure - parse fetched results
=head1 SYNOPSIS
use Mail::IMAPClient;
use Mail::IMAPClient::BodyStructure;
my $imap = Mail::IMAPClient->new(
Server => $server, User => $login, Password => $pass
);
$imap->select("INBOX") or die "Could not select INBOX: $@\n";
my @recent = $imap->search("recent") or die "No recent msgs in INBOX\n";
foreach my $id (@recent) {
my $bsdat = $imap->fetch( $id, "bodystructure" );
my $bso = Mail::IMAPClient::BodyStructure->new( join("", $imap->History) );
my $mime = $bso->bodytype . "/" . $bso->bodysubtype;
my $parts = map( "\n\t" . $_, $bso->parts );
print "Msg $id (Content-type: $mime) contains these parts:$parts\n";
}
=head1 DESCRIPTION
This extension will parse the result of an IMAP FETCH BODYSTRUCTURE
command into a perl data structure. It also provides helper methods
to help pull information out of the data structure.
This module requires Parse::RecDescent.
=head1 Class Methods
The following class method is available:
=head2 new
This class method is the constructor method for instantiating new
Mail::IMAPClient::BodyStructure objects. The B<new> method accepts
one argument, a string containing a server response to a FETCH
BODYSTRUCTURE directive.
The module B<Mail::IMAPClient> provides the B<get_bodystructure>
convenience method to simplify use of this module when starting with
just a messages sequence number or unique ID (UID).
=head1 Object Methods
The following object methods are available:
=head2 bodytype
The B<bodytype> object method requires no arguments. It returns the
bodytype for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodysubtype
The B<bodysubtype> object method requires no arguments. It returns the
bodysubtype for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodyparms
The B<bodyparms> object method requires no arguments. It returns the
bodyparms for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodydisp
The B<bodydisp> object method requires no arguments. It returns the
bodydisp for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodyid
The B<bodyid> object method requires no arguments. It returns the
bodyid for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodydesc
The B<bodydesc> object method requires no arguments. It returns the
bodydesc for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodyenc
The B<bodyenc> object method requires no arguments. It returns the
bodyenc for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodysize
The B<bodysize> object method requires no arguments. It returns the
bodysize for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodylang
The B<bodylang> object method requires no arguments. It returns the
bodylang for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head2 bodystructure
The B<bodystructure> object method requires no arguments. It returns
the bodystructure for the message whose structure is described by the
calling B<Mail::IMAPClient::Bodystructure> object.
=head2 envelopestruct
The B<envelopestruct> object method requires no arguments. It returns
a B<Mail::IMAPClient::BodyStructure::Envelope> object for the message
from the calling B<Mail::IMAPClient::Bodystructure> object.
=head2 textlines
The B<textlines> object method requires no arguments. It returns the
textlines for the message whose structure is described by the calling
B<Mail::IMAPClient::Bodystructure> object.
=head1 Mail::IMAPClient::BodyStructure::Envelope
The IMAP standard specifies that output from the IMAP B<FETCH
ENVELOPE> command will be an RFC2060 envelope structure. It further
specifies that output from the B<FETCH BODYSTRUCTURE> command may also
contain embedded envelope structures (if, for example, a message's
subparts contain one or more included messages). Objects belonging to
B<Mail::IMAPClient::BodyStructure::Envelope> are Perl representations
of these envelope structures, which is to say the nested parenthetical
lists of RFC2060 translated into a Perl datastructure.
Note that all of the fields relate to the specific part to which they
belong. In other words, output from a FETCH nnnn ENVELOPE command
(or, in B<Mail::IMAPClient>, C<$imap->fetch($msgid,"ENVELOPE")> or
C<my $env = $imap->get_envelope($msgid)>) are for the message, but
fields from within a bodystructure relate to the message subpart and
not the parent message.
An envelope structure's B<Mail::IMAPClient::BodyStructure::Envelope>
representation is a hash of thingies that looks like this:
{
subject => "subject",
inreplyto => "reference_message_id",
from => [ addressStruct1 ],
messageid => "message_id",
bcc => [ addressStruct1, addressStruct2 ],
date => "Tue, 09 Jul 2002 14:15:53 -0400",
replyto => [ adressStruct1, addressStruct2 ],
to => [ adressStruct1, addressStruct2 ],
sender => [ adressStruct1 ],
cc => [ adressStruct1, addressStruct2 ],
}
The B<...::Envelope> object also has methods for accessing data in the
structure. They are:
=over 4
=item date
Returns the date of the message.
=item inreplyto
Returns the message id of the message to which this message is a reply.
=item subject
Returns the subject of the message.
=item messageid
Returns the message id of the message.
=back
You can also use the following methods to get addressing information.
Each of these methods returns an array of
B<Mail::IMAPClient::BodyStructure::Address> objects, which are perl
data structures representing RFC2060 address structures. Some of
these arrays would naturally contain one element (such as B<from>,
which normally contains a single "From:" address); others will often
contain more than one address. However, because RFC2060 defines all
of these as "lists of address structures", they are all translated
into arrays of B<...::Address> objects.
See the section on B<Mail::IMAPClient::BodyStructure::Address>, below,
for alternate (and preferred) ways of accessing these data.
The methods available are:
=over 4
=item bcc
Returns an array of blind cc'ed recipients' address structures.
(Don't expect much in here unless the message was sent from the
mailbox you're poking around in, by the way.)
=item cc
Returns an array of cc'ed recipients' address structures.
=item from
Returns an array of "From:" address structures--usually just one.
=item replyto
Returns an array of "Reply-to:" address structures. Once again there
is usually just one address in the list.
=item sender
Returns an array of senders' address structures--usually just one and
usually the same as B<from>.
=item to
Returns an array of recipients' address structures.
=back
Each of the methods that returns a list of address structures (i.e. a
list of B<Mail::IMAPClient::BodyStructure::Address> arrays) also has
an analogous method that will return a list of E-Mail addresses
instead. The addresses are in the format C<personalname
E<lt>mailboxname@hostnameE<gt>> (see the section on
B<Mail::IMAPClient::BodyStructure::Address>, below) However, if the
personal name is 'NIL' then it is omitted from the address.
These methods are:
=over 4
=item bcc_addresses
Returns a list (or an array reference if called in scalar context) of
blind cc'ed recipients' email addresses. (Don't expect much in here
unless the message was sent from the mailbox you're poking around in,
by the way.)
=item cc_addresses
Returns a list of cc'ed recipients' email addresses. If called in a
scalar context it returns a reference to an array of email addresses.
=item from_addresses
Returns a list of "From:" email addresses. If called in a scalar
context it returns the first email address in the list. (It's usually
a list of just one anyway.)
=item replyto_addresses
Returns a list of "Reply-to:" email addresses. If called in a scalar
context it returns the first email address in the list.
=item sender_addresses
Returns a list of senders' email addresses. If called in a scalar
context it returns the first email address in the list.
=item to_addresses
Returns a list of recipients' email addresses. If called in a scalar
context it returns a reference to an array of email addresses.
=back
Note that context affects the behavior of all of the above methods.
Those fields that will commonly contain multiple entries (i.e. they
are recipients) will return an array reference when called in scalar
context. You can use this behavior to optimize performance.
Those fields that will commonly contain just one address (the
sender's) will return the first (and usually only) address. You can
use this behavior to optimize your development time.
=head1 Addresses and the Mail::IMAPClient::BodyStructure::Address
Several components of an envelope structure are address structures.
They are each parsed into their own object,
B<Mail::IMAPClient::BodyStructure::Address>, which looks like this:
{
mailboxname => 'somebody.special',
hostname => 'somplace.weird.com'
personalname => 'Somebody Special
sourceroute => 'NIL'
}
RFC2060 specifies that each address component of a bodystructure is a
list of address structures, so B<Mail::IMAPClient::BodyStructure>
parses each of these into an array of
B<Mail::IMAPClient::BodyStructure::Address> objects.
Each of these objects has the following methods available to it:
=over 4
=item mailboxname
Returns the "mailboxname" portion of the address, which is the part to
the left of the '@' sign.
=item hostname
Returns the "hostname" portion of the address, which is the part to
the right of the '@' sign.
=item personalname
Returns the "personalname" portion of the address, which is the part
of the address that's treated like a comment.
=item sourceroute
Returns the "sourceroute" portion of the address, which is typically "NIL".
=back
Taken together, the parts of an address structure form an address that
will look something like this:
C<personalname E<lt>mailboxname@hostnameE<gt>>
Note that because the B<Mail::IMAPClient::BodyStructure::Address>
objects come in arrays, it's generally easier to use the methods
available to B<Mail::IMAPClient::BodyStructure::Envelope> to obtain
all of the addresses in a particular array in one operation. These
methods are provided, however, in case you'd rather do things the hard
way. (And also because the aforementioned methods from
B<Mail::IMAPClient::BodyStructure::Envelope> need them anyway.)
=cut
=head1 AUTHOR
Original author: David J. Kernen; Reworked by: Mark Overmeer;
Maintained by Phil Pearl.
=head1 SEE ALSO
perl(1), Mail::IMAPClient, Parse::RecDescent, and RFC2060.
=cut