#!/usr/bin/perl
# evMbox2maildir
#
# author:   Jacob Barde
# date:     12 Sep 2003
# desc:     Converts Ximian Evolution's mboxes into maildir
# keywords: Ximian, Evolution, mbox, Maildir, convert, conversion,
#           IMAP, parse, evolution2maildir, evMbox2maildir,
#           ev2maildir
#
# Copyright (c) 2003 Jacob Barde
# Released under the GNU General Public License (GPL)
#
#
# This work is based on Bruce Guenter and Russ Nelson's mbox2maildir scripts.
# See the THANKS TO section.
#
# NO GUARANTEE AT ALL
#
# See CHANGELOG (at bottom) for release information.

#### Includes / Use ####
use strict;
use warnings;

use File::Find ();
use File::Path qw(mkpath);
use File::Spec;
use Getopt::Long qw(:config bundling no_ignore_case);
use Pod::Usage;

#### Process Options ####

# Non-option command line arguments ([mbox] maildir); filled by pushArgs().
my @ARGS         = ();
# Extra (non-Inbox) Evolution folders requested with -f.
my @OTHERFOLDERS = ();

my ($DO_DELETED, $IMAPSUB, $MAN, $OLDASREAD) = (0, 0, 0, 0);
my ($RECURSIVE, $USAGE, $QUIET, $VERBOSE)    = (0, 0, 0, 0);
my $IMAPDIR = 0;

# Counters for the final report.
my ($PROCESSED_MESS, $SKIPPED_MESS) = (0, 0);

# -S / -E are tri-state: undef means "not given on the command line".
my ($XSTATUS, $XEVO);
my ($EVLocal, $MAILDIR, $MBOX);

# Optional owner for everything that gets created (-u / -g).
my ($uid, $gid);

my $DEBUG = 0;

# Conversion state shared between the main loop and open_msg().
my $stamp;    # numeric prefix of the next message file name
my $count;    # messages written for the mbox currently being processed
my $OUT;      # handle of the message file being written, undef if none

GetOptions(
    'h|help|?|usage' => \$USAGE,
    'changelog'      => \&echoChangeLog,
    'd|deleted'      => \$DO_DELETED,
    'e|evlocal=s'    => \$EVLocal,
    'E|xevolution'   => \$XEVO,
    'f|folder=s'     => \@OTHERFOLDERS,
    'g|gid=i'        => \$gid,
    'i|imapdir!'     => \$IMAPDIR,
    'm|man'          => \$MAN,
    'o|old'          => \$OLDASREAD,
    'q|quiet'        => \$QUIET,
    'r|recursive'    => \$RECURSIVE,
    's|subscription' => \$IMAPSUB,
    'S|status'       => \$XSTATUS,
    'u|uid=i'        => \$uid,
    'v+'             => \$VERBOSE,
    'verbose=i'      => \$VERBOSE,
    'V|version'      => \&echoVersion,
    '<>'             => \&pushArgs,
) or pod2usage(2);

# Get the maildir and/or mbox arguments
if (@ARGS == 2) {
    ($MBOX, $MAILDIR) = @ARGS;
}
elsif (@ARGS == 1) {
    $MAILDIR = $ARGS[0];
}

if (defined $MBOX) {
    # The manual promises that -e, -f, -i and -r are ignored when an
    # explicit mbox is given, so neutralise them here.
    undef $EVLocal;
    @OTHERFOLDERS = ();
    $RECURSIVE    = 0;
    $IMAPDIR      = 0;
}
elsif (!defined $EVLocal) {
    # Set the default if nothing is specified for EVLocal
    my $home = defined $ENV{'HOME'} ? $ENV{'HOME'} : '';
    $EVLocal = "$home/evolution/local";
}

#### Do Help Pages If Necessary ####
$USAGE = 1 if !defined($MAILDIR) && !$DEBUG;

# For preferences of either of the headers:
# both can't be requested at once, print 'em the usage;
# otherwise set according to their wishes.
if (defined($XEVO) && defined($XSTATUS)) {
    $USAGE = 1;
}
elsif ($XEVO) {
    $XSTATUS = 0;
}

# Help is shown before any filesystem validation so that --help and --man
# work even when no Evolution directory exists.  pod2usage() exits.
pod2usage(-exitstatus => 0, -verbose => 2) if $MAN;
pod2usage(1) if $USAGE;

# undef => both headers, 1 => only [X-]Status, 0 => only X-Evolution
my $use_status = !defined($XSTATUS) || $XSTATUS;
my $use_xevo   = !$XSTATUS;

if (defined $MBOX) {
    # if the mbox doesn't exist... error out
    error("The mbox you specified, '$MBOX', does not exist.")
        unless -e $MBOX;
}
elsif (!-d "$EVLocal/Inbox") {
    # Make sure that there is an Inbox subdirectory in the $EVLocal.
    error("The location, '$EVLocal' for Evolution mail folders "
        . "(usually '\$HOME/evolution/local') does not exist "
        . "or does not contain an 'Inbox' folder.");
}

#### VERBOSITY LEVELS ####
# If debugging, be verbose
$VERBOSE = 2 if $DEBUG;

# If quiet, suppress everything
if ($QUIET) {
    my $null = File::Spec->devnull();
    open(STDOUT, '>', $null) or die "unable to silence STDOUT: $!\n";
    open(STDERR, '>', $null) or die "unable to silence STDERR: $!\n";
}

print "Verbosity level: $VERBOSE\n" if $VERBOSE;

#### BEGIN MAIN ####

# Work with absolute paths from here on: the script chdir()s into the
# maildir, which would otherwise break relative mbox/maildir arguments.
$MAILDIR = File::Spec->rel2abs($MAILDIR);
$MBOX    = File::Spec->rel2abs($MBOX)    if defined $MBOX;
$EVLocal = File::Spec->rel2abs($EVLocal) if defined $EVLocal;

# First check on how this was called.  Accordingly, add mboxes to the
# @mboxes queue.
my @mboxes = ();
if (defined $MBOX) {
    print "Finding the mbox file(s) (non-recursive)... " if $VERBOSE;
    push @mboxes, $MBOX;
}
elsif ($RECURSIVE) {
    print "Finding the mbox files (recursive)... " if $VERBOSE;
    push @mboxes, _find_mboxes("$EVLocal/Inbox");
}
else {
    print "Finding the mbox file(s) (non-recursive)... " if $VERBOSE;
    push @mboxes, "$EVLocal/Inbox/mbox";
}
my $mbox_count = scalar @mboxes;
print "done.\n\t", $mbox_count, " mboxes found.\n" if $VERBOSE;

# If other folders were specified, add them to the queue too.
if (@OTHERFOLDERS) {
    print "Finding the *other folders* that you specified... " if $VERBOSE;
    foreach my $folder (@OTHERFOLDERS) {
        if ($RECURSIVE) {
            push @mboxes, _find_mboxes("$EVLocal/$folder");
        }
        else {
            # queue the folder's mbox file, not the folder directory
            push @mboxes, "$EVLocal/$folder/mbox";
        }
    }
    print "done.\n\t", scalar(@mboxes) - $mbox_count, " mboxes found\n"
        if $VERBOSE;
    $mbox_count = scalar @mboxes;
}

print "Total mboxes to process: $mbox_count\n" if $VERBOSE;
print "Ahh, you're doing IMAP named maildirs, a fine choice!\n"
    if $VERBOSE && $IMAPDIR && !defined($MBOX);

# Create the Maildir specified if it doesn't exist.  Bomb out if it
# can't be created.
unless (-d $MAILDIR) {
    mkdir($MAILDIR, 0700)
        or error("maildir '$MAILDIR' doesn't exist and can't be created.");
}
chown($uid, $gid, $MAILDIR) if defined($uid) && defined($gid);

# Message file names are STAMP.PID.mbox; the stamp only ever grows during
# one run, so names stay unique even if two mboxes share a folder.
$stamp = time;

# Here is where we process all the mboxes in the queue
foreach my $mbox (@mboxes) {
    $count = 0;
    print "\nProcessing '$mbox'...\n" if $VERBOSE > 1;

    # error out if we can't open the current $mbox
    open(my $spool, '<', $mbox) or error("can't open mbox '$mbox'");

    chdir($MAILDIR) or error("fatal: unable to chdir to $MAILDIR.");

    # '' means "deliver into the top level of the maildir"
    my $foldername = _folder_for($mbox);
    my $dir        = $foldername eq '' ? '.' : $foldername;

    if ($foldername ne '') {
        print "\tCreating '$foldername'...\n" if $VERBOSE;
        eval { mkpath($foldername); 1 }
            or error("unable to create folder '$foldername'");
        if ($IMAPDIR) {
            # Courier-IMAP marks sub-maildirs with an empty 'maildirfolder'
            open(my $marker, '>>', "$foldername/maildirfolder")
                or error("unable to create '$foldername/maildirfolder'");
            close($marker);
        }
        print "\tCreating tmp, new, cur subdirectories...\n" if $VERBOSE;
    }

    # create the Maildir subdirs
    foreach my $sub (qw(tmp new cur)) {
        next if -d "$dir/$sub";
        mkdir("$dir/$sub", 0700) or error("unable to make $sub/ subdir");
    }
    chown($uid, $gid, map { "$dir/$_" } qw(tmp new cur))
        if defined($uid) && defined($gid);

    # move into the directory/folder; open_msg() uses relative names
    chdir($dir) or error("fatal: unable to chdir to $dir.");

    # starting off processing the mbox file
    my $in_header = 0;
    my $flags;
    my @header;
    while (my $line = <$spool>) {
        if ($line =~ /^From /) {
            # a new message starts; flush a header-only predecessor
            open_msg($flags, \@header) if $in_header;
            undef $flags;
            @header    = ("MBOX-Line: $line");
            $in_header = 1;
        }
        elsif ($in_header) {
            # hit a blank line, must be end of message header
            if ($line =~ /^\s+$/) {
                $in_header = 0;
                open_msg($flags, \@header);
            }
            else {
                # unless turned off with -E, [X-]Status: contributes flags
                if ($use_status && $line =~ /^(?:X-)?Status:\s+(\S+)/i) {
                    $flags .= $1;
                }

                # unless turned off with -S, X-Evolution: contributes
                # flags.  See the X-EVOLUTION HEADER section of the man
                # page (--man) for the layout of this header.
                if ($use_xevo
                    && $line =~ /^X-Evolution:\s+\w{8}-([0-9a-f]{4})/i)
                {
                    $flags .= _xevolution_flags(hex $1);
                }

                # Whatever it was, it is part of the header.
                push @header, $line;
            }
        }
        elsif (defined $OUT) {
            # message body; undo mbox "From " quoting.  Body lines of a
            # skipped (deleted) message are dropped because no file is open.
            $line =~ s/^>From /From /;
            print {$OUT} $line or error("unable to write to new message");
        }
    }

    # done with the mbox folder.
    close($spool);

    # last message, finish it up.
    open_msg($flags, \@header) if $in_header;

    # close the message file
    _close_msg();

    print "\t$count messages converted.\n" if $VERBOSE;
}

# print out a report
printReport() if $VERBOSE;

if ($QUIET) {
    close(STDOUT);
    close(STDERR);
}

exit(0);

#### END OF MAIN ####

# Recursively collect every file named 'mbox' (any letter case) below $top.
# Returns the sorted list of paths; an empty list if $top is no directory.
sub _find_mboxes {
    my ($top) = @_;
    my @found;
    return @found unless -d $top;
    File::Find::find(
        sub {
            push @found, $File::Find::name
                if lc($_) eq 'mbox' && !-d $_;
        },
        $top
    );
    return sort @found;
}

# Map an mbox path to the maildir folder (relative to $MAILDIR) that its
# messages go into.  Returns '' for the top-level maildir, which is used
# for an explicitly given mbox and for Evolution's Inbox itself.
sub _folder_for {
    my ($mbox) = @_;
    return '' if defined $MBOX;

    # Get rid of the usual evolution garbage
    my $name = $mbox;
    $name =~ s/^\Q$EVLocal\E//;
    $name =~ s/subfolders\///g;
    $name =~ s/\/mbox$//i;
    $name =~ s/^\///;

    return '' if $name eq '' || $name eq 'Inbox';

    # subfolders of the Inbox lose the 'Inbox/' prefix
    $name =~ s/^Inbox\///;
    if ($IMAPDIR) {
        # Courier-IMAP convention: Friends/Joe -> .Friends.Joe
        $name =~ tr{/}{.};
        $name = "." . $name;
    }
    return $name;
}

# Translate the numeric flag field of an X-Evolution header into the
# status letters that get_flags() understands.
# Bits: 0x01 answered, 0x02 deleted, 0x04 draft, 0x08 flagged, 0x10 seen.
# Evolution 1.4 marks neither forwarded nor old messages, so no 'P' or 'O'
# is ever produced here.
sub _xevolution_flags {
    my ($bits) = @_;
    my $letters = '';
    $letters .= 'T' if $bits & 0x0004;    # draft
    $letters .= 'F' if $bits & 0x0008;    # flagged
    $letters .= 'A' if $bits & 0x0001;    # replied/answered
    $letters .= 'R' if $bits & 0x0010;    # seen
    $letters .= 'D' if $bits & 0x0002;    # trashed
    return $letters;
}

# Close the message file currently being written, if there is one.
sub _close_msg {
    return unless defined $OUT;
    close($OUT) or error("unable to write to new message");
    undef $OUT;
    return;
}

# Return the lines of the changelog stored after __DATA__; an empty list
# if the script carries no DATA section.
sub _changelog_lines {
    return () unless defined fileno(DATA);
    return <DATA>;
}

# this subroutine prints out a report of the messages processed
# NOTE(review): the original report text was lost from this copy of the
# script; the wording below is a reconstruction from the two counters.
sub printReport {
    print <<"EOF";

Conversion report
\tmessages converted: $PROCESSED_MESS
\tmessages skipped:   $SKIPPED_MESS
EOF
    return;
}

# Getopt::Long '<>' handler: remember a non-option argument.
sub pushArgs {
    push @ARGS, map { "$_" } @_;
    return;
}

# print out the version (the first changelog entry) and exit
sub echoVersion {
    my $version = 'unknown';
    foreach my $entry (_changelog_lines()) {
        if ($entry =~ /^(\d[\d.]*)\s/) {
            $version = $1;
            last;
        }
    }
    print "\n$0, version: $version\n",
        "\tUse --changelog for changelog information\n\n",
        "\tUse --man for complete documentation\n\n";
    exit(0);
}

# print out the changelog and exit
sub echoChangeLog {
    print _changelog_lines();
    exit(0);
}

# from the IMAP compatible flag string, return the file extension/suffix
# of the maildir message file (":2,FLAGS"), or undef if no flag applies.
# If Evolution enables the status of a message as being forwarded, a 'P'
# (non-IMAP compatible at this time) in the argument is honoured too.
# Maildir requires the flag letters in ASCII order, hence the fixed
# sequence D F P R S T below.
sub get_flags {
    my ($IMAPflags) = @_;
    $IMAPflags = '' unless defined $IMAPflags;

    my $f = '';
    $f .= 'D' if $IMAPflags =~ /T/i;    # draft
    $f .= 'F' if $IMAPflags =~ /F/i;    # flagged
    $f .= 'P' if $IMAPflags =~ /P/i;    # passed/forwarded
    $f .= 'R' if $IMAPflags =~ /A/i;    # answered
    $f .= 'S'                           # read/seen, or old => read (-o)
        if $IMAPflags =~ /R/i || ($OLDASREAD && $IMAPflags =~ /O/i);
    $f .= 'T' if $IMAPflags =~ /D/i;    # deleted

    return length($f) ? ":2,$f" : undef;
}

# Finish the previous message and start the file for the next one.
#   $flags  - status letters collected from the header (may be undef)
#   $header - array ref with the header lines of the message
# A deleted message is skipped unless -d was given; then no file is left
# open and the main loop drops the body.
sub open_msg {
    my ($flags, $header) = @_;

    # process the IMAP-compatible message status flags
    my $suffix = get_flags($flags);

    # close the previous message to prepare for the next message
    _close_msg();

    # if it's a deleted (trashed) message and we aren't doing them, skip it.
    if (defined($suffix) && $suffix =~ /T/ && !$DO_DELETED) {
        $SKIPPED_MESS++;
        return;
    }

    # name the files to where they're supposed to be (via flags)
    # naming convention:
    #   TIMESTAMP(incremented by message number).PROCESS_ID.mbox[:2,FLAGS]
    my $base = $stamp . '.' . $$ . '.mbox';
    my $fn   = defined($suffix) ? "cur/$base$suffix" : "new/$base";

    # increment the stamp
    $stamp++;

    # open the next message file
    open($OUT, '>', $fn) or error("unable to create new message");
    chown($uid, $gid, $fn) if defined($uid) && defined($gid);
    print {$OUT} @$header, "\n"
        or error("unable to write to new message");

    $PROCESSED_MESS++;
    $count++;
    return;
}

# this subroutine will print out error messages and exit with status 1
sub error {
    print STDERR join("\n", @_), "\n\n";
    print STDERR "Use --man option to view the manual page.\n";
    exit(1);
}

###########################################################################
# END OF PROGRAM, begin the POD documentation
###########################################################################

=head1 NAME

B<evMbox2maildir> - convert Ximian Evolution mboxes (or plain mboxes) into Maildir

=head1 SYNOPSIS

B<evMbox2maildir> [-?defghimoqrsuvV] [-E|-S] [I<mbox>] I<maildir>

=head1 DESCRIPTION

This program converts an Evolution mbox (or a normal mbox) into Maildir
format. It can also operate recursively and use IMAP convention folder
naming. See B<NOTE ON SUBDIRECTORY FOLDERS>.

This is a derived work of Bruce Guenter's B<mbox2maildir>, see the
B<THANKS TO> section.

=head1 OPTIONS

=over 10

=item B<-?, -h, --help, --usage>

This message.

=item B<-m, --man>

The man (manual) page.

=item B<-d, --deleted>

Use this option if you want to include messages that Evolution has
marked as deleted. They stay in the mbox file until a manual expunge is
initiated.

=item B<-e, --evlocal> I<directory>

Specify Ximian Evolution's 'local' directory. For non-recursive
behavior, only the I<Inbox> folder will be processed.
This option is ignored if an I<mbox> is specified at the command line.
Default: $HOME/evolution/local

=item B<-E, --xevolution>

Use this option if you only want to use the B<X-Evolution:> header for
message status. By default, both B<X-Evolution:> and B<[X-]Status:>
headers are used.

=item B<-f, --folder> I<folder>

By default, only the Inbox and its subfolders are processed. The folder
names are case-sensitive (you should know this, you are running *IX,
aren't you?). Use this option for each non-Inbox folder you wish to
process.
This option is ignored if an I<mbox> is specified at the command line.
Available folders: I<Drafts>, I<Outbox>, I<Sent>, I<Trash>.
See the B<EXAMPLES> section.

=item B<-g, --gid> I<gid>

Group ID

=item B<-i, --imapdir>

When creating the Maildir subfolders, use Courier-IMAP's naming
convention. E.g.: instead of Friends/John_Q_Doe/, it's named
.Friends.John_Q_Doe/
This option is ignored if an I<mbox> is specified at the command line.
Also see the B<-s, --subscription> option (to build a subscription file).

=item B<-o, --old>

Treat "old" (Status: O) messages as "read" messages.
In previous versions of B<mbox2maildir>, if a message was "read" and
"old" (Status: RO), it was marked as *read* in maildir. However, if it
was marked "old" (Status: O) and not "read", the message was left as
*new* in maildir and would be listed by some (or all) email clients as
"new". So if you are the type to skip over SPAM and unwanted email, and
don't want it set as new, use this.

=item B<-q, --quiet>

No verbosity, no progress, no error messages. NOTHING!

=item B<-r, --recursive>

Recursively process folders (and subfolders). Default: Inbox and its
subfolders. To process non-Inbox folders, see B<-f, --folder>.
This option is ignored if an I<mbox> is specified at the command line.

=item B<-s, --subscription>

Create a Courier IMAP subscription file. NOT YET IMPLEMENTED

=item B<-S, --status>

Use this option if you only want to use the B<[X-]Status:> header for
message status. By default, both B<X-Evolution:> and B<[X-]Status:>
headers are used.

=item B<-u, --uid> I<uid>

User ID

=item B<-v, --verbose> I<level>

Tell 'em what's going on. For more verbosity use multiple 'v's, or
simply specify a numerical level (0 and above) with --verbose.
E.g. -vv, -vvv. Same two using --verbose: --verbose 2, --verbose 3.
The B<-q, --quiet> option overrides all verbosity.

=item B<-V, --version>

Prints the version of this program.

=back

=head1 EXAMPLES

    # On a Gentoo system, the default Maildir is '.maildir'
    # this recursively converts the Inbox, Sent, and Drafts
    # folders into IMAP maildirs.
    evMbox2maildir \
        --evlocal=/home/jqdoe/evolution/local \
        --deleted --imapdir --folder=Sent --folder=Drafts \
        --recursive -v \
        /home/jqdoe/.maildir

    # Recursively process into the ~/Maildir.
    evMbox2maildir.pl --evlocal ~/evolution/local \
        -f Drafts -f Trash --folder Sent --old -r --verbose 3 ~/Maildir

=head1 THE X-EVOLUTION HEADER

Ximian Evolution uses the X-Evolution header instead of [X-]Status:

In the C<X-Evolution: XXXXXXXX-YYYY> header, there are hex strings
denoting message id (XXXXXXXX) and flags (YYYY). New messages by default
get flags of 0000 (0x0000). Various flag markings are accomplished
through arithmetic bit shifts on this hex string's 32-bit value.

Shifts performed on the bit strings (see CVS: evolution/camel)

    ANSWERED    = 1<<0,
    DELETED     = 1<<1,
    DRAFT       = 1<<2,
    FLAGGED     = 1<<3,
    SEEN        = 1<<4,
    ATTACHMENTS = 1<<5

Documented flags with their hex and binary representations

    Answered = 0x0001 = 0000000000000001
    Deleted  = 0x0002 = 0000000000000010
    Draft    = 0x0004 = 0000000000000100
    Flagged  = 0x0008 = 0000000000001000
    Seen     = 0x0010 = 0000000000010000
    Attach.  = 0x0020 = 0000000000100000

    # Codes I have in my mboxes
    # Couldn't find documentation for the 7th bit that creates 0x0051 and
    # 0x0071
    0x0000 : 0000000000000000 # new
    0x0010 : 0000000000010000 # seen
    0x0011 : 0000000000010001 # seen and replied (answered)
    0x0012 : 0000000000010010 # seen and deleted
    0x0014 : 0000000000010100 # seen and draft
    0x0016 : 0000000000010110 # seen, draft, deleted
    0x0018 : 0000000000011000 # seen and flagged
    0x0020 : 0000000000100000 # attachment
    0x0030 : 0000000000110000 # attachment and seen
    0x0031 : 0000000000110001 # attachment, seen and replied
    0x0036 : 0000000000110110 # attachment, seen, draft and deleted
    0x0051 : 0000000001010001
    #   had 14 of these messages. here's some MIME-type info
    #   MIME: [multipart/alternative|text/plain charset=xxxxx]
    #         7-bit text US-ASCII
    #   Think these were from hotmail or other web-based accounts
    0x0071 : 0000000001110001
    #   only had 1 of these messages.
    #   again, here's MIME-type info
    #   MIME: multipart/mixed
    #         7-bit text ISO-8859-1
    #         1 text part, no other parts.

Email me if you know what this 7th bit flags.

=head1 THE (X-)STATUS HEADER

The B<Status:> or B<X-Status:> header is used by many email clients.
Evolution by default does I<not> use these headers, though somewhere on
a mailing list it was mentioned that this was an option now.

Status flags: R (read), O (old), D (deleted), F (flagged),
A (answered/replied), T (draft). Or so at least UW-IMAP uses these flags.

=head1 NOTE ON SUBDIRECTORY FOLDERS

I haven't found anything that really addresses the proper naming
conventions of subdirectory folders. Unless B<--imapdir> is specified,
subdirectory folders will be created in a hierarchical fashion
(E.g. $MAILDIR/Friends/Joe_Shmoe).

=head1 COPYRIGHT

Copyright (c) 2003 Jacob Barde

Released under the GNU General Public License (GPL)

NO GUARANTEE AT ALL

=head1 AUTHOR

Jacob Barde

=head1 THANKS TO

Russell Nelson for the B<original> mbox2maildir.

Bruce Guenter for the modified mbox2maildir which this work is derived
from.

* Both mbox2maildir versions are in the public domain.

=head1 REPORTING BUGS

Report bugs to the author, Jacob Barde.

=head1 SEE ALSO

The maildir format documentation - info on the maildir format.

Evolution's source code - To kill time, or to put yourself to sleep :-)

Courier-IMAP

Ximian

=cut

###########################################################################
# TODO - email me if you'd like to see a feature
###########################################################################
#
# * Build a Courier-IMAP subscription file (-s).
# * Contribute correct folder naming capability for X-Evolution support to
#   the mb2md.pl project (Juri Haberland).
###########################################################################

###########################################################################
# CHANGELOG
###########################################################################
# the DATA file stream is for getting version and changelog information.
# Version: ENHANCEMENT
__DATA__
1.0.2      Re-tooled options for using or not using the X-Evolution: or
           [X-]Status: headers support (-E). Only -E or -S may be used at
           runtime.
1.0.1      Optimized some code that was sloppily conceived of due to
           fatigue. :-)
1.0        INITIAL PUBLIC RELEASE. 8 September 2003
0.4.1      Created a lot more documentation. Contributed X-Evolution header
           parsing to mb2md.pl project (original by Robin Whittle) now
           maintained by Juri Haberland.
0.4        Specifying non-Inbox subdirectory folders (-f).
0.3.1      Correct processing of deleted (unexpunged) messages (-d).
0.3        X-Status header support (-S). Allow "old as read" support (-o).
0.2.2      Created initial documentation. Verbosity and debug (-v).
           Quiet (-q).
0.2.1      IMAP directory support (-i).
0.2        Rewrite correctly parsing X-Evolution header.
0.1        Basic functionality including recursive feature (-r).