#!/usr/bin/perl

###################################################
# ANALYZE MAIL v1.1                               #
# (C) 2014 Domero, Groningen, NL                  #
# http://domero.nl                                #
###################################################

package anamail;

use strict;
use warnings;
use Encode qw(decode encode);
use gutil;
use Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

$VERSION     = '1.01';
@ISA         = qw(Exporter);
@EXPORT      = ();
@EXPORT_OK   = qw(analyze);

1;

########################### CALLABLE #######################################################

sub analyze {
  my ($cont,$headeronly) = @_;
  $cont =~ s/\r//g;
  my @lines=split("\n",$cont);
  my $self={}; bless $self;
  $self->{header} = {};
  $self->{plaintext} = "";
  $self->{html} = "";
  $self->{error} = "";
  $self->{multipart} = "";
  $self->{boundary} = "";
  $self->{attach} = [];
  $self->{inline} = [];
  $self->{encoding}  = "";
  $self->{charset} = 'utf8';
  my $header=1; my $curhead; my $cnt=0; my $plain=0; my $boundary; my $multipart; my $mpboundary;
  my $ln=0; my $tl=$#lines;
  while ($ln<=$tl) {
    my $line=$lines[$ln];  
    if (length($line)==0) {
      $self->decode_subject;
      if ($headeronly) { return $self }
      $self->getcontenttype;
      my $body=join("\n",@lines[$ln+1..$tl]);
      my $decoded;
      if ($self->{encoding} eq 'Q') {
        $decoded=$self->decode_quotedprintable(\$body)
      } elsif ($self->{encoding} eq 'B') {
        $decoded=$self->decode_base64(\$body)
      } else {
        $decoded=\$body
      }
      $body=decode($self->{charset},${$decoded});
      if ($self->{isplain}) {
        $self->{plaintext}=$body; return $self
      }  
      if ($self->{ishtml}) {
        $self->{html}=$body; return $self
      }
      if ($self->{boundary}) {
        my @bl=split(/--$self->{boundary}/,$body);
        foreach my $block (@bl) {
          $self->analyzeblock($block);          
        }
        return $self
      }
      $self->analyzeblock($body)
    }
    else {
      my $fc=ord(substr($line,0,1));
      if (($fc == 9) || ($fc == 32)) {
        $line =~ s/^[\t\s]+//;
        $self->{header}{$curhead}.="; $line"
      } else {
        my @vl; ($curhead,@vl) = split(/: /,$line);
        $curhead=lc($curhead); my $val=join(": ",@vl);
        $self->{header}{$curhead}=$val;
      }
    }
    $ln++
  }
  return $self
}

########################## INTERNAL ####################################################

sub analyzeblock {
  my ($self,$block) = @_;
  if (!$block || ($block eq '--')) { return }
  $block =~ s/^\n//; $block =~ s/\n$//;
#  print "BLOCK\n$block\n";  
  my @blines=split(/\n/,$block);
  my %header=(); my $first=1;
  if ($blines[0] =~ /content-type: multipart\/([a-z-]+)/i) {
    my $multipart=lc($1); my $boundary;
    if ($blines[0] =~ /boundary=\"(.+)\"/i) {
      $boundary=$1;
    } elsif ($blines[0] =~ /boundary=([a-z0-9]+)/i) {
      $boundary=$1;
    } elsif ($blines[1] =~ /boundary=\"(.+)\"/i) {
      $boundary=$1; $first=2;
    } elsif ($blines[1] =~ /boundary=([a-z0-9]+)/i) {
      $boundary=$1; $first=2;
    } else {
      $self->{error}="No boundary given in multipart data"; return $self
    }
    my $body=join("\n",@blines[$first..$#blines]);
    my @bl=split(/--$boundary/,$body);
    foreach my $block (@bl) {
      $self->analyzeblock($block);          
    }
  } else {
    if (!$blines[0]) {
      $self->{plaintext}.=join("\n",@blines[1..$#blines]); return $self
    }
    my %header=();
    my $lnr=0; my $curhead; my $tln=$#blines;
    while ($blines[$lnr] && ($lnr<=$tln)) {
      my $line=$blines[$lnr];
      my $fc=ord(substr($line,0,1));
      if (($fc == 9) || ($fc == 32)) {
        $line =~ s/^[\t\s]+//;
        $header{$curhead}.="; $line"
      } else {
        my @vl; ($curhead,@vl) = split(/: /,$line);
        $curhead=lc($curhead); my $val=join(": ",@vl);
        $header{$curhead}=$val;
      }
      $lnr++
    }    
    my $data=join("\n",@blines[$lnr+1..$tln]); my $decoded;
    my $class; my $type; my $charset='utf8';
    if ($header{'content-type'} =~ /(^[a-z-]+)\/([a-z.+-]+)/i) {
      $class=$1; $type=$2;
      if ($header{'content-type'} =~ /charset\s?\=\s?(.+)/i) {
        $charset=$1; $charset =~ s/\"//g;
      }
    }
    if ($header{'content-transfer-encoding'} =~ /base64/i) {
      $decoded=$self->decode_base64(\$data);
    } elsif ($header{'content-transfer-encoding'} =~ /quoted-printable/i) {
      $decoded=$self->decode_quotedprintable(\$data)
    } else {
      $decoded=\$data
    }
    if (($header{'content-disposition'} eq 'attachment') || ($header{'content-disposition'} eq 'inline')) {
      $header{data}=$decoded;
      if ($header{'content-disposition'} eq 'inline') {
        push @{$self->{inline}},\%header
      } else {
        push @{$self->{attach}},\%header;
      }
      return $self
    }
    $data=decode($charset,${$decoded});
    if ($type =~ /html/i) {
      $self->{html}.=$data
    } else {
      $self->{plaintext}.=$data
    }
  }  
  return $self
}

sub getcontenttype {
  my ($self) = @_;
  my $ct=$self->{header}{'content-type'};
  if (!$ct) { $self->{isplain}=1; return }
  if ($ct =~ /multipart\/([a-z-]+)/i) {
    $self->{multipart} = lc($1);
    if ($ct =~ /boundary=\"(.+)\"/i) {
      $self->{boundary} = $1;      
    } elsif ($ct =~ /boundary=([a-z0-9]+)/i) {
      $self->{boundary} = $1;
    } else {
      $self->{error}="No boundary given in multipart data"
    }
  } elsif ($ct =~ /text\/plain/i) {
    if ($ct =~ /charset=(.+)/i) {
      $self->{charset}=$1; $self->{charset} =~ s/\"//g;
    }
    $self->{isplain}=1  
  } elsif ($ct =~ /text\/html/i) {
    $self->{ishtml}=1
  }
  if ($self->{header}{'content-transfer-encoding'} =~ /base64/i) {
    $self->{encoding}='B'
  } elsif ($self->{header}{'content-transfer-encoding'} =~ /quoted-printable/i) {
    $self->{encoding}='Q'
  }  
}

sub decode_base64 {
  my ($self,$data) = @_;
  return gutil::decode_base64($data)
}

sub decode_quotedprintable {
  my ($self,$data) = @_;
  return gutil::decode_quotedprintable($data)
}

sub decode_subject {
  # RFC 1342
  my ($self) = @_;
  my $code = $self->{header}{subject};
  if ($code =~ /^\=\?.*\?\=/) {
    # subject is encoded
    my @decoded=();
    my @blocks=split(/ /,$code);
    foreach my $block (@blocks) {
      if ($block =~ /^\=\?(.+)\?([BQ])\?(.+)\?\=\;?$/) {
        my $charset=$1; my $encoding=$2; my $data=$3; my $text;
        if ($encoding eq 'B') {
          $text=$self->decode_base64(\$data)
        } else {
          $text=$self->decode_quotedprintable(\$data)
        }
        push @decoded,decode($charset,${$text})
      } else {
        push @decoded,$block
      }
    }
    $self->{header}{subject}=join("",@decoded)
  }
}