• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     This library is free software; you can redistribute it and/or
00008     modify it under the terms of the GNU Library General Public
00009     License as published by the Free Software Foundation; either
00010     version 2 of the License, or (at your option) any later version.
00011 
00012     This library is distributed in the hope that it will be useful,
00013     but WITHOUT ANY WARRANTY; without even the implied warranty of
00014     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015     Library General Public License for more details.
00016 
00017     You should have received a copy of the GNU Library General Public License
00018     along with this library; see the file COPYING.LIB.  If not, write to
00019     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020     Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "kmime_header_parsing.h"
00024 
00025 #include "kmime_codecs.h"
00026 #include "kmime_util.h"
00027 #include "kmime_dateformatter.h"
00028 #include "kmime_warning.h"
00029 
00030 #include <kglobal.h>
00031 #include <kcharsets.h>
00032 
00033 #include <QtCore/QTextCodec>
00034 #include <QtCore/QMap>
00035 #include <QtCore/QStringList>
00036 #include <QtCore/QUrl>
00037 
00038 #include <ctype.h> // for isdigit
00039 #include <cassert>
00040 
00041 using namespace KMime;
00042 using namespace KMime::Types;
00043 
00044 namespace KMime {
00045 
00046 namespace Types {
00047 
00048 // QUrl::fromAce is extremely expensive, so only use it when necessary.
00049 // Fortunately, the presence of IDNA is readily detected with a substring match...
00050 static inline QString QUrl_fromAce_wrapper( const QString & domain )
00051 {
00052     if ( domain.contains( QLatin1String( "xn--" ) ) )
00053         return QUrl::fromAce( domain.toLatin1() );
00054     else
00055         return domain;
00056 }
00057 
00058 static QString addr_spec_as_string( const AddrSpec & as, bool pretty )
00059 {
00060   if ( as.isEmpty() ) {
00061     return QString();
00062   }
00063 
00064   bool needsQuotes = false;
00065   QString result;
00066   result.reserve( as.localPart.length() + as.domain.length() + 1 );
00067   for ( int i = 0 ; i < as.localPart.length() ; ++i ) {
00068     const char ch = as.localPart[i].toLatin1();
00069     if ( ch == '.' || isAText( ch ) ) {
00070       result += ch;
00071     } else {
00072       needsQuotes = true;
00073       if ( ch == '\\' || ch == '"' ) {
00074         result += '\\';
00075       }
00076       result += ch;
00077     }
00078   }
00079   const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ;
00080   if ( needsQuotes ) {
00081     return '"' + result + "\"@" + dom;
00082   } else {
00083     return result + '@' + dom;
00084   }
00085 }
00086 
00087 QString AddrSpec::asString() const
00088 {
00089     return addr_spec_as_string( *this, false );
00090 }
00091 
00092 QString AddrSpec::asPrettyString() const
00093 {
00094     return addr_spec_as_string( *this, true );
00095 }
00096 
00097 bool AddrSpec::isEmpty() const
00098 {
00099   return localPart.isEmpty() && domain.isEmpty();
00100 }
00101 
00102 QByteArray Mailbox::address() const
00103 {
00104   return mAddrSpec.asString().toLatin1();
00105 }
00106 
00107 AddrSpec Mailbox::addrSpec() const
00108 {
00109   return mAddrSpec;
00110 }
00111 
00112 QString Mailbox::name() const
00113 {
00114   return mDisplayName;
00115 }
00116 
00117 void Mailbox::setAddress( const AddrSpec &addr )
00118 {
00119   mAddrSpec = addr;
00120 }
00121 
00122 void Mailbox::setAddress( const QByteArray &addr )
00123 {
00124   const char *cursor = addr.constData();
00125   if ( !HeaderParsing::parseAngleAddr( cursor,
00126                                        cursor + addr.length(), mAddrSpec ) ) {
00127     if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
00128                                         mAddrSpec ) ) {
00129       kWarning() << "Invalid address";
00130       return;
00131     }
00132   }
00133 }
00134 
00135 void Mailbox::setName( const QString &name )
00136 {
00137   mDisplayName = removeBidiControlChars( name );
00138 }
00139 
00140 void Mailbox::setNameFrom7Bit( const QByteArray &name,
00141                                const QByteArray &defaultCharset )
00142 {
00143   QByteArray cs;
00144   setName( decodeRFC2047String( name, cs, defaultCharset, false ) );
00145 }
00146 
00147 bool Mailbox::hasAddress() const
00148 {
00149   return !mAddrSpec.isEmpty();
00150 }
00151 
00152 bool Mailbox::hasName() const
00153 {
00154   return !mDisplayName.isEmpty();
00155 }
00156 
00157 QString Mailbox::prettyAddress() const
00158 {
00159   if ( !hasName() ) {
00160     return address();
00161   }
00162   QString s = name();
00163   if ( hasAddress() ) {
00164     s += QLatin1String(" <") + address() + QLatin1Char('>');
00165   }
00166   return s;
00167 }
00168 
00169 void Mailbox::fromUnicodeString( const QString &s )
00170 {
00171   from7BitString( encodeRFC2047String( s, "utf-8", false ) );
00172 }
00173 
00174 void Mailbox::from7BitString( const QByteArray &s )
00175 {
00176   const char *cursor = s.constData();
00177   HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
00178 }
00179 
00180 QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
00181 {
00182   if ( !hasName() ) {
00183     return address();
00184   }
00185   QByteArray rv;
00186   if ( isUsAscii( name() ) ) {
00187     QByteArray tmp = name().toLatin1();
00188     addQuotes( tmp, false );
00189     rv += tmp;
00190   } else {
00191     rv += encodeRFC2047String( name(), encCharset, true );
00192   }
00193   if ( hasAddress() ) {
00194     rv += " <" + address() + '>';
00195   }
00196   return rv;
00197 }
00198 
00199 } // namespace Types
00200 
00201 namespace HeaderParsing {
00202 
00203 // parse the encoded-word (scursor points to after the initial '=')
00204 bool parseEncodedWord( const char* &scursor, const char * const send,
00205                        QString &result, QByteArray &language,
00206                        QByteArray &usedCS, const QByteArray &defaultCS,
00207                        bool forceCS )
00208 {
00209   // make sure the caller already did a bit of the work.
00210   assert( *(scursor-1) == '=' );
00211 
00212   //
00213   // STEP 1:
00214   // scan for the charset/language portion of the encoded-word
00215   //
00216 
00217   char ch = *scursor++;
00218 
00219   if ( ch != '?' ) {
00220     // kDebug(5320) << "first";
00221     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00222     return false;
00223   }
00224 
00225   // remember start of charset (ie. just after the initial "=?") and
00226   // language (just after the first '*') fields:
00227   const char * charsetStart = scursor;
00228   const char * languageStart = 0;
00229 
00230   // find delimiting '?' (and the '*' separating charset and language
00231   // tags, if any):
00232   for ( ; scursor != send ; scursor++ ) {
00233     if ( *scursor == '?') {
00234       break;
00235     } else if ( *scursor == '*' && languageStart == 0 ) {
00236       languageStart = scursor + 1;
00237     }
00238   }
00239 
00240   // not found? can't be an encoded-word!
00241   if ( scursor == send || *scursor != '?' ) {
00242     // kDebug(5320) << "second";
00243     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00244     return false;
00245   }
00246 
00247   // extract the language information, if any (if languageStart is 0,
00248   // language will be null, too):
00249   QByteArray maybeLanguage( languageStart, scursor - languageStart );
00250   // extract charset information (keep in mind: the size given to the
00251   // ctor is one off due to the \0 terminator):
00252   QByteArray maybeCharset( charsetStart,
00253                            ( languageStart ? languageStart - 1 : scursor ) - charsetStart );
00254 
00255   //
00256   // STEP 2:
00257   // scan for the encoding portion of the encoded-word
00258   //
00259 
00260   // remember start of encoding (just _after_ the second '?'):
00261   scursor++;
00262   const char * encodingStart = scursor;
00263 
00264   // find next '?' (ending the encoding tag):
00265   for ( ; scursor != send ; scursor++ ) {
00266     if ( *scursor == '?' ) {
00267       break;
00268     }
00269   }
00270 
00271   // not found? Can't be an encoded-word!
00272   if ( scursor == send || *scursor != '?' ) {
00273     // kDebug(5320) << "third";
00274     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00275     return false;
00276   }
00277 
00278   // extract the encoding information:
00279   QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
00280 
00281   // kDebug(5320) << "parseEncodedWord: found charset == \"" << maybeCharset
00282   //         << "\"; language == \"" << maybeLanguage
00283   //         << "\"; encoding == \"" << maybeEncoding << "\"";
00284 
00285   //
00286   // STEP 3:
00287   // scan for encoded-text portion of encoded-word
00288   //
00289 
00290   // remember start of encoded-text (just after the third '?'):
00291   scursor++;
00292   const char * encodedTextStart = scursor;
00293 
00294   // find next '?' (ending the encoded-text):
00295   for ( ; scursor != send ; scursor++ ) {
00296     if ( *scursor == '?' ) {
00297       break;
00298     }
00299   }
00300 
00301   // not found? Can't be an encoded-word!
00302   // ### maybe evaluate it nonetheless if the rest is OK?
00303   if ( scursor == send || *scursor != '?' ) {
00304     // kDebug(5320) << "fourth";
00305     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00306     return false;
00307   }
00308   scursor++;
00309   // check for trailing '=':
00310   if ( scursor == send || *scursor != '=' ) {
00311     // kDebug(5320) << "fifth";
00312     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00313     return false;
00314   }
00315   scursor++;
00316 
00317   // set end sentinel for encoded-text:
00318   const char * const encodedTextEnd = scursor - 2;
00319 
00320   //
00321   // STEP 4:
00322   // setup decoders for the transfer encoding and the charset
00323   //
00324 
00325   // try if there's a codec for the encoding found:
00326   Codec * codec = Codec::codecForName( maybeEncoding );
00327   if ( !codec ) {
00328     KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
00329     return false;
00330   }
00331 
00332   // get an instance of a corresponding decoder:
00333   Decoder * dec = codec->makeDecoder();
00334   assert( dec );
00335 
00336   // try if there's a (text)codec for the charset found:
00337   bool matchOK = false;
00338   QTextCodec *textCodec = 0;
00339   if ( forceCS || maybeCharset.isEmpty() ) {
00340     textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00341     usedCS = cachedCharset( defaultCS );
00342   } else {
00343     textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00344     if ( !matchOK ) {  //no suitable codec found => use default charset
00345       textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00346       usedCS = cachedCharset( defaultCS );
00347     } else {
00348       usedCS = cachedCharset( maybeCharset );
00349     }
00350   }
00351 
00352   if ( !matchOK || !textCodec ) {
00353     KMIME_WARN_UNKNOWN( Charset, maybeCharset );
00354     delete dec;
00355     return false;
00356   };
00357 
00358   // kDebug(5320) << "mimeName(): \"" << textCodec->name() << "\"";
00359 
00360   // allocate a temporary buffer to store the 8bit text:
00361   int encodedTextLength = encodedTextEnd - encodedTextStart;
00362   QByteArray buffer;
00363   buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
00364   char *bbegin = buffer.data();
00365   char *bend = bbegin + buffer.length();
00366 
00367   //
00368   // STEP 5:
00369   // do the actual decoding
00370   //
00371 
00372   if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
00373     KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
00374                << encodedTextLength << ")\nresult may be truncated";
00375   }
00376 
00377   result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );
00378 
00379   // kDebug(5320) << "result now: \"" << result << "\"";
00380   // cleanup:
00381   delete dec;
00382   language = maybeLanguage;
00383 
00384   return true;
00385 }
00386 
// Advances scursor past any run of ' ', '\n', '\t' and '\r', stopping at the
// first other character or at send, whichever comes first.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      break;    // whitespace: keep going
    default:
      return;   // first non-whitespace character
    }
  }
}
00394 
00395 bool parseAtom( const char * &scursor, const char * const send,
00396                 QString &result, bool allow8Bit )
00397 {
00398   QPair<const char*,int> maybeResult;
00399 
00400   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00401     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00402     return true;
00403   }
00404 
00405   return false;
00406 }
00407 
00408 bool parseAtom( const char * &scursor, const char * const send,
00409                 QPair<const char*,int> &result, bool allow8Bit )
00410 {
00411   bool success = false;
00412   const char *start = scursor;
00413 
00414   while ( scursor != send ) {
00415     signed char ch = *scursor++;
00416     if ( ch > 0 && isAText( ch ) ) {
00417       // AText: OK
00418       success = true;
00419     } else if ( allow8Bit && ch < 0 ) {
00420       // 8bit char: not OK, but be tolerant.
00421       KMIME_WARN_8BIT( ch );
00422       success = true;
00423     } else {
00424       // CTL or special - marking the end of the atom:
00425       // re-set sursor to point to the offending
00426       // char and return:
00427       scursor--;
00428       break;
00429     }
00430   }
00431   result.first = start;
00432   result.second = scursor - start;
00433   return success;
00434 }
00435 
00436 bool parseToken( const char * &scursor, const char * const send,
00437                  QString &result, bool allow8Bit )
00438 {
00439   QPair<const char*,int> maybeResult;
00440 
00441   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00442     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00443     return true;
00444   }
00445 
00446   return false;
00447 }
00448 
00449 bool parseToken( const char * &scursor, const char * const send,
00450                  QPair<const char*,int> &result, bool allow8Bit )
00451 {
00452   bool success = false;
00453   const char * start = scursor;
00454 
00455   while ( scursor != send ) {
00456     signed char ch = *scursor++;
00457     if ( ch > 0 && isTText( ch ) ) {
00458       // TText: OK
00459       success = true;
00460     } else if ( allow8Bit && ch < 0 ) {
00461       // 8bit char: not OK, but be tolerant.
00462       KMIME_WARN_8BIT( ch );
00463       success = true;
00464     } else {
00465       // CTL or tspecial - marking the end of the atom:
00466       // re-set sursor to point to the offending
00467       // char and return:
00468       scursor--;
00469       break;
00470     }
00471   }
00472   result.first = start;
00473   result.second = scursor - start;
00474   return success;
00475 }
00476 
00477 #define READ_ch_OR_FAIL if ( scursor == send ) {        \
00478     KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
00479     return false;                                       \
00480   } else {                                              \
00481     ch = *scursor++;                                    \
00482   }
00483 
00484 // known issues:
00485 //
00486 // - doesn't handle quoted CRLF
00487 
00488 bool parseGenericQuotedString( const char* &scursor, const char * const send,
00489                                QString &result, bool isCRLF,
00490                                const char openChar, const char closeChar )
00491 {
00492   char ch;
00493   // We are in a quoted-string or domain-literal or comment and the
00494   // cursor points to the first char after the openChar.
00495   // We will apply unfolding and quoted-pair removal.
00496   // We return when we either encounter the end or unescaped openChar
00497   // or closeChar.
00498 
00499   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00500 
00501   while ( scursor != send ) {
00502     ch = *scursor++;
00503 
00504     if ( ch == closeChar || ch == openChar ) {
00505       // end of quoted-string or another opening char:
00506       // let caller decide what to do.
00507       return true;
00508     }
00509 
00510     switch( ch ) {
00511     case '\\':      // quoted-pair
00512       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
00513       READ_ch_OR_FAIL;
00514       KMIME_WARN_IF_8BIT( ch );
00515       result += QChar( ch );
00516       break;
00517     case '\r':
00518       // ###
00519       // The case of lonely '\r' is easy to solve, as they're
00520       // not part of Unix Line-ending conventions.
00521       // But I see a problem if we are given Unix-native
00522       // line-ending-mails, where we cannot determine anymore
00523       // whether a given '\n' was part of a CRLF or was occurring
00524       // on it's own.
00525       READ_ch_OR_FAIL;
00526       if ( ch != '\n' ) {
00527         // CR on it's own...
00528         KMIME_WARN_LONE( CR );
00529         result += QChar('\r');
00530         scursor--; // points to after the '\r' again
00531       } else {
00532         // CRLF encountered.
00533         // lookahead: check for folding
00534         READ_ch_OR_FAIL;
00535         if ( ch == ' ' || ch == '\t' ) {
00536           // correct folding;
00537           // position cursor behind the CRLF WSP (unfolding)
00538           // and add the WSP to the result
00539           result += QChar( ch );
00540         } else {
00541           // this is the "shouldn't happen"-case. There is a CRLF
00542           // inside a quoted-string without it being part of FWS.
00543           // We take it verbatim.
00544           KMIME_WARN_NON_FOLDING( CRLF );
00545           result += "\r\n";
00546           // the cursor is decremented again, so's we need not
00547           // duplicate the whole switch here. "ch" could've been
00548           // everything (incl. openChar or closeChar).
00549           scursor--;
00550         }
00551       }
00552       break;
00553     case '\n':
00554       // Note: CRLF has been handled above already!
00555       // ### LF needs special treatment, depending on whether isCRLF
00556       // is true (we can be sure a lonely '\n' was meant this way) or
00557       // false ('\n' alone could have meant LF or CRLF in the original
00558       // message. This parser assumes CRLF iff the LF is followed by
00559       // either WSP (folding) or NULL (premature end of quoted-string;
00560       // Should be fixed, since NULL is allowed as per rfc822).
00561       READ_ch_OR_FAIL;
00562       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00563         // folding
00564         // correct folding
00565         result += QChar( ch );
00566       } else {
00567         // non-folding
00568         KMIME_WARN_LONE( LF );
00569         result += QChar('\n');
00570         // pos is decremented, so's we need not duplicate the whole
00571         // switch here. ch could've been everything (incl. <">, "\").
00572         scursor--;
00573       }
00574       break;
00575     default:
00576       KMIME_WARN_IF_8BIT( ch );
00577       result += QChar( ch );
00578     }
00579   }
00580 
00581   return false;
00582 }
00583 
00584 // known issues:
00585 //
00586 // - doesn't handle encoded-word inside comments.
00587 
00588 bool parseComment( const char* &scursor, const char * const send,
00589                    QString &result, bool isCRLF, bool reallySave )
00590 {
00591   int commentNestingDepth = 1;
00592   const char *afterLastClosingParenPos = 0;
00593   QString maybeCmnt;
00594   const char *oldscursor = scursor;
00595 
00596   assert( *(scursor-1) == '(' );
00597 
00598   while ( commentNestingDepth ) {
00599     QString cmntPart;
00600     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00601       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00602       // see the kdoc for above function for the possible conditions
00603       // we have to check:
00604       switch ( *(scursor-1) ) {
00605       case ')':
00606         if ( reallySave ) {
00607           // add the chunk that's now surely inside the comment.
00608           result += maybeCmnt;
00609           result += cmntPart;
00610           if ( commentNestingDepth > 1 ) {
00611             // don't add the outermost ')'...
00612             result += QChar(')');
00613           }
00614           maybeCmnt.clear();
00615         }
00616         afterLastClosingParenPos = scursor;
00617         --commentNestingDepth;
00618         break;
00619       case '(':
00620         if ( reallySave ) {
00621           // don't add to "result" yet, because we might find that we
00622           // are already outside the (broken) comment...
00623           maybeCmnt += cmntPart;
00624           maybeCmnt += QChar('(');
00625         }
00626         ++commentNestingDepth;
00627         break;
00628       default: assert( 0 );
00629       } // switch
00630     } else {
00631       // !parseGenericQuotedString, ie. premature end
00632       if ( afterLastClosingParenPos ) {
00633         scursor = afterLastClosingParenPos;
00634       } else {
00635         scursor = oldscursor;
00636       }
00637       return false;
00638     }
00639   } // while
00640 
00641   return true;
00642 }
00643 
00644 // known issues: none.
00645 
00646 bool parsePhrase( const char* &scursor, const char * const send,
00647                   QString &result, bool isCRLF )
00648 {
00649   enum {
00650     None, Phrase, Atom, EncodedWord, QuotedString
00651   } found = None;
00652 
00653   QString tmp;
00654   QByteArray lang, charset;
00655   const char *successfullyParsed = 0;
00656   // only used by the encoded-word branch
00657   const char *oldscursor;
00658   // used to suppress whitespace between adjacent encoded-words
00659   // (rfc2047, 6.2):
00660   bool lastWasEncodedWord = false;
00661 
00662   while ( scursor != send ) {
00663     char ch = *scursor++;
00664     switch ( ch ) {
00665     case '.': // broken, but allow for intorop's sake
00666       if ( found == None ) {
00667         --scursor;
00668         return false;
00669       } else {
00670         if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
00671           result += ". ";
00672         } else {
00673           result += '.';
00674         }
00675         successfullyParsed = scursor;
00676       }
00677       break;
00678     case '"': // quoted-string
00679       tmp.clear();
00680       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00681         successfullyParsed = scursor;
00682         assert( *(scursor-1) == '"' );
00683         switch ( found ) {
00684         case None:
00685           found = QuotedString;
00686           break;
00687         case Phrase:
00688         case Atom:
00689         case EncodedWord:
00690         case QuotedString:
00691           found = Phrase;
00692           result += QChar(' '); // rfc822, 3.4.4
00693           break;
00694         default:
00695           assert( 0 );
00696         }
00697         lastWasEncodedWord = false;
00698         result += tmp;
00699       } else {
00700         // premature end of quoted string.
00701         // What to do? Return leading '"' as special? Return as quoted-string?
00702         // We do the latter if we already found something, else signal failure.
00703         if ( found == None ) {
00704           return false;
00705         } else {
00706           result += QChar(' '); // rfc822, 3.4.4
00707           result += tmp;
00708           return true;
00709         }
00710       }
00711       break;
00712     case '(': // comment
00713       // parse it, but ignore content:
00714       tmp.clear();
00715       if ( parseComment( scursor, send, tmp, isCRLF,
00716                          false /*don't bother with the content*/ ) ) {
00717         successfullyParsed = scursor;
00718         lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00719       } else {
00720         if ( found == None ) {
00721           return false;
00722         } else {
00723           scursor = successfullyParsed;
00724           return true;
00725         }
00726       }
00727       break;
00728     case '=': // encoded-word
00729       tmp.clear();
00730       oldscursor = scursor;
00731       lang.clear();
00732       charset.clear();
00733       if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
00734         successfullyParsed = scursor;
00735         switch ( found ) {
00736         case None:
00737           found = EncodedWord;
00738           break;
00739         case Phrase:
00740         case EncodedWord:
00741         case Atom:
00742         case QuotedString:
00743           if ( !lastWasEncodedWord ) {
00744             result += QChar(' '); // rfc822, 3.4.4
00745           }
00746           found = Phrase;
00747           break;
00748         default: assert( 0 );
00749         }
00750         lastWasEncodedWord = true;
00751         result += tmp;
00752         break;
00753       } else {
00754         // parse as atom:
00755         scursor = oldscursor;
00756       }
00757       // fall though...
00758 
00759     default: //atom
00760       tmp.clear();
00761       scursor--;
00762       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00763         successfullyParsed = scursor;
00764         switch ( found ) {
00765         case None:
00766           found = Atom;
00767           break;
00768         case Phrase:
00769         case Atom:
00770         case EncodedWord:
00771         case QuotedString:
00772           found = Phrase;
00773           result += QChar(' '); // rfc822, 3.4.4
00774           break;
00775         default:
00776           assert( 0 );
00777         }
00778         lastWasEncodedWord = false;
00779         result += tmp;
00780       } else {
00781         if ( found == None ) {
00782           return false;
00783         } else {
00784           scursor = successfullyParsed;
00785           return true;
00786         }
00787       }
00788     }
00789     eatWhiteSpace( scursor, send );
00790   }
00791 
00792   return found != None;
00793 }
00794 
00795 bool parseDotAtom( const char* &scursor, const char * const send,
00796                    QString &result, bool isCRLF )
00797 {
00798   eatCFWS( scursor, send, isCRLF );
00799 
00800   // always points to just after the last atom parsed:
00801   const char *successfullyParsed;
00802 
00803   QString tmp;
00804   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
00805     return false;
00806   }
00807   result += tmp;
00808   successfullyParsed = scursor;
00809 
00810   while ( scursor != send ) {
00811 
00812     // end of header or no '.' -> return
00813     if ( scursor == send || *scursor != '.' ) {
00814       return true;
00815     }
00816     scursor++; // eat '.'
00817 
00818     if ( scursor == send || !isAText( *scursor ) ) {
00819       // end of header or no AText, but this time following a '.'!:
00820       // reset cursor to just after last successfully parsed char and
00821       // return:
00822       scursor = successfullyParsed;
00823       return true;
00824     }
00825 
00826     // try to parse the next atom:
00827     QString maybeAtom;
00828     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00829       scursor = successfullyParsed;
00830       return true;
00831     }
00832 
00833     result += QChar('.');
00834     result += maybeAtom;
00835     successfullyParsed = scursor;
00836   }
00837 
00838   scursor = successfullyParsed;
00839   return true;
00840 }
00841 
00842 void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
00843 {
00844   QString dummy;
00845 
00846   while ( scursor != send ) {
00847     const char *oldscursor = scursor;
00848 
00849     char ch = *scursor++;
00850 
00851     switch( ch ) {
00852     case ' ':
00853     case '\t': // whitespace
00854     case '\r':
00855     case '\n': // folding
00856       continue;
00857 
00858     case '(': // comment
00859       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
00860         continue;
00861       }
00862       scursor = oldscursor;
00863       return;
00864 
00865     default:
00866       scursor = oldscursor;
00867       return;
00868     }
00869   }
00870 }
00871 
00872 bool parseDomain( const char* &scursor, const char * const send,
00873                   QString &result, bool isCRLF )
00874 {
00875   eatCFWS( scursor, send, isCRLF );
00876   if ( scursor == send ) {
00877     return false;
00878   }
00879 
00880   // domain := dot-atom / domain-literal / atom *("." atom)
00881   //
00882   // equivalent to:
00883   // domain = dot-atom / domain-literal,
00884   // since parseDotAtom does allow CFWS between atoms and dots
00885 
00886   if ( *scursor == '[' ) {
00887     // domain-literal:
00888     QString maybeDomainLiteral;
00889     // eat '[':
00890     scursor++;
00891     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00892                                       isCRLF, '[', ']' ) ) {
00893       if ( scursor == send ) {
00894         // end of header: check for closing ']':
00895         if ( *(scursor-1) == ']' ) {
00896           // OK, last char was ']':
00897           result = maybeDomainLiteral;
00898           return true;
00899         } else {
00900           // not OK, domain-literal wasn't closed:
00901           return false;
00902         }
00903       }
00904       // we hit openChar in parseGenericQuotedString.
00905       // include it in maybeDomainLiteral and keep on parsing:
00906       if ( *(scursor-1) == '[' ) {
00907         maybeDomainLiteral += QChar('[');
00908         continue;
00909       }
00910       // OK, real end of domain-literal:
00911       result = maybeDomainLiteral;
00912       return true;
00913     }
00914   } else {
00915     // dot-atom:
00916     QString maybeDotAtom;
00917     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00918       result = maybeDotAtom;
00919       // Domain may end with '.', if so preserve it'
00920       if ( scursor != send && *scursor == '.' ) {
00921         result += QChar('.');
00922         scursor++;
00923       }
00924       return true;
00925     }
00926   }
00927   return false;
00928 }
00929 
00930 bool parseObsRoute( const char* &scursor, const char* const send,
00931                     QStringList &result, bool isCRLF, bool save )
00932 {
00933   while ( scursor != send ) {
00934     eatCFWS( scursor, send, isCRLF );
00935     if ( scursor == send ) {
00936       return false;
00937     }
00938 
00939     // empty entry:
00940     if ( *scursor == ',' ) {
00941       scursor++;
00942       if ( save ) {
00943         result.append( QString() );
00944       }
00945       continue;
00946     }
00947 
00948     // empty entry ending the list:
00949     if ( *scursor == ':' ) {
00950       scursor++;
00951       if ( save ) {
00952         result.append( QString() );
00953       }
00954       return true;
00955     }
00956 
00957     // each non-empty entry must begin with '@':
00958     if ( *scursor != '@' ) {
00959       return false;
00960     } else {
00961       scursor++;
00962     }
00963 
00964     QString maybeDomain;
00965     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
00966       return false;
00967     }
00968     if ( save ) {
00969       result.append( maybeDomain );
00970     }
00971 
00972     // eat the following (optional) comma:
00973     eatCFWS( scursor, send, isCRLF );
00974     if ( scursor == send ) {
00975       return false;
00976     }
00977     if ( *scursor == ':' ) {
00978       scursor++;
00979       return true;
00980     }
00981     if ( *scursor == ',' ) {
00982       scursor++;
00983     }
00984   }
00985 
00986   return false;
00987 }
00988 
00989 bool parseAddrSpec( const char* &scursor, const char * const send,
00990                     AddrSpec &result, bool isCRLF )
00991 {
00992   //
00993   // STEP 1:
00994   // local-part := dot-atom / quoted-string / word *("." word)
00995   //
00996   // this is equivalent to:
00997   // local-part := word *("." word)
00998 
00999   QString maybeLocalPart;
01000   QString tmp;
01001 
01002   while ( scursor != send ) {
01003     // first, eat any whitespace
01004     eatCFWS( scursor, send, isCRLF );
01005 
01006     char ch = *scursor++;
01007     switch ( ch ) {
01008     case '.': // dot
01009       maybeLocalPart += QChar('.');
01010       break;
01011 
01012     case '@':
01013       goto SAW_AT_SIGN;
01014       break;
01015 
01016     case '"': // quoted-string
01017       tmp.clear();
01018       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
01019         maybeLocalPart += tmp;
01020       } else {
01021         return false;
01022       }
01023       break;
01024 
01025     default: // atom
01026       scursor--; // re-set scursor to point to ch again
01027       tmp.clear();
01028       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
01029         maybeLocalPart += tmp;
01030       } else {
01031         return false; // parseAtom can only fail if the first char is non-atext.
01032       }
01033       break;
01034     }
01035   }
01036 
01037   return false;
01038 
01039   //
01040   // STEP 2:
01041   // domain
01042   //
01043 
01044 SAW_AT_SIGN:
01045 
01046   assert( *(scursor-1) == '@' );
01047 
01048   QString maybeDomain;
01049   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
01050     return false;
01051   }
01052 
01053   result.localPart = maybeLocalPart;
01054   result.domain = maybeDomain;
01055 
01056   return true;
01057 }
01058 
01059 bool parseAngleAddr( const char* &scursor, const char * const send,
01060                      AddrSpec &result, bool isCRLF )
01061 {
01062   // first, we need an opening angle bracket:
01063   eatCFWS( scursor, send, isCRLF );
01064   if ( scursor == send || *scursor != '<' ) {
01065     return false;
01066   }
01067   scursor++; // eat '<'
01068 
01069   eatCFWS( scursor, send, isCRLF );
01070   if ( scursor == send ) {
01071     return false;
01072   }
01073 
01074   if ( *scursor == '@' || *scursor == ',' ) {
01075     // obs-route: parse, but ignore:
01076     KMIME_WARN << "obsolete source route found! ignoring.";
01077     QStringList dummy;
01078     if ( !parseObsRoute( scursor, send, dummy,
01079                          isCRLF, false /* don't save */ ) ) {
01080       return false;
01081     }
01082     // angle-addr isn't complete until after the '>':
01083     if ( scursor == send ) {
01084       return false;
01085     }
01086   }
01087 
01088   // parse addr-spec:
01089   AddrSpec maybeAddrSpec;
01090   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01091     return false;
01092   }
01093 
01094   eatCFWS( scursor, send, isCRLF );
01095   if ( scursor == send || *scursor != '>' ) {
01096     return false;
01097   }
01098   scursor++;
01099 
01100   result = maybeAddrSpec;
01101   return true;
01102 
01103 }
01104 
01105 bool parseMailbox( const char* &scursor, const char * const send,
01106                    Mailbox &result, bool isCRLF )
01107 {
01108   eatCFWS( scursor, send, isCRLF );
01109   if ( scursor == send ) {
01110     return false;
01111   }
01112 
01113   AddrSpec maybeAddrSpec;
01114   QString maybeDisplayName;
01115 
01116   // first, try if it's a vanilla addr-spec:
01117   const char * oldscursor = scursor;
01118   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01119     result.setAddress( maybeAddrSpec );
01120     // check for the obsolete form of display-name (as comment):
01121     eatWhiteSpace( scursor, send );
01122     if ( scursor != send && *scursor == '(' ) {
01123       scursor++;
01124       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01125         return false;
01126       }
01127     }
01128     result.setNameFrom7Bit( maybeDisplayName.toLatin1() );
01129     return true;
01130   }
01131   scursor = oldscursor;
01132 
01133   // second, see if there's a display-name:
01134   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01135     // failed: reset cursor, note absent display-name
01136     maybeDisplayName.clear();
01137     scursor = oldscursor;
01138   } else {
01139     // succeeded: eat CFWS
01140     eatCFWS( scursor, send, isCRLF );
01141     if ( scursor == send ) {
01142       return false;
01143     }
01144   }
01145 
01146   // third, parse the angle-addr:
01147   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
01148     return false;
01149   }
01150 
01151   if ( maybeDisplayName.isNull() ) {
01152     // check for the obsolete form of display-name (as comment):
01153     eatWhiteSpace( scursor, send );
01154     if ( scursor != send && *scursor == '(' ) {
01155       scursor++;
01156       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01157         return false;
01158       }
01159     }
01160   }
01161 
01162   result.setName( maybeDisplayName );
01163   result.setAddress( maybeAddrSpec );
01164   return true;
01165 }
01166 
01167 bool parseGroup( const char* &scursor, const char * const send,
01168                  Address &result, bool isCRLF )
01169 {
01170   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
01171   //
01172   // equivalent to:
01173   // group   := display-name ":" [ obs-mbox-list ] ";"
01174 
01175   eatCFWS( scursor, send, isCRLF );
01176   if ( scursor == send ) {
01177     return false;
01178   }
01179 
01180   // get display-name:
01181   QString maybeDisplayName;
01182   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01183     return false;
01184   }
01185 
01186   // get ":":
01187   eatCFWS( scursor, send, isCRLF );
01188   if ( scursor == send || *scursor != ':' ) {
01189     return false;
01190   }
01191 
01192   // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
01193   //            automatically calls removeBidiControlChars
01194   result.displayName = removeBidiControlChars( maybeDisplayName );
01195 
01196   // get obs-mbox-list (may contain empty entries):
01197   scursor++;
01198   while ( scursor != send ) {
01199     eatCFWS( scursor, send, isCRLF );
01200     if ( scursor == send ) {
01201       return false;
01202     }
01203 
01204     // empty entry:
01205     if ( *scursor == ',' ) {
01206       scursor++;
01207       continue;
01208     }
01209 
01210     // empty entry ending the list:
01211     if ( *scursor == ';' ) {
01212       scursor++;
01213       return true;
01214     }
01215 
01216     Mailbox maybeMailbox;
01217     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01218       return false;
01219     }
01220     result.mailboxList.append( maybeMailbox );
01221 
01222     eatCFWS( scursor, send, isCRLF );
01223     // premature end:
01224     if ( scursor == send ) {
01225       return false;
01226     }
01227     // regular end of the list:
01228     if ( *scursor == ';' ) {
01229       scursor++;
01230       return true;
01231     }
01232     // eat regular list entry separator:
01233     if ( *scursor == ',' ) {
01234       scursor++;
01235     }
01236   }
01237   return false;
01238 }
01239 
01240 bool parseAddress( const char* &scursor, const char * const send,
01241                    Address &result, bool isCRLF )
01242 {
01243   // address       := mailbox / group
01244 
01245   eatCFWS( scursor, send, isCRLF );
01246   if ( scursor == send ) {
01247     return false;
01248   }
01249 
01250   // first try if it's a single mailbox:
01251   Mailbox maybeMailbox;
01252   const char * oldscursor = scursor;
01253   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01254     // yes, it is:
01255     result.displayName.clear();
01256     result.mailboxList.append( maybeMailbox );
01257     return true;
01258   }
01259   scursor = oldscursor;
01260 
01261   Address maybeAddress;
01262 
01263   // no, it's not a single mailbox. Try if it's a group:
01264   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
01265     return false;
01266   }
01267 
01268   result = maybeAddress;
01269   return true;
01270 }
01271 
01272 bool parseAddressList( const char* &scursor, const char * const send,
01273                        AddressList &result, bool isCRLF )
01274 {
01275   while ( scursor != send ) {
01276     eatCFWS( scursor, send, isCRLF );
01277     // end of header: this is OK.
01278     if ( scursor == send ) {
01279       return true;
01280     }
01281     // empty entry: ignore:
01282     if ( *scursor == ',' ) {
01283       scursor++;
01284       continue;
01285     }
01286     // broken clients might use ';' as list delimiter, accept that as well
01287     if ( *scursor == ';' ) {
01288       scursor++;
01289       continue;
01290     }
01291 
01292     // parse one entry
01293     Address maybeAddress;
01294     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
01295       return false;
01296     }
01297     result.append( maybeAddress );
01298 
01299     eatCFWS( scursor, send, isCRLF );
01300     // end of header: this is OK.
01301     if ( scursor == send ) {
01302       return true;
01303     }
01304     // comma separating entries: eat it.
01305     if ( *scursor == ',' ) {
01306       scursor++;
01307     }
01308   }
01309   return true;
01310 }
01311 
01312 static QString asterisk = QString::fromLatin1( "*0*", 1 );
01313 static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
01314 //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );
01315 
01316 bool parseParameter( const char* &scursor, const char * const send,
01317                      QPair<QString,QStringOrQPair> &result, bool isCRLF )
01318 {
01319   // parameter = regular-parameter / extended-parameter
01320   // regular-parameter = regular-parameter-name "=" value
01321   // extended-parameter =
01322   // value = token / quoted-string
01323   //
01324   // note that rfc2231 handling is out of the scope of this function.
01325   // Therefore we return the attribute as QString and the value as
01326   // (start,length) tupel if we see that the value is encoded
01327   // (trailing asterisk), for parseParameterList to decode...
01328 
01329   eatCFWS( scursor, send, isCRLF );
01330   if ( scursor == send ) {
01331     return false;
01332   }
01333 
01334   //
01335   // parse the parameter name:
01336   //
01337   QString maybeAttribute;
01338   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
01339     return false;
01340   }
01341 
01342   eatCFWS( scursor, send, isCRLF );
01343   // premature end: not OK (haven't seen '=' yet).
01344   if ( scursor == send || *scursor != '=' ) {
01345     return false;
01346   }
01347   scursor++; // eat '='
01348 
01349   eatCFWS( scursor, send, isCRLF );
01350   if ( scursor == send ) {
01351     // don't choke on attribute=, meaning the value was omitted:
01352     if ( maybeAttribute.endsWith( asterisk ) ) {
01353       KMIME_WARN << "attribute ends with \"*\", but value is empty!"
01354         "Chopping away \"*\".";
01355       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01356     }
01357     result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01358     return true;
01359   }
01360 
01361   const char * oldscursor = scursor;
01362 
01363   //
01364   // parse the parameter value:
01365   //
01366   QStringOrQPair maybeValue;
01367   if ( *scursor == '"' ) {
01368     // value is a quoted-string:
01369     scursor++;
01370     if ( maybeAttribute.endsWith( asterisk ) ) {
01371       // attributes ending with "*" designate extended-parameters,
01372       // which cannot have quoted-strings as values. So we remove the
01373       // trailing "*" to not confuse upper layers.
01374       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
01375         "Chopping away \"*\".";
01376       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01377     }
01378 
01379     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01380       scursor = oldscursor;
01381       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01382       return false; // this case needs further processing by upper layers!!
01383     }
01384   } else {
01385     // value is a token:
01386     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01387       scursor = oldscursor;
01388       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01389       return false; // this case needs further processing by upper layers!!
01390     }
01391   }
01392 
01393   result = qMakePair( maybeAttribute.toLower(), maybeValue );
01394   return true;
01395 }
01396 
01397 bool parseRawParameterList( const char* &scursor, const char * const send,
01398                             QMap<QString,QStringOrQPair> &result,
01399                             bool isCRLF )
01400 {
01401   // we use parseParameter() consecutively to obtain a map of raw
01402   // attributes to raw values. "Raw" here means that we don't do
01403   // rfc2231 decoding and concatenation. This is left to
01404   // parseParameterList(), which will call this function.
01405   //
01406   // The main reason for making this chunk of code a separate
01407   // (private) method is that we can deal with broken parameters
01408   // _here_ and leave the rfc2231 handling solely to
01409   // parseParameterList(), which will still be enough work.
01410 
01411   while ( scursor != send ) {
01412     eatCFWS( scursor, send, isCRLF );
01413     // empty entry ending the list: OK.
01414     if ( scursor == send ) {
01415       return true;
01416     }
01417     // empty list entry: ignore.
01418     if ( *scursor == ';' ) {
01419       scursor++;
01420       continue;
01421     }
01422 
01423     QPair<QString,QStringOrQPair> maybeParameter;
01424     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01425       // we need to do a bit of work if the attribute is not
01426       // NULL. These are the cases marked with "needs further
01427       // processing" in parseParameter(). Specifically, parsing of the
01428       // token or the quoted-string, which should represent the value,
01429       // failed. We take the easy way out and simply search for the
01430       // next ';' to start parsing again. (Another option would be to
01431       // take the text between '=' and ';' as value)
01432       if ( maybeParameter.first.isNull() ) {
01433         return false;
01434       }
01435       while ( scursor != send ) {
01436         if ( *scursor++ == ';' ) {
01437           goto IS_SEMICOLON;
01438         }
01439       }
01440       // scursor == send case: end of list.
01441       return true;
01442     IS_SEMICOLON:
01443       // *scursor == ';' case: parse next entry.
01444       continue;
01445     }
01446     // successful parsing brings us here:
01447     result.insert( maybeParameter.first, maybeParameter.second );
01448 
01449     eatCFWS( scursor, send, isCRLF );
01450     // end of header: ends list.
01451     if ( scursor == send ) {
01452       return true;
01453     }
01454     // regular separator: eat it.
01455     if ( *scursor == ';' ) {
01456       scursor++;
01457     }
01458   }
01459   return true;
01460 }
01461 
01462 static void decodeRFC2231Value( Codec* &rfc2231Codec,
01463                                 QTextCodec* &textcodec,
01464                                 bool isContinuation, QString &value,
01465                                 QPair<const char*,int> &source )
01466 {
01467   //
01468   // parse the raw value into (charset,language,text):
01469   //
01470 
01471   const char * decBegin = source.first;
01472   const char * decCursor = decBegin;
01473   const char * decEnd = decCursor + source.second;
01474 
01475   if ( !isContinuation ) {
01476     // find the first single quote
01477     while ( decCursor != decEnd ) {
01478       if ( *decCursor == '\'' ) {
01479         break;
01480       } else {
01481         decCursor++;
01482       }
01483     }
01484 
01485     if ( decCursor == decEnd ) {
01486       // there wasn't a single single quote at all!
01487       // take the whole value to be in latin-1:
01488       KMIME_WARN << "No charset in extended-initial-value."
01489         "Assuming \"iso-8859-1\".";
01490       value += QString::fromLatin1( decBegin, source.second );
01491       return;
01492     }
01493 
01494     QByteArray charset( decBegin, decCursor - decBegin );
01495 
01496     const char * oldDecCursor = ++decCursor;
01497     // find the second single quote (we ignore the language tag):
01498     while ( decCursor != decEnd ) {
01499       if ( *decCursor == '\'' ) {
01500         break;
01501       } else {
01502         decCursor++;
01503       }
01504     }
01505     if ( decCursor == decEnd ) {
01506       KMIME_WARN << "No language in extended-initial-value."
01507         "Trying to recover.";
01508       decCursor = oldDecCursor;
01509     } else {
01510       decCursor++;
01511     }
01512 
01513     // decCursor now points to the start of the
01514     // "extended-other-values":
01515 
01516     //
01517     // get the decoders:
01518     //
01519 
01520     bool matchOK = false;
01521     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01522     if ( !matchOK ) {
01523       textcodec = 0;
01524       KMIME_WARN_UNKNOWN( Charset, charset );
01525     }
01526   }
01527 
01528   if ( !rfc2231Codec ) {
01529     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01530     assert( rfc2231Codec );
01531   }
01532 
01533   if ( !textcodec ) {
01534     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01535     return;
01536   }
01537 
01538   Decoder * dec = rfc2231Codec->makeDecoder();
01539   assert( dec );
01540 
01541   //
01542   // do the decoding:
01543   //
01544 
01545   QByteArray buffer;
01546   buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01547   QByteArray::Iterator bit = buffer.begin();
01548   QByteArray::ConstIterator bend = buffer.end();
01549 
01550   if ( !dec->decode( decCursor, decEnd, bit, bend ) ) {
01551     KMIME_WARN << rfc2231Codec->name()
01552                << "codec lies about its maxDecodedSizeFor()" << endl
01553                << "result may be truncated";
01554   }
01555 
01556   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01557 
01558   // kDebug(5320) << "value now: \"" << value << "\"";
01559   // cleanup:
01560   delete dec;
01561 }
01562 
01563 // known issues:
01564 //  - permutes rfc2231 continuations when the total number of parts
01565 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01566 
01567 bool parseParameterList( const char* &scursor, const char * const send,
01568                          QMap<QString,QString> &result, bool isCRLF )
01569 {
01570   // parse the list into raw attribute-value pairs:
01571   QMap<QString,QStringOrQPair> rawParameterList;
01572   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
01573     return false;
01574   }
01575 
01576   if ( rawParameterList.isEmpty() ) {
01577     return true;
01578   }
01579 
01580   // decode rfc 2231 continuations and alternate charset encoding:
01581 
01582   // NOTE: this code assumes that what QMapIterator delivers is sorted
01583   // by the key!
01584 
01585   Codec * rfc2231Codec = 0;
01586   QTextCodec * textcodec = 0;
01587   QString attribute;
01588   QString value;
01589   enum Modes {
01590     NoMode = 0x0, Continued = 0x1, Encoded = 0x2
01591   } mode;
01592 
01593   QMap<QString,QStringOrQPair>::Iterator it, end = rawParameterList.end();
01594 
01595   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01596     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01597       //
01598       // new attribute:
01599       //
01600 
01601       // store the last attribute/value pair in the result map now:
01602       if ( !attribute.isNull() ) {
01603         result.insert( attribute, value );
01604       }
01605       // and extract the information from the new raw attribute:
01606       value.clear();
01607       attribute = it.key();
01608       mode = NoMode;
01609       // is the value encoded?
01610       if ( attribute.endsWith( asterisk ) ) {
01611         attribute.truncate( attribute.length() - 1 );
01612         mode = (Modes) ((int) mode | Encoded);
01613       }
01614       // is the value continued?
01615       if ( attribute.endsWith( asteriskZero ) ) {
01616         attribute.truncate( attribute.length() - 2 );
01617         mode = (Modes) ((int) mode | Continued);
01618       }
01619       //
01620       // decode if necessary:
01621       //
01622       if ( mode & Encoded ) {
01623         decodeRFC2231Value( rfc2231Codec, textcodec,
01624                             false, /* isn't continuation */
01625                             value, (*it).qpair );
01626       } else {
01627         // not encoded.
01628         if ( (*it).qpair.first ) {
01629           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01630         } else {
01631           value += (*it).qstring;
01632         }
01633       }
01634 
01635       //
01636       // shortcut-processing when the value isn't encoded:
01637       //
01638 
01639       if ( !(mode & Continued) ) {
01640         // save result already:
01641         result.insert( attribute, value );
01642         // force begin of a new attribute:
01643         attribute.clear();
01644       }
01645     } else { // it.key().startsWith( attribute )
01646       //
01647       // continuation
01648       //
01649 
01650       // ignore the section and trust QMap to have sorted the keys:
01651       if ( it.key().endsWith( asterisk ) ) {
01652         // encoded
01653         decodeRFC2231Value( rfc2231Codec, textcodec,
01654                             true, /* is continuation */
01655                             value, (*it).qpair );
01656       } else {
01657         // not encoded
01658         if ( (*it).qpair.first ) {
01659           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01660         } else {
01661           value += (*it).qstring;
01662         }
01663       }
01664     }
01665   }
01666 
01667   // write last attr/value pair:
01668   if ( !attribute.isNull() ) {
01669     result.insert( attribute, value );
01670   }
01671 
01672   return true;
01673 }
01674 
01675 static const char * const stdDayNames[] = {
01676   "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
01677 };
01678 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
01679 
01680 static bool parseDayName( const char* &scursor, const char * const send )
01681 {
01682   // check bounds:
01683   if ( send - scursor < 3 ) {
01684     return false;
01685   }
01686 
01687   for ( int i = 0 ; i < stdDayNamesLen ; ++i ) {
01688     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01689       scursor += 3;
01690       // kDebug(5320) << "found" << stdDayNames[i];
01691       return true;
01692     }
01693   }
01694 
01695   return false;
01696 }
01697 
01698 static const char * const stdMonthNames[] = {
01699   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
01700   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
01701 };
01702 static const int stdMonthNamesLen =
01703                               sizeof stdMonthNames / sizeof *stdMonthNames;
01704 
01705 static bool parseMonthName( const char* &scursor, const char * const send,
01706                             int &result )
01707 {
01708   // check bounds:
01709   if ( send - scursor < 3 ) {
01710     return false;
01711   }
01712 
01713   for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
01714     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01715       scursor += 3;
01716       return true;
01717     }
01718   }
01719 
01720   // not found:
01721   return false;
01722 }
01723 
// Table of alphabetic time zone names and their offsets from GMT in
// seconds (positive values are east of GMT).
//
// The table is searched linearly and the first matching name wins, so
// every name must appear only once.
//
// NOTE(review): the single-letter military zones carry the offsets as
// they are listed here; RFC 2822, section 4.3 recommends treating them
// as "-0000" because their definition in RFC 822 was in error - confirm
// whether that should be reflected here.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01774 
01775 static bool parseAlphaNumericTimeZone( const char* &scursor,
01776                                        const char * const send,
01777                                        long int &secsEastOfGMT,
01778                                        bool &timeZoneKnown )
01779 {
01780   QPair<const char*,int> maybeTimeZone( 0, 0 );
01781   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
01782     return false;
01783   }
01784   for ( int i = 0 ; i < timeZonesLen ; ++i ) {
01785     if ( qstrnicmp( timeZones[i].tzName,
01786                     maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01787       scursor += maybeTimeZone.second;
01788       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01789       timeZoneKnown = true;
01790       return true;
01791     }
01792   }
01793 
01794   // don't choke just because we don't happen to know the time zone
01795   KMIME_WARN_UNKNOWN( time zone,
01796                       QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
01797   secsEastOfGMT = 0;
01798   timeZoneKnown = false;
01799   return true;
01800 }
01801 
// Parses a run of ASCII digits starting at @p scursor.
//
// @p result receives the value of the digits (0 if there are none), the
// cursor is left on the first non-digit and the number of digits that
// were consumed is returned.
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  // Accumulate unsigned so that an over-long digit run cannot trigger
  // signed overflow.
  unsigned int value = 0;
  int digits = 0;
  result = 0;
  // Compare against '0'..'9' directly: handing a plain (possibly
  // negative) char from 8-bit input to isdigit() is undefined.
  while ( scursor != send && *scursor >= '0' && *scursor <= '9' ) {
    value = value * 10u + static_cast<unsigned int>( *scursor - '0' );
    ++scursor;
    ++digits;
  }
  result = static_cast<int>( value );
  return digits;
}
01813 
01814 static bool parseTimeOfDay( const char* &scursor, const char * const send,
01815                             int &hour, int &min, int &sec, bool isCRLF=false )
01816 {
01817   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01818 
01819   //
01820   // 2DIGIT representing "hour":
01821   //
01822   if ( !parseDigits( scursor, send, hour ) ) {
01823     return false;
01824   }
01825 
01826   eatCFWS( scursor, send, isCRLF );
01827   if ( scursor == send || *scursor != ':' ) {
01828     return false;
01829   }
01830   scursor++; // eat ':'
01831 
01832   eatCFWS( scursor, send, isCRLF );
01833   if ( scursor == send ) {
01834     return false;
01835   }
01836 
01837   //
01838   // 2DIGIT representing "minute":
01839   //
01840   if ( !parseDigits( scursor, send, min ) ) {
01841     return false;
01842   }
01843 
01844   eatCFWS( scursor, send, isCRLF );
01845   if ( scursor == send ) {
01846     return true; // seconds are optional
01847   }
01848 
01849   //
01850   // let's see if we have a 2DIGIT representing "second":
01851   //
01852   if ( *scursor == ':' ) {
01853     // yepp, there are seconds:
01854     scursor++; // eat ':'
01855     eatCFWS( scursor, send, isCRLF );
01856     if ( scursor == send ) {
01857       return false;
01858     }
01859 
01860     if ( !parseDigits( scursor, send, sec ) ) {
01861       return false;
01862     }
01863   } else {
01864     sec = 0;
01865   }
01866 
01867   return true;
01868 }
01869 
01870 bool parseTime( const char* &scursor, const char * send,
01871                 int &hour, int &min, int &sec, long int &secsEastOfGMT,
01872                 bool &timeZoneKnown, bool isCRLF )
01873 {
01874   // time := time-of-day CFWS ( zone / obs-zone )
01875   //
01876   // obs-zone    := "UT" / "GMT" /
01877   //                "EST" / "EDT" / ; -0500 / -0400
01878   //                "CST" / "CDT" / ; -0600 / -0500
01879   //                "MST" / "MDT" / ; -0700 / -0600
01880   //                "PST" / "PDT" / ; -0800 / -0700
01881   //                "A"-"I" / "a"-"i" /
01882   //                "K"-"Z" / "k"-"z"
01883 
01884   eatCFWS( scursor, send, isCRLF );
01885   if ( scursor == send ) {
01886     return false;
01887   }
01888 
01889   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
01890     return false;
01891   }
01892 
01893   eatCFWS( scursor, send, isCRLF );
01894   if ( scursor == send ) {
01895     timeZoneKnown = false;
01896     secsEastOfGMT = 0;
01897     return true; // allow missing timezone
01898   }
01899 
01900   timeZoneKnown = true;
01901   if ( *scursor == '+' || *scursor == '-' ) {
01902     // remember and eat '-'/'+':
01903     const char sign = *scursor++;
01904     // numerical timezone:
01905     int maybeTimeZone;
01906     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
01907       return false;
01908     }
01909     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01910     if ( sign == '-' ) {
01911       secsEastOfGMT *= -1;
01912       if ( secsEastOfGMT == 0 ) {
01913         timeZoneKnown = false; // -0000 means indetermined tz
01914       }
01915     }
01916   } else {
01917     // maybe alphanumeric timezone:
01918     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
01919       return false;
01920     }
01921   }
01922   return true;
01923 }
01924 
01925 bool parseDateTime( const char* &scursor, const char * const send,
01926                     KDateTime &result, bool isCRLF )
01927 {
01928   // Parsing date-time; strict mode:
01929   //
01930   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
01931   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
01932   //                time
01933   //
01934   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
01935   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
01936   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
01937 
01938   result = KDateTime();
01939   QDateTime maybeDateTime;
01940 
01941   eatCFWS( scursor, send, isCRLF );
01942   if ( scursor == send ) {
01943     return false;
01944   }
01945 
01946   //
01947   // let's see if there's a day-of-week:
01948   //
01949   if ( parseDayName( scursor, send ) ) {
01950     eatCFWS( scursor, send, isCRLF );
01951     if ( scursor == send ) {
01952       return false;
01953     }
01954     // day-name should be followed by ',' but we treat it as optional:
01955     if ( *scursor == ',' ) {
01956       scursor++; // eat ','
01957       eatCFWS( scursor, send, isCRLF );
01958     }
01959   }
01960 
01961   //
01962   // 1*2DIGIT representing "day" (of month):
01963   //
01964   int maybeDay;
01965   if ( !parseDigits( scursor, send, maybeDay ) ) {
01966     return false;
01967   }
01968 
01969   eatCFWS( scursor, send, isCRLF );
01970   if ( scursor == send ) {
01971     return false;
01972   }
01973 
01974   //
01975   // month-name:
01976   //
01977   int maybeMonth = 0;
01978   if ( !parseMonthName( scursor, send, maybeMonth ) ) {
01979     return false;
01980   }
01981   if ( scursor == send ) {
01982     return false;
01983   }
01984   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01985   ++maybeMonth; // 0-11 -> 1-12
01986 
01987   eatCFWS( scursor, send, isCRLF );
01988   if ( scursor == send ) {
01989     return false;
01990   }
01991 
01992   //
01993   // 2*DIGIT representing "year":
01994   //
01995   int maybeYear;
01996   if ( !parseDigits( scursor, send, maybeYear ) ) {
01997     return false;
01998   }
01999   // RFC 2822 4.3 processing:
02000   if ( maybeYear < 50 ) {
02001     maybeYear += 2000;
02002   } else if ( maybeYear < 1000 ) {
02003     maybeYear += 1900;
02004   }
02005   // else keep as is
02006   if ( maybeYear < 1900 ) {
02007     return false; // rfc2822, 3.3
02008   }
02009 
02010   eatCFWS( scursor, send, isCRLF );
02011   if ( scursor == send ) {
02012     return false;
02013   }
02014 
02015   maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
02016 
02017   //
02018   // time
02019   //
02020   int maybeHour, maybeMinute, maybeSecond;
02021   long int secsEastOfGMT;
02022   bool timeZoneKnown = true;
02023 
02024   if ( !parseTime( scursor, send,
02025                    maybeHour, maybeMinute, maybeSecond,
02026                    secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
02027     return false;
02028   }
02029 
02030   maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
02031   if ( !maybeDateTime.isValid() )
02032     return false;
02033 
02034   result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
02035   if ( !result.isValid() )
02036     return false;
02037   return true;
02038 }
02039 
02040 } // namespace HeaderParsing
02041 
02042 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal