libkmime/kmime_codecs.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367

/*  -*- c++ -*-
    kmime_codecs.h

    This file is part of KMime, the KDE internet mail/usenet news message library.
    Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>

    KMime is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License, version 2, as
    published by the Free Software Foundation.

    KMime is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

    In addition, as a special exception, the copyright holders give
    permission to link the code of this library with any edition of
    the Qt library by Trolltech AS, Norway (or with modified versions
    of Qt that use the same license as Qt), and distribute linked
    combinations including the two.  You must obey the GNU General
    Public License in all respects for all of the code used other than
    Qt.  If you modify this file, you may extend this exception to
    your version of the file, but you are not obligated to do so.  If
    you do not wish to do so, delete this exception statement from
    your version.
*/

#ifndef __KMIME_CODECS__
#define __KMIME_CODECS__

#include <qasciidict.h>
#if defined(QT_THREAD_SUPPORT)
#  include <qmutex.h>
#endif

#include <qcstring.h> // QByteArray

#include <kdebug.h> // for kdFatal()
#include <kdepimmacros.h>

namespace KMime {

// forward declarations:
class Encoder;
class Decoder;

/** Abstract base class of codecs like base64 and
    quoted-printable. It's a singleton.

    The constructor is protected, so client code does not create
    codecs itself; it looks them up by name with codecForName(). A
    codec hands out stateful Encoder and Decoder objects
    (makeEncoder(), makeDecoder()) and additionally offers
    convenience wrappers (encode(), decode(), encodeToQCString()) for
    small chunks of data.

    @short Codecs for common mail transfer encodings.
    @author Marc Mutz <mutz@kde.org>
*/
class KDE_EXPORT Codec {
protected:

  /** Dictionary of codec instances with C string keys.
      NOTE(review): presumably keyed by codec name, filled by
      fillDictionary() and searched by codecForName() -- the
      implementation is not part of this header, please confirm. */
  static QAsciiDict<Codec>* all;
#if defined(QT_THREAD_SUPPORT)
  /** Mutex that only exists when Qt is built with thread support.
      NOTE(review): presumably guards access to the codec dictionary
      -- confirm against the implementation. */
  static QMutex* dictLock;
#endif

  /** Protected constructor: only subclasses can create a codec. */
  Codec() {}
private:
  /** NOTE(review): judging by the name, registers the available
      codecs in the codec dictionary; implemented outside this
      header. */
  static void fillDictionary();

public:
  /** Looks up a codec by the name of its encoding (e.g. "base64").
      The usage examples below check the result for null before
      using it, so be prepared for a null pointer when no codec is
      known under @p name. */
  static Codec * codecForName( const char * name );
  /** Overload of codecForName() taking the name as a QCString. */
  static Codec * codecForName( const QCString & name );

  /** Size estimate for the encoded form of @p insize input bytes.
      NOTE(review): semantics inferred from the name (an upper
      bound); implemented by the concrete codecs. */
  virtual int maxEncodedSizeFor( int insize, bool withCRLF=false ) const = 0;
  /** Size estimate for the decoded form of @p insize input bytes.
      NOTE(review): semantics inferred from the name (an upper
      bound); implemented by the concrete codecs. */
  virtual int maxDecodedSizeFor( int insize, bool withCRLF=false ) const = 0;

  /** Creates a stateful Encoder for this codec. @p withCRLF is the
      line ending flag described at the Encoder constructor (CRLF if
      true, LF only if false). */
  virtual Encoder * makeEncoder( bool withCRLF=false ) const = 0;
  /** Creates a stateful Decoder for this codec. @p withCRLF is the
      line ending flag described at the Decoder constructor (CRLF if
      true, LF only if false). As the Decoder usage example shows,
      the caller deletes the returned decoder, but never the codec. */
  virtual Decoder * makeDecoder( bool withCRLF=false ) const = 0;

  /**
   * Convenience wrapper that can be used for small chunks of data
   * when you can provide a large enough buffer. The default
   * implementation creates an Encoder and uses it.
   *
   * Encodes a chunk of bytes starting at @p scursor and extending to
   * @p send into the buffer described by @p dcursor and @p dend.
   *
   * This function doesn't support chaining of blocks. The returned
   * block cannot be added to, but you don't need to finalize it,
   * either.
   *
   * Example usage (@p in contains the input data). Note that the
   * input cursor has to be a ConstIterator, since @p scursor is a
   * reference to a pointer to const:
   * <pre>
   * KMime::Codec * codec = KMime::Codec::codecForName( "base64" );
   * kdFatal( !codec ) << "no base64 codec found!?" << endl;
   * QByteArray out( in.size()*1.4 ); // crude maximal size of b64 encoding
   * QByteArray::ConstIterator iit = in.begin();
   * QByteArray::Iterator oit = out.begin();
   * if ( !codec->encode( iit, in.end(), oit, out.end() ) ) {
   *   kdDebug() << "output buffer too small" << endl;
   *   return;
   * }
   * kdDebug() << "Size of encoded data: " << oit - out.begin() << endl;
   * </pre>
   *
   * @param scursor begin of the input buffer (passed by reference)
   * @param send end of the input buffer
   * @param dcursor begin of the output buffer (passed by reference;
   *        the example above uses it afterwards to compute the size
   *        of the encoded data)
   * @param dend end of the output buffer
   * @param withCRLF If true, make the lineends CRLF, else make them LF only.
   *
   * @return false if the encoded data didn't fit into the output
   * buffer.
   **/
  virtual bool encode( const char* & scursor, const char * const send,
		       char* & dcursor, const char * const dend,
		       bool withCRLF=false ) const;

  /**
   * Convenience wrapper that can be used for small chunks of data
   * when you can provide a large enough buffer. The default
   * implementation creates a Decoder and uses it.
   *
   * Decodes a chunk of bytes starting at @p scursor and extending to
   * @p send into the buffer described by @p dcursor and @p dend.
   *
   * This function doesn't support chaining of blocks. The returned
   * block cannot be added to, but you don't need to finalize it,
   * either.
   *
   * Example usage (@p in contains the input data). Note that the
   * input cursor has to be a ConstIterator, since @p scursor is a
   * reference to a pointer to const:
   * <pre>
   * KMime::Codec * codec = KMime::Codec::codecForName( "base64" );
   * kdFatal( !codec ) << "no base64 codec found!?" << endl;
   * QByteArray out( in.size() ); // good guess for any encoding...
   * QByteArray::ConstIterator iit = in.begin();
   * QByteArray::Iterator oit = out.begin();
   * if ( !codec->decode( iit, in.end(), oit, out.end() ) ) {
   *   kdDebug() << "output buffer too small" << endl;
   *   return;
   * }
   * kdDebug() << "Size of decoded data: " << oit - out.begin() << endl;
   * </pre>
   *
   * @param scursor begin of the input buffer (passed by reference)
   * @param send end of the input buffer
   * @param dcursor begin of the output buffer (passed by reference;
   *        the example above uses it afterwards to compute the size
   *        of the decoded data)
   * @param dend end of the output buffer
   * @param withCRLF Line ending flag as described at the Decoder
   *        constructor: lines end with CRLF if true, with LF only
   *        if false.
   *
   * @return false if the decoded data didn't fit into the output
   * buffer.
   **/
  virtual bool decode( const char* & scursor, const char * const send,
		       char* & dcursor, const char * const dend,
		       bool withCRLF=false ) const;

  /**
   * Even more convenient, but also a bit slower and more memory
   * intensive, since it allocates storage for the worst case and then
   * shrinks the result QByteArray to the actual size again.
   *
   * For use with small @p src.
   *
   * @param src the data to encode
   * @param withCRLF If true, make the lineends CRLF, else make them LF only.
   * @return the encoded data
   **/
  virtual QByteArray encode( const QByteArray & src, bool withCRLF=false ) const;

  /**
   * Even more convenient, but also a bit slower and more memory
   * intensive, since it allocates storage for the worst case and then
   * shrinks the result QCString to the actual size again.
   *
   * For use with small @p src.
   *
   * This method only works for codecs whose output is in the 8bit
   * domain (ie. not in the binary domain). Codecs that do not fall
   * into this category will return a null QCString.
   *
   * @param src the data to encode
   * @param withCRLF If true, make the lineends CRLF, else make them LF only.
   * @return the encoded data, or a null QCString (see above)
   **/
  virtual QCString encodeToQCString( const QByteArray & src, bool withCRLF=false ) const;

  /**
   * Even more convenient, but also a bit slower and more memory
   * intensive, since it allocates storage for the worst case and then
   * shrinks the result QByteArray to the actual size again.
   *
   * For use with small @p src.
   *
   * @param src the data to decode
   * @param withCRLF Line ending flag as described at the Decoder
   *        constructor: lines end with CRLF if true, with LF only
   *        if false.
   * @return the decoded data
   **/
  virtual QByteArray decode( const QByteArray & src, bool withCRLF=false ) const;

  /**
   * @return the name of the encoding. Guaranteed to be lowercase.
   */
  virtual const char * name() const = 0;

  /** Virtual, since Codec is a polymorphic base class. */
  virtual ~Codec() {}

};

/**
 * Stateful decoder class, modelled after QTextDecoder.
 *
 * @section Overview
 *
 * KMime decoders are designed to be able to process encoded data in
 * chunks of arbitrary size and to work with output buffers of also
 * arbitrary size. They maintain any state necessary to go on where
 * the previous call left off.
 *
 * The class consists of only two methods of interest: decode(),
 * which decodes an input block, and finish(), which flushes any
 * remaining data to the output stream.
 *
 * Typically, you will create a decoder instance, call decode() as
 * often as necessary, then call finish() (most often a single
 * call suffices, but it might be that during that call the output
 * buffer is filled, so you should be prepared to call finish()
 * as often as necessary, ie. until it returns @p true).
 *
 * @section Return Values
 *
 * Both methods return @p true to indicate that they've finished their
 * job. For decode(), a return value of @p true means that the
 * current input block has been finished (@p false most often means
 * that the output buffer is full, but that isn't required
 * behavior. The decode() call is free to return at arbitrary
 * times during processing).
 *
 * For finish(), a return value of @p true means that all data
 * implicitly or explicitly stored in the decoder instance has been
 * flushed to the output buffer. A @p false return value should be
 * interpreted as "check if the output buffer is full and call me
 * again", just as with decode().
 *
 * @section Usage Pattern
 *
 * Since the decoder maintains state, you can only use it once. After
 * a sequence of input blocks has been processed, you finish()
 * the output and then delete the decoder instance. If you want to
 * process another input block sequence, you create a new instance.
 *
 * Typical usage (@p in contains the (base64-encoded) input data),
 * taking into account all the conventions detailed above. Note that
 * the input cursor has to be a ConstIterator, since decode() takes
 * it as a reference to a pointer to const:
 *
 * <pre>
 * KMime::Codec * codec = KMime::Codec::codecForName( "base64" );
 * kdFatal( !codec ) << "No codec found for base64!" << endl;
 * KMime::Decoder * dec = codec->makeDecoder();
 * assert( dec ); // should not happen
 * QByteArray out( 256 ); // small buffer is enough ;-)
 * QByteArray::ConstIterator iit = in.begin();
 * QByteArray::Iterator oit = out.begin();
 * // decode the chunk
 * while ( !dec->decode( iit, in.end(), oit, out.end() ) )
 *   if ( oit == out.end() ) { // output buffer full, process contents
 *     do_something_with( out );
 *     oit = out.begin();
 *   }
 * // repeat while loop for each input block
 * // ...
 * // finish (flush remaining data from decoder):
 * while ( !dec->finish( oit, out.end() ) )
 *   if ( oit == out.end() ) { // output buffer full, process contents
 *     do_something_with( out );
 *     oit = out.begin();
 *   }
 * // now process last chunk:
 * out.resize( oit - out.begin() );
 * do_something_with( out );
 * // _delete_ the decoder, but not the codec:
 * delete dec;
 * </pre>
 *
 * @short Stateful CTE decoder class
 * @author Marc Mutz <mutz@kde.org>
 **/
class Decoder {
protected:
  // Codec is a friend so that it has access to the protected
  // members, including the constructor.
  friend class Codec;
  /**
   * Protected constructor. Use KMime::Codec::makeDecoder to
   * create an instance. The bool parameter determines whether lines
   * end with CRLF (true) or LF (false, default).
   **/
  Decoder( bool withCRLF=false )
    : mWithCRLF( withCRLF ) {}
public:
  /** Virtual, since decoders are deleted through a Decoder pointer
   *  (see the usage example in the class documentation).
   **/
  virtual ~Decoder() {}

  /** Decode a chunk of data, maintaining state information between
   *  calls. See class documentation for calling conventions.
   *
   *  @param scursor begin of the input block (passed by reference)
   *  @param send end of the input block
   *  @param dcursor begin of the output buffer (passed by reference)
   *  @param dend end of the output buffer
   *  @return true if the current input block has been finished,
   *  false otherwise (most often: output buffer full).
   **/
  virtual bool decode( const char* & scursor, const char * const send,
		       char* & dcursor, const char * const dend ) = 0;
  /** Call this method to finalize the output stream. Writes all
   *  remaining data and resets the decoder. See class documentation
   *  for calling conventions.
   *
   *  @param dcursor begin of the output buffer (passed by reference)
   *  @param dend end of the output buffer
   *  @return true if all data stored in the decoder has been flushed
   *  to the output buffer, false if it needs to be called again.
   **/
  virtual bool finish( char* & dcursor, const char * const dend ) = 0;

protected:
  /** Line ending convention as passed to the constructor:
   *  CRLF (true) or LF (false). Fixed for the decoder's lifetime.
   **/
  const bool mWithCRLF;
};

/** Stateful encoder class, modelled after QTextEncoder.

    An encoder is created by KMime::Codec::makeEncoder, fed with
    input through encode() and completed with finish(). Subclasses
    implement those two methods and can use the small internal output
    buffer (see write() and flushOutputBuffer()) to park a few
    characters whenever the caller's output buffer is full.

    @short Stateful encoder class
    @author Marc Mutz <mutz@kde.org>
*/
class Encoder {
protected:
  // Codec is a friend so that it has access to the protected
  // members, including the constructor.
  friend class Codec;
  /** Protected constructor. Use KMime::Codec::makeEncoder if you
      want one. The bool parameter determines whether lines end with
      CRLF (true) or LF (false, default). The internal output buffer
      starts out empty. */
  Encoder( bool withCRLF=false )
    : mOutputBufferCursor( 0 ), mWithCRLF( withCRLF ) {}
public:
  /** Virtual, since Encoder is a polymorphic base class. */
  virtual ~Encoder() {}

  /** Encode a chunk of data, maintaining state information between
      calls. See KMime::Codec for calling conventions.
      @param scursor begin of the input block (passed by reference)
      @param send end of the input block
      @param dcursor begin of the output buffer (passed by reference)
      @param dend end of the output buffer */
  virtual bool encode( const char* & scursor, const char * const send,
                       char* & dcursor, const char * const dend ) = 0;

  /** Call this method to finalize the output stream. Writes all
      remaining data and resets the encoder. See KMime::Codec for
      calling conventions.
      @param dcursor begin of the output buffer (passed by reference)
      @param dend end of the output buffer */
  virtual bool finish( char* & dcursor, const char * const dend ) = 0;

protected:
  /** Space in the output buffer */
  enum { maxBufferedChars = 8 };

  /** Writes @p ch to the output stream or the output buffer,
      depending on whether or not the output stream has space left.

      If the internal buffer is already full as well, this is
      reported through kdFatal() and @p ch is dropped, so that the
      buffer is never written past its end even if kdFatal() returns.

      @param ch the character to output
      @param dcursor current position in the output stream; advanced
             by one if @p ch was written there
      @param dend end of the output stream
      @return true if written to the output stream, false if buffered
              (or dropped because of an internal buffer overflow). */
  bool write( char ch, char* & dcursor, const char * const dend ) {
    if ( dcursor != dend ) {
      // if there's space in the output stream, write there:
      *dcursor++ = ch;
      return true;
    }
    // else buffer the output:
    const bool overflow = ( mOutputBufferCursor >= maxBufferedChars );
    kdFatal( overflow )
      << "KMime::Encoder: internal buffer overflow!" << endl;
    // Only store the character if there is room for it: should
    // kdFatal() return instead of aborting, we must not write past
    // the end of mOutputBuffer.
    if ( !overflow )
      mOutputBuffer[ mOutputBufferCursor++ ] = ch;
    return false;
  }

  /** Writes characters from the output buffer to the output stream.
      Implementations of encode and finish should call this
      at the very beginning and for each iteration of the while loop.
      @param dcursor current position in the output stream
      @param dend end of the output stream
      @return true if all chars could be written, false otherwise */
  bool flushOutputBuffer( char* & dcursor, const char * const dend );

  /** Convenience function. Outputs LF or CRLF, based on the state of
      mWithCRLF
      @return the result of writing the final LF, see write(). The
              result of writing the CR (if any) is not reported
              separately. */
  bool writeCRLF( char* & dcursor, const char * const dend ) {
    if ( mWithCRLF )
      write( '\r', dcursor, dend );
    return write( '\n', dcursor, dend );
  }

private:
  /** An output buffer to simplify some codecs. Use with write
      and flushOutputBuffer */
  char mOutputBuffer[ maxBufferedChars ];
protected:
  /** Index of the next free slot in the output buffer, ie. the
      number of characters buffered by write(). */
  uchar mOutputBufferCursor;
  /** Line ending convention as passed to the constructor:
      CRLF (true) or LF (false). */
  const bool mWithCRLF;
};

} // namespace KMime

#endif // __KMIME_CODECS__