// $Revision: 1.3 $ // Copyright (c) 1994-1995 Taligent, Inc. All rights reserved. #ifndef TaligentSamples_TRANSCODINGSNIPPETS #include "TranscodingSnippets.h" #endif // Used for TASCIITranscoder. #ifndef Taligent_TRANSCODING #include #endif // Used for TStandardText. #ifndef Taligent_STANDARDTEXT #include #endif // Used for TASCII. #ifndef Taligent_UNICODEGENERAL #include #endif // ----------------------------------------------------------------------------- TaligentTypeExtensionMacro(TTranscodingSnippets) TTranscodingSnippets::TTranscodingSnippets() : TSnippets() { SNIPPETINFO(ConvertASCIIToUnicode); SNIPPETINFO(ConvertUnicodeToASCII_1); SNIPPETINFO(ConvertUnicodeToASCII_2); SNIPPETINFO(ConvertASCIIToUnicodeWithoutTranscoder); SNIPPETINFO(ConvertUnicodeToASCIIWithoutTranscoder); SNIPPETINFO(UsingGetMaximumBytesPerCharacter); SNIPPETINFO(UsingGetBufferSize); SNIPPETINFO(UsingCanConvert); } TTranscodingSnippets::~TTranscodingSnippets() { } // Use a transcoder to convert ASCII to Unicode. // TStandardText is a concrete subclass of TText used to hold the result. // A TASCIITranscoder is used because we know the source text is ASCII. // Other source text (e.g., Macintosh text, which uses char values of // 128 and above) would be converted using other transcoders. void TTranscodingSnippets::ConvertASCIIToUnicode() { const char* const sourceASCII = "Source ASCII string.\xd\xa"; const unsigned long sourceLength = strlen(sourceASCII); GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceASCII << endl; //- TStandardText resultUnicode; TASCIITranscoder transcoder; TTextCount numCharsConverted = transcoder.AppendToText(sourceASCII, sourceLength, resultUnicode); //- GetDisplay() << resultUnicode << endl; } // Use a transcoder to convert Unicode to ASCII using CreateStringFromText. // TStandardText holds the Unicode text. This example presumes the compiler // is on a Unicode platform and generates unicode from the given string. // A TASCIITranscoder is used because we want to produce ASCII. Unicode values // that do not correspond to ASCII are represented by the substitution // character TASCII::kSubstitution. //. // The resulting string is allocated with 'new' and may be disposed using // 'delete'. It is null-terminated. void TTranscodingSnippets::ConvertUnicodeToASCII_1() { const TStandardText sourceUnicode = TStandardText("Source Unicode text.") + TStandardText(TGeneralPunctuation::kLineSeparator) + TStandardText(TGeneralPunctuation::kParagraphSeparator); GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceUnicode << endl; //- unsigned char* resultASCII = NIL; TASCIITranscoder transcoder; TTextCount resultLength = transcoder.CreateStringFromText(resultASCII, sourceUnicode); //- GetDisplay() << resultASCII << endl; delete resultASCII; } // Use a transcoder to convert Unicode to ASCII using ExtractFromText. // This sample is like ConvertUnicodeToASCII_1 except that a buffer is used to // hold the result. This allows the buffer to be reused across multiple calls. // Here the buffer is artificially small for illustration. // // On entry, resultLength indicates the size of the result buffer. On exit, // it indicates the number of characters put into the buffer. The return // value is the number of unicode characters used. // // The resulting character data is not null-terminated. void TTranscodingSnippets::ConvertUnicodeToASCII_2() { const TStandardText sourceUnicode = TStandardText("Source Unicode text.") + TStandardText(TGeneralPunctuation::kLineSeparator) + TStandardText(TGeneralPunctuation::kParagraphSeparator); GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceUnicode << endl; //- const TTextCount sourceLength = sourceUnicode.GetLength(); const unsigned long kBufferSize = 12; unsigned char resultASCIIBuffer[kBufferSize]; TASCIITranscoder transcoder; TTextCount totalExtracted = 0; while (totalExtracted < sourceLength) { TTextRange sourceRange(TTextOffset(totalExtracted), sourceLength); TTextCount resultLength = kBufferSize; TTextCount numberExtracted = transcoder.ExtractFromText(sourceUnicode, sourceRange, resultASCIIBuffer, resultLength); GetDisplay() << TRepChars(resultASCIIBuffer, resultLength) << endl; totalExtracted += numberExtracted; } //- } // This mimics the behavior of the ASCII transcoder doing a no-round-trip // conversion. void TTranscodingSnippets::ConvertASCIIToUnicodeWithoutTranscoder() { const char* const sourceASCII = "Source ASCII string.\xd\xa"; GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceASCII << endl; //- const int kUnicodeBufferSize = 128; UniChar resultUnicodeBuffer[kUnicodeBufferSize]; TTextCount resultLength; char aChar; const char* asciiPtr = sourceASCII; UniChar* unicodePtr = resultUnicodeBuffer; UniChar* unicodePtrEnd = unicodePtr + kUnicodeBufferSize; while ((aChar = *asciiPtr++) && (unicodePtr != unicodePtrEnd)) { switch (aChar) { case TASCII::kLineFeed: *unicodePtr++ = TGeneralPunctuation::kParagraphSeparator; break; case TASCII::kCarriageReturn: *unicodePtr++ = TGeneralPunctuation::kParagraphSeparator; break; default: if (aChar <= TASCII::kDelete) { *unicodePtr++ = (UniChar)aChar; } else { *unicodePtr++ = TUnicodeSpecial::kReplacementCharacter; } } } resultLength = unicodePtr - resultUnicodeBuffer; //- GetDisplay() << TRepUniChars(resultUnicodeBuffer, resultLength) << endl; } // This mimics the behavior of the ASCII transcoder doing a no-round-trip // conversion. When doing a round-trip conversion, if a character is not // convertable, conversion stops instead of using the substitute character. void TTranscodingSnippets::ConvertUnicodeToASCIIWithoutTranscoder() { const TStandardText unicodeText = TStandardText("Source Unicode text.") + TStandardText(TGeneralPunctuation::kLineSeparator) + TStandardText(TGeneralPunctuation::kParagraphSeparator); TTextCount sourceLength = unicodeText.GetLength(); UniChar* sourceUnicode = new UniChar[sourceLength]; unicodeText.Extract(TTextRange(0, sourceLength), sourceUnicode, sourceLength); GetDisplay().SetAsciifyNewline(true); GetDisplay() << TRepUniChars(sourceUnicode, sourceLength) << endl; //- const int kASCIIBufferSize = 128; unsigned char resultASCIIBuffer[kASCIIBufferSize]; TTextCount resultLength; UniChar aUniChar; const UniChar* unicodePtr = sourceUnicode; unsigned char* asciiPtr = resultASCIIBuffer; unsigned char* asciiPtrEnd = asciiPtr + kASCIIBufferSize; while ((aUniChar = *unicodePtr++) && (asciiPtr != asciiPtrEnd)) { switch (aUniChar) { case TGeneralPunctuation::kLineSeparator: *asciiPtr++ = TASCII::kLineFeed; break; case TGeneralPunctuation::kParagraphSeparator: *asciiPtr++ = TASCII::kLineFeed; // not kCarriageReturn break; default: if (aUniChar <= TASCII::kDelete) { *asciiPtr++ = (unsigned char)aUniChar; } else { *asciiPtr++ = TASCII::kSubstitute; } } } resultLength = asciiPtr - resultASCIIBuffer; //- GetDisplay() << TRepChars(resultASCIIBuffer, resultLength) << endl; } // Use GetMaximumBytesPerCharacter to calculate the size of the buffer // needed to hold the result. It quickly generates a worst case value // large enough for any source text. // // For illustration, it uses a transcoding scope of partialroundtrip. Note // that the extraction must use the same scope. void TTranscodingSnippets::UsingGetMaximumBytesPerCharacter() { const TStandardText sourceUnicode = TStandardText("Source Unicode text.") + TStandardText(TGeneralPunctuation::kLineSeparator) + TStandardText(TGeneralPunctuation::kParagraphSeparator); GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceUnicode << endl; //- const TTextCount sourceLength = sourceUnicode.GetLength(); const TTextRange sourceRange(0, sourceLength); const TTranscoder::ETranscodingScope scope = TTranscoder::kPartialRoundTrip; TASCIITranscoder transcoder; const TTextCount maxBytesPerCharacter = transcoder.GetMaximumBytesPerCharacter(scope); const TTextCount bufferSize = sourceLength * maxBytesPerCharacter; unsigned char* resultASCIIBuffer = new unsigned char[bufferSize]; TTextCount resultLength = bufferSize; TTextCount numberExtracted = transcoder.ExtractFromText(sourceUnicode, sourceRange, resultASCIIBuffer, resultLength); //- GetDisplay() << TRepChars(resultASCIIBuffer, resultLength) << endl; delete resultASCIIBuffer; } // Uses GetBufferSize to calculate the exact size of a buffer needed to // hold the result for a particular conversion. This essentially passes // over the source text two times, once to calculate the buffer size, // and once to do the conversion, so it is slower than the example that // uses GetMaximumBytesPerCharacter. void TTranscodingSnippets::UsingGetBufferSize() { const TStandardText sourceUnicode = TStandardText("Source Unicode text.") + TStandardText(TGeneralPunctuation::kLineSeparator) + TStandardText(TGeneralPunctuation::kParagraphSeparator); GetDisplay().SetAsciifyNewline(true); GetDisplay() << sourceUnicode << endl; //- const TTextCount sourceLength = sourceUnicode.GetLength(); const TTextRange sourceRange(0, sourceLength); const TTranscoder::ETranscodingScope scope = TTranscoder::kPartialRoundTrip; TASCIITranscoder transcoder; const TTextCount bufferSize = transcoder.GetBufferSize(sourceUnicode, sourceRange, scope); unsigned char* resultASCIIBuffer = new unsigned char[bufferSize]; TTextCount resultLength = bufferSize; TTextCount numberExtracted = transcoder.ExtractFromText(sourceUnicode, sourceRange, resultASCIIBuffer, resultLength); //- GetDisplay() << TRepChars(resultASCIIBuffer, resultLength) << endl; delete resultASCIIBuffer; } // Use CanConvert to find out how many characters can be converted usnig the // provided transcoding scope. void TTranscodingSnippets::UsingCanConvert() { const char* sourceMacintosh = "Source Macintosh\xaa string."; // 'tm'; TTextCount sourceLength = strlen(sourceMacintosh); GetDisplay() << sourceMacintosh << endl; //- TASCIITranscoder transcoder; TTextCount numberCanConvert = transcoder.CanConvert( sourceMacintosh, sourceLength, TTranscoder::kFullRoundTrip); //- GetDisplay() << numberCanConvert << " of " << sourceLength << " character(s) will convert." << endl; }