examples/PIPS/antiword/src/word2text.c

00001 /*
00002  * word2text.c
00003  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
00004  *
00005  * Description:
00006  * MS Word to "text" functions
00007  */
00008 
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012 #include <ctype.h>
00013 #if defined(__riscos)
00014 #include "DeskLib:Hourglass.h"
00015 #include "drawfile.h"
00016 #endif /* __riscos */
00017 #include "antiword.h"
00018 
00019 
/* Number of bytes allocated for the storage of a fresh output record */
#define INITIAL_SIZE		40
/* Number of bytes added each time the storage of a record must grow */
#define EXTENTION_SIZE		20


/*
 * Statement macros, so that every call site performs identical steps.
 * Both rely on the locals pAnchor and pOutput being in scope at the call
 * site; OUTPUT_LINE also needs pDiag, lWidthMax and ucAlignment.
 */

/* Release the current output list and continue with a fresh record */
#define RESET_LINE()		\
	do {\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

/* Hand the collected line to vAlign2Window, then start a fresh line */
#define OUTPUT_LINE()		\
	do {\
		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
		TRACE_MSG("after vAlign2Window");\
		RESET_LINE();\
	} while(0)
00038 
00039 #if defined(__riscos)
00040 /* Length of the document in characters */
00041 static ULONG    ulDocumentLength;
00042 /* Number of characters processed so far */
00043 static ULONG    ulCharCounter;
00044 static int      iCurrPct, iPrevPct;
00045 #endif /* __riscos */
00046 /* The document is in the format belonging to this version of Word */
00047 static int      iWordVersion = -1;
00048 /* Special treatment for files from Word 4/5/6 on an Apple Macintosh */
00049 static BOOL     bOldMacFile = FALSE;
00050 /* Section Information */
00051 static const section_block_type *pSection = NULL;
00052 static const section_block_type *pSectionNext = NULL;
00053 /* All the (command line) options */
00054 static options_type     tOptions;
00055 /* Needed for reading a complete table row */
00056 static const row_block_type     *pRowInfo = NULL;
00057 static BOOL     bStartRow = FALSE;
00058 static BOOL     bEndRowNorm = FALSE;
00059 static BOOL     bEndRowFast = FALSE;
00060 static BOOL     bIsTableRow = FALSE;
00061 /* Index of the next style and font information */
00062 static USHORT   usIstdNext = ISTD_NORMAL;
00063 /* Needed for finding the start of a style */
00064 static const style_block_type   *pStyleInfo = NULL;
00065 static style_block_type         tStyleNext;
00066 static BOOL     bStartStyle = FALSE;
00067 static BOOL     bStartStyleNext = FALSE;
00068 /* Needed for finding the start of a font */
00069 static const font_block_type    *pFontInfo = NULL;
00070 static font_block_type          tFontNext;
00071 static BOOL     bStartFont = FALSE;
00072 static BOOL     bStartFontNext = FALSE;
00073 /* Needed for finding an image */
00074 static ULONG    ulFileOffsetImage = FC_INVALID;
00075 
00076 
/*
 * vUpdateCounters - account for one more processed character
 *
 * On RISC OS the character counter is incremented and the hourglass is
 * given a new percentage whenever the computed percentage differs from the
 * one shown last. On every other platform this function does nothing.
 */
static void
vUpdateCounters(void)
{
#if defined(__riscos)
	++ulCharCounter;
	iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength);
	if (iCurrPct == iPrevPct) {
		/* The displayed percentage is still correct */
		return;
	}
	iPrevPct = iCurrPct;
	Hourglass_Percentage(iCurrPct);
#endif /* __riscos */
} /* end of vUpdateCounters */
00092 
00093 /*
00094  * bOutputContainsText - see if the output contains more than white space
00095  */
00096 BOOL
00097 bOutputContainsText(const output_type *pAnchor)
00098 {
00099         const output_type       *pCurr;
00100         size_t  tIndex;
00101 
00102         fail(pAnchor == NULL);
00103 
00104         for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
00105                 fail(pCurr->lStringWidth < 0);
00106                 for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) {
00107                         if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) {
00108                                 continue;
00109                         }
00110 #if defined(DEBUG)
00111                         if (pCurr->szStorage[tIndex] == FILLER_CHAR) {
00112                                 continue;
00113                         }
00114 #endif /* DEBUG */
00115                         return TRUE;
00116                 }
00117         }
00118         return FALSE;
00119 } /* end of bOutputContainsText */
00120 
00121 /*
00122  * lTotalStringWidth - compute the total width of the output string
00123  */
00124 static long
00125 lTotalStringWidth(const output_type *pAnchor)
00126 {
00127         const output_type       *pCurr;
00128         long            lTotal;
00129 
00130         lTotal = 0;
00131         for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
00132                 DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);
00133                 fail(pCurr->lStringWidth < 0);
00134                 lTotal += pCurr->lStringWidth;
00135         }
00136         return lTotal;
00137 } /* end of lTotalStringWidth */
00138 
00139 /*
00140  * vStoreByte - store one byte
00141  */
00142 static void
00143 vStoreByte(UCHAR ucChar, output_type *pOutput)
00144 {
00145         fail(pOutput == NULL);
00146 
00147         if (ucChar == 0) {
00148                 pOutput->szStorage[pOutput->tNextFree] = '\0';
00149                 return;
00150         }
00151 
00152         while (pOutput->tNextFree + 2 > pOutput->tStorageSize) {
00153                 pOutput->tStorageSize += EXTENTION_SIZE;
00154                 pOutput->szStorage = xrealloc(pOutput->szStorage,
00155                                         pOutput->tStorageSize);
00156         }
00157         pOutput->szStorage[pOutput->tNextFree] = (char)ucChar;
00158         pOutput->szStorage[pOutput->tNextFree + 1] = '\0';
00159         pOutput->tNextFree++;
00160 } /* end of vStoreByte */
00161 
00162 /*
00163  * vStoreChar - store a character as one or more bytes
00164  */
00165 static void
00166 vStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput)
00167 {
00168         char    szResult[4];
00169         size_t  tIndex, tLen;
00170 
00171         fail(pOutput == NULL);
00172 
00173         if (tOptions.eEncoding == encoding_utf_8 && bChangeAllowed) {
00174                 DBG_HEX_C(ulChar > 0xffff, ulChar);
00175                 fail(ulChar > 0xffff);
00176                 tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
00177                 for (tIndex = 0; tIndex < tLen; tIndex++) {
00178                         vStoreByte((UCHAR)szResult[tIndex], pOutput);
00179                 }
00180         } else {
00181                 DBG_HEX_C(ulChar > 0xff, ulChar);
00182                 fail(ulChar > 0xff);
00183                 vStoreByte((UCHAR)ulChar, pOutput);
00184                 tLen = 1;
00185         }
00186         pOutput->lStringWidth += lComputeStringWidth(
00187                                 pOutput->szStorage + pOutput->tNextFree - tLen,
00188                                 tLen,
00189                                 pOutput->tFontRef,
00190                                 pOutput->usFontSize);
00191 } /* end of vStoreChar */
00192 
00193 /*
00194  * vStoreCharacter - store one character
00195  */
00196 static void
00197 vStoreCharacter(ULONG ulChar, output_type *pOutput)
00198 {
00199         vStoreChar(ulChar, TRUE, pOutput);
00200 } /* end of vStoreCharacter */
00201 
00202 /*
00203  * vStoreString - store a string
00204  */
00205 static void
00206 vStoreString(const char *szString, size_t tStringLength, output_type *pOutput)
00207 {
00208         size_t  tIndex;
00209 
00210         fail(szString == NULL || pOutput == NULL);
00211 
00212         for (tIndex = 0; tIndex < tStringLength; tIndex++) {
00213                 vStoreCharacter((ULONG)(UCHAR)szString[tIndex], pOutput);
00214         }
00215 } /* end of vStoreString */
00216 
00217 /*
00218  * vStoreNumberAsDecimal - store a number as a decimal number
00219  */
00220 static void
00221 vStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput)
00222 {
00223         size_t  tLen;
00224         char    szString[3 * sizeof(UINT) + 1];
00225 
00226         fail(uiNumber == 0);
00227         fail(pOutput == NULL);
00228 
00229         tLen = (size_t)sprintf(szString, "%u", uiNumber);
00230         vStoreString(szString, tLen, pOutput);
00231 } /* end of vStoreNumberAsDecimal */
00232 
00233 /*
00234  * vStoreNumberAsRoman - store a number as a roman numerical
00235  */
00236 static void
00237 vStoreNumberAsRoman(UINT uiNumber, output_type *pOutput)
00238 {
00239         size_t  tLen;
00240         char    szString[15];
00241 
00242         fail(uiNumber == 0);
00243         fail(pOutput == NULL);
00244 
00245         tLen = tNumber2Roman(uiNumber, FALSE, szString);
00246         vStoreString(szString, tLen, pOutput);
00247 } /* end of vStoreNumberAsRoman */
00248 
00249 /*
00250  * vStoreStyle - store a style
00251  */
00252 static void
00253 vStoreStyle(diagram_type *pDiag, output_type *pOutput,
00254         const style_block_type *pStyle)
00255 {
00256         size_t  tLen;
00257         char    szString[120];
00258 
00259         fail(pDiag == NULL);
00260         fail(pOutput == NULL);
00261         fail(pStyle == NULL);
00262 
00263         if (tOptions.eConversionType == conversion_xml) {
00264                 vSetHeaders(pDiag, pStyle->usIstd);
00265         } else {
00266                 tLen = tStyle2Window(szString, sizeof(szString),
00267                                         pStyle, pSection);
00268                 vStoreString(szString, tLen, pOutput);
00269         }
00270 } /* end of vStoreStyle */
00271 
00272 /*
00273  * vPutIndentation - output the specified amount of indentation
00274  */
00275 static void
00276 vPutIndentation(diagram_type *pDiag, output_type *pOutput,
00277         BOOL bNoMarks, BOOL bFirstLine,
00278         UINT uiListNumber, UCHAR ucNFC, const char *szListChar,
00279         long lLeftIndentation, long lLeftIndentation1)
00280 {
00281         long    lWidth;
00282         size_t  tIndex, tNextFree;
00283         char    szLine[30];
00284 
00285         fail(pDiag == NULL);
00286         fail(pOutput == NULL);
00287         fail(szListChar == NULL);
00288         fail(lLeftIndentation < 0);
00289 
00290         if (tOptions.eConversionType == conversion_xml) {
00291                 /* XML does its own indentation at rendering time */
00292                 return;
00293         }
00294 
00295         if (bNoMarks) {
00296                 if (bFirstLine) {
00297                         lLeftIndentation += lLeftIndentation1;
00298                 }
00299                 if (lLeftIndentation < 0) {
00300                         lLeftIndentation = 0;
00301                 }
00302                 vSetLeftIndentation(pDiag, lLeftIndentation);
00303                 return;
00304         }
00305         if (lLeftIndentation <= 0) {
00306                 DBG_HEX_C(ucNFC != 0x00, ucNFC);
00307                 vSetLeftIndentation(pDiag, 0);
00308                 return;
00309         }
00310 
00311 #if defined(DEBUG)
00312         if (tOptions.eEncoding == encoding_utf_8) {
00313                 fail(strlen(szListChar) > 3);
00314         } else {
00315                 DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]);
00316                 fail(iscntrl((int)szListChar[0]));
00317                 fail(szListChar[1] != '\0');
00318         }
00319 #endif /* DEBUG */
00320 
00321         switch (ucNFC) {
00322         case LIST_ARABIC_NUM:
00323         case LIST_NUMBER_TXT:
00324                 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
00325                 break;
00326         case LIST_UPPER_ROMAN:
00327         case LIST_LOWER_ROMAN:
00328                 tNextFree = tNumber2Roman(uiListNumber,
00329                                 ucNFC == LIST_UPPER_ROMAN, szLine);
00330                 break;
00331         case LIST_UPPER_ALPHA:
00332         case LIST_LOWER_ALPHA:
00333                 tNextFree = tNumber2Alpha(uiListNumber,
00334                                 ucNFC == LIST_UPPER_ALPHA, szLine);
00335                 break;
00336         case LIST_ORDINAL_NUM:
00337         case LIST_ORDINAL_TXT:
00338                 if (uiListNumber % 10 == 1 && uiListNumber != 11) {
00339                         tNextFree =
00340                                 (size_t)sprintf(szLine, "%ust", uiListNumber);
00341                 } else if (uiListNumber % 10 == 2 && uiListNumber != 12) {
00342                         tNextFree =
00343                                 (size_t)sprintf(szLine, "%und", uiListNumber);
00344                 } else if (uiListNumber % 10 == 3 && uiListNumber != 13) {
00345                         tNextFree =
00346                                 (size_t)sprintf(szLine, "%urd", uiListNumber);
00347                 } else {
00348                         tNextFree =
00349                                 (size_t)sprintf(szLine, "%uth", uiListNumber);
00350                 }
00351                 break;
00352         case LIST_OUTLINE_NUM:
00353                 tNextFree = (size_t)sprintf(szLine, "%02u", uiListNumber);
00354                 break;
00355         case LIST_SPECIAL:
00356         case LIST_SPECIAL2:
00357         case LIST_BULLETS:
00358                 tNextFree = 0;
00359                 break;
00360         default:
00361                 DBG_HEX(ucNFC);
00362                 DBG_FIXME();
00363                 tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
00364                 break;
00365         }
00366         tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar);
00367         szLine[tNextFree++] = ' ';
00368         szLine[tNextFree] = '\0';
00369         lWidth = lComputeStringWidth(szLine, tNextFree,
00370                                 pOutput->tFontRef, pOutput->usFontSize);
00371         lLeftIndentation -= lWidth;
00372         if (lLeftIndentation < 0) {
00373                 lLeftIndentation = 0;
00374         }
00375         vSetLeftIndentation(pDiag, lLeftIndentation);
00376         for (tIndex = 0; tIndex < tNextFree; tIndex++) {
00377                 vStoreChar((ULONG)(UCHAR)szLine[tIndex], FALSE, pOutput);
00378         }
00379 } /* end of vPutIndentation */
00380 
00381 /*
00382  * vPutSeparatorLine - output a separator line
00383  *
00384  * A separator line is a horizontal line two inches long.
00385  * Two inches equals 144000 millipoints.
00386  */
00387 static void
00388 vPutSeparatorLine(output_type *pOutput)
00389 {
00390         long    lCharWidth;
00391         int     iCounter, iChars;
00392         char    szOne[2];
00393 
00394         fail(pOutput == NULL);
00395 
00396         szOne[0] = OUR_EM_DASH;
00397         szOne[1] = '\0';
00398         lCharWidth = lComputeStringWidth(szOne, 1,
00399                                 pOutput->tFontRef, pOutput->usFontSize);
00400         NO_DBG_DEC(lCharWidth);
00401         iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);
00402         NO_DBG_DEC(iChars);
00403         for (iCounter = 0; iCounter < iChars; iCounter++) {
00404                 vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput);
00405         }
00406 } /* end of vPutSeparatorLine */
00407 
00408 /*
00409  * pStartNextOutput - start the next output record
00410  *
00411  * returns a pointer to the next record
00412  */
00413 static output_type *
00414 pStartNextOutput(output_type *pCurrent)
00415 {
00416         output_type     *pNew;
00417 
00418         TRACE_MSG("pStartNextOutput");
00419 
00420         if (pCurrent->tNextFree == 0) {
00421                 /* The current record is empty, re-use */
00422                 fail(pCurrent->szStorage[0] != '\0');
00423                 fail(pCurrent->lStringWidth != 0);
00424                 return pCurrent;
00425         }
00426         /* The current record is in use, make a new one */
00427         pNew = xmalloc(sizeof(*pNew));
00428         pCurrent->pNext = pNew;
00429         pNew->tStorageSize = INITIAL_SIZE;
00430         pNew->szStorage = xmalloc(pNew->tStorageSize);
00431         pNew->szStorage[0] = '\0';
00432         pNew->tNextFree = 0;
00433         pNew->lStringWidth = 0;
00434         pNew->ucFontColor = FONT_COLOR_DEFAULT;
00435         pNew->usFontStyle = FONT_REGULAR;
00436         pNew->tFontRef = (drawfile_fontref)0;
00437         pNew->usFontSize = DEFAULT_FONT_SIZE;
00438         pNew->pPrev = pCurrent;
00439         pNew->pNext = NULL;
00440         return pNew;
00441 } /* end of pStartNextOutput */
00442 
00443 /*
00444  * pStartNewOutput
00445  */
00446 static output_type *
00447 pStartNewOutput(output_type *pAnchor, output_type *pLeftOver)
00448 {
00449         output_type     *pCurr, *pNext;
00450         USHORT          usFontStyle, usFontSize;
00451         drawfile_fontref        tFontRef;
00452         UCHAR           ucFontColor;
00453 
00454         TRACE_MSG("pStartNewOutput");
00455 
00456         ucFontColor = FONT_COLOR_DEFAULT;
00457         usFontStyle = FONT_REGULAR;
00458         tFontRef = (drawfile_fontref)0;
00459         usFontSize = DEFAULT_FONT_SIZE;
00460         /* Free the old output space */
00461         pCurr = pAnchor;
00462         while (pCurr != NULL) {
00463                 TRACE_MSG("Free the old output space");
00464                 pNext = pCurr->pNext;
00465                 pCurr->szStorage = xfree(pCurr->szStorage);
00466                 if (pCurr->pNext == NULL) {
00467                         ucFontColor = pCurr->ucFontColor;
00468                         usFontStyle = pCurr->usFontStyle;
00469                         tFontRef = pCurr->tFontRef;
00470                         usFontSize = pCurr->usFontSize;
00471                 }
00472                 pCurr = xfree(pCurr);
00473                 pCurr = pNext;
00474         }
00475         if (pLeftOver == NULL) {
00476                 /* Create new output space */
00477                 TRACE_MSG("Create new output space");
00478                 pLeftOver = xmalloc(sizeof(*pLeftOver));
00479                 pLeftOver->tStorageSize = INITIAL_SIZE;
00480                 NO_DBG_DEC(pLeftOver->tStorageSize);
00481                 TRACE_MSG("before 2nd xmalloc");
00482                 pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
00483                 TRACE_MSG("after 2nd xmalloc");
00484                 pLeftOver->szStorage[0] = '\0';
00485                 pLeftOver->tNextFree = 0;
00486                 pLeftOver->lStringWidth = 0;
00487                 pLeftOver->ucFontColor = ucFontColor;
00488                 pLeftOver->usFontStyle = usFontStyle;
00489                 pLeftOver->tFontRef = tFontRef;
00490                 pLeftOver->usFontSize = usFontSize;
00491                 pLeftOver->pPrev = NULL;
00492                 pLeftOver->pNext = NULL;
00493         }
00494         fail(!bCheckDoubleLinkedList(pLeftOver));
00495         return pLeftOver;
00496 } /* end of pStartNewOutput */
00497 
00498 /*
00499  * ulGetChar - get the next character from the specified list
00500  *
00501  * returns the next character of EOF
00502  */
00503 static ULONG
00504 ulGetChar(FILE *pFile, list_id_enum eListID)
00505 {
00506         const font_block_type   *pCurr;
00507         ULONG           ulChar, ulFileOffset, ulCharPos;
00508         row_info_enum   eRowInfo;
00509         USHORT          usChar, usPropMod;
00510         BOOL            bSkip;
00511 
00512         fail(pFile == NULL);
00513 
00514         pCurr = pFontInfo;
00515         bSkip = FALSE;
00516         for (;;) {
00517                 usChar = usNextChar(pFile, eListID,
00518                                 &ulFileOffset, &ulCharPos, &usPropMod);
00519                 if (usChar == (USHORT)EOF) {
00520                         return (ULONG)EOF;
00521                 }
00522 
00523                 vUpdateCounters();
00524 
00525                 eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);
00526                 if (!bStartRow) {
00527 #if 0
00528                         bStartRow = eRowInfo == found_a_cell ||
00529                                 (pRowInfo != NULL &&
00530                                  ulFileOffset == pRowInfo->ulFileOffsetStart &&
00531                                  eRowInfo != found_not_a_cell);
00532 #else
00533                         bStartRow = pRowInfo != NULL &&
00534                                 ulFileOffset == pRowInfo->ulFileOffsetStart;
00535 #endif
00536                         NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);
00537                 }
00538                 if (!bEndRowNorm) {
00539 #if 0
00540                         bEndRow = eRowInfo == found_end_of_row ||
00541                                 (pRowInfo != NULL &&
00542                                  ulFileOffset == pRowInfo->ulFileOffsetEnd &&
00543                                  eRowInfo != found_not_end_of_row);
00544 #else
00545                         bEndRowNorm = pRowInfo != NULL &&
00546                                 ulFileOffset == pRowInfo->ulFileOffsetEnd;
00547 #endif
00548                         NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);
00549                 }
00550                 if (!bEndRowFast) {
00551                         bEndRowFast = eRowInfo == found_end_of_row;
00552                         NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);
00553                 }
00554 
00555                 if (!bStartStyle) {
00556                         bStartStyle = pStyleInfo != NULL &&
00557                                 ulFileOffset == pStyleInfo->ulFileOffset;
00558                         NO_DBG_HEX_C(bStartStyle, ulFileOffset);
00559                 }
00560                 if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {
00561                         bStartFont = TRUE;
00562                         NO_DBG_HEX(ulFileOffset);
00563                         pFontInfo = pCurr;
00564                         pCurr = pGetNextFontInfoListItem(pCurr);
00565                 }
00566 
00567                 /* Skip embedded characters */
00568                 if (usChar == START_EMBEDDED) {
00569                         bSkip = TRUE;
00570                         continue;
00571                 }
00572                 if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
00573                         bSkip = FALSE;
00574                         continue;
00575                 }
00576                 if (bSkip) {
00577                         continue;
00578                 }
00579                 ulChar = ulTranslateCharacters(usChar,
00580                                         ulFileOffset,
00581                                         iWordVersion,
00582                                         tOptions.eConversionType,
00583                                         tOptions.eEncoding,
00584                                         bOldMacFile);
00585                 if (ulChar == IGNORE_CHARACTER) {
00586                         continue;
00587                 }
00588                 if (ulChar == PICTURE) {
00589                         ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);
00590                 } else {
00591                         ulFileOffsetImage = FC_INVALID;
00592                 }
00593                 if (ulChar == PAR_END) {
00594                         /* End of paragraph seen, prepare for the next */
00595                         vFillStyleFromStylesheet(usIstdNext, &tStyleNext);
00596                         vCorrectStyleValues(&tStyleNext);
00597                         bStartStyleNext = TRUE;
00598                         vFillFontFromStylesheet(usIstdNext, &tFontNext);
00599                         vCorrectFontValues(&tFontNext);
00600                         bStartFontNext = TRUE;
00601                 }
00602                 if (ulChar == PAGE_BREAK) {
00603                         /* Might be the start of a new section */
00604                         pSectionNext = pGetSectionInfo(pSection, ulCharPos);
00605                 }
00606                 return ulChar;
00607         }
00608 } /* end of ulGetChar */
00609 
00610 /*
00611  * lGetWidthMax - get the maximum line width from the paragraph break value
00612  *
00613  * Returns the maximum line width in millipoints
00614  */
00615 static long
00616 lGetWidthMax(int iParagraphBreak)
00617 {
00618         fail(iParagraphBreak < 0);
00619 
00620         if (iParagraphBreak == 0) {
00621                 return LONG_MAX;
00622         }
00623         if (iParagraphBreak < MIN_SCREEN_WIDTH) {
00624                 return lChar2MilliPoints(MIN_SCREEN_WIDTH);
00625         }
00626         if (iParagraphBreak > MAX_SCREEN_WIDTH) {
00627                 return lChar2MilliPoints(MAX_SCREEN_WIDTH);
00628         }
00629         return lChar2MilliPoints(iParagraphBreak);
00630 } /* end of lGetWidthMax */
00631 
00632 /*
00633  * bWordDecryptor - turn Word to something more useful
00634  *
00635  * returns TRUE when successful, otherwise FALSE
00636  */
00637 BOOL
00638 bWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag)
00639 {
00640         imagedata_type  tImage;
00641         const style_block_type  *pStyleTmp;
00642         const font_block_type   *pFontTmp;
00643         const char      *szListChar;
00644         output_type     *pAnchor, *pOutput, *pLeftOver;
00645         ULONG   ulChar;
00646         long    lBeforeIndentation, lAfterIndentation;
00647         long    lLeftIndentation, lLeftIndentation1, lRightIndentation;
00648         long    lWidthCurr, lWidthMax, lDefaultTabWidth, lHalfSpaceWidth, lTmp;
00649         list_id_enum    eListID;
00650         image_info_enum eRes;
00651         UINT    uiFootnoteNumber, uiEndnoteNumber, uiTmp;
00652         int     iListSeqNumber;
00653         BOOL    bWasTableRow, bTableFontClosed, bWasEndOfParagraph;
00654         BOOL    bInList, bWasInList, bNoMarks, bFirstLine;
00655         BOOL    bAllCapitals, bHiddenText, bMarkDelText, bSuccess;
00656         USHORT  usListNumber;
00657         USHORT  usFontStyle, usFontStyleMinimal, usFontSize, usTmp;
00658         UCHAR   ucFontNumber, ucFontColor;
00659         UCHAR   ucNFC, ucAlignment;
00660 
00661         fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);
00662 
00663         TRACE_MSG("bWordDecryptor");
00664 
00665         iWordVersion = iInitDocument(pFile, lFilesize);
00666         if (iWordVersion < 0) {
00667                 DBG_DEC(iWordVersion);
00668                 return FALSE;
00669         }
00670 
00671         vGetOptions(&tOptions);
00672         bOldMacFile = bIsOldMacFile();
00673         vPrepareHdrFtrText(pFile);
00674         vPrepareFootnoteText(pFile);
00675 
00676         vPrologue2(pDiag, iWordVersion);
00677 
00678         /* Initialisation */
00679 #if defined(__riscos)
00680         ulCharCounter = 0;
00681         iCurrPct = 0;
00682         iPrevPct = -1;
00683         ulDocumentLength = ulGetDocumentLength();
00684 #endif /* __riscos */
00685         pSection = pGetSectionInfo(NULL, 0);
00686         pSectionNext = pSection;
00687         lDefaultTabWidth = lGetDefaultTabWidth();
00688         DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);
00689         pRowInfo = pGetNextRowInfoListItem();
00690         DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);
00691         DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);
00692         DBG_MSG_C(pRowInfo == NULL, "No rows at all");
00693         bStartRow = FALSE;
00694         bEndRowNorm = FALSE;
00695         bEndRowFast = FALSE;
00696         bIsTableRow = FALSE;
00697         bWasTableRow = FALSE;
00698         vResetStyles();
00699         pStyleInfo = pGetNextTextStyle(NULL);
00700         bStartStyle = FALSE;
00701         bInList = FALSE;
00702         bWasInList = FALSE;
00703         iListSeqNumber = 0;
00704         usIstdNext = ISTD_NORMAL;
00705         pAnchor = NULL;
00706         pFontInfo = pGetNextFontInfoListItem(NULL);
00707         DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);
00708         DBG_MSG_C(pFontInfo == NULL, "No fonts at all");
00709         bStartFont = FALSE;
00710         ucFontNumber = 0;
00711         usFontStyleMinimal = FONT_REGULAR;
00712         usFontStyle = FONT_REGULAR;
00713         usFontSize = DEFAULT_FONT_SIZE;
00714         ucFontColor = FONT_COLOR_DEFAULT;
00715         pAnchor = pStartNewOutput(pAnchor, NULL);
00716         pOutput = pAnchor;
00717         pOutput->ucFontColor = ucFontColor;
00718         pOutput->usFontStyle = usFontStyle;
00719         pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize);
00720         pOutput->usFontSize = usFontSize;
00721         bTableFontClosed = TRUE;
00722         lBeforeIndentation = 0;
00723         lAfterIndentation = 0;
00724         lLeftIndentation = 0;
00725         lLeftIndentation1 = 0;
00726         lRightIndentation = 0;
00727         bWasEndOfParagraph = TRUE;
00728         bNoMarks = TRUE;
00729         bFirstLine = TRUE;
00730         ucNFC = LIST_BULLETS;
00731         if (pStyleInfo != NULL) {
00732                 szListChar = pStyleInfo->szListChar;
00733                 pStyleTmp = pStyleInfo;
00734         } else {
00735                 if (tStyleNext.szListChar[0] == '\0') {
00736                         vGetBulletValue(tOptions.eConversionType,
00737                                 tOptions.eEncoding, tStyleNext.szListChar, 4);
00738                 }
00739                 szListChar = tStyleNext.szListChar;
00740                 pStyleTmp = &tStyleNext;
00741         }
00742         usListNumber = 0;
00743         ucAlignment = ALIGNMENT_LEFT;
00744         bAllCapitals = FALSE;
00745         bHiddenText = FALSE;
00746         bMarkDelText = FALSE;
00747         lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
00748         NO_DBG_DEC(lWidthMax);
00749 
00750         Hourglass_On();
00751 
00752         uiFootnoteNumber = 0;
00753         uiEndnoteNumber = 0;
00754         eListID = text_list;
00755         for(;;) {
00756                 ulChar = ulGetChar(pFile, eListID);
00757                 if (ulChar == (ULONG)EOF) {
00758                         if (bOutputContainsText(pAnchor)) {
00759                                 OUTPUT_LINE();
00760                         } else {
00761                                 RESET_LINE();
00762                         }
00763                         switch (eListID) {
00764                         case text_list:
00765                                 if (tOptions.eConversionType !=
00766                                                         conversion_xml) {
00767                                         eListID = footnote_list;
00768                                         if (uiFootnoteNumber != 0) {
00769                                                 vPutSeparatorLine(pAnchor);
00770                                                 OUTPUT_LINE();
00771                                                 uiFootnoteNumber = 0;
00772                                         }
00773                                         break;
00774                                 }
00775                                 /* No break or return */
00776                         case footnote_list:
00777                                 eListID = endnote_list;
00778                                 if (uiEndnoteNumber != 0) {
00779                                         vPutSeparatorLine(pAnchor);
00780                                         OUTPUT_LINE();
00781                                         uiEndnoteNumber = 0;
00782                                 }
00783                                 break;
00784                         case endnote_list:
00785                                 eListID = textbox_list;
00786                                 if (bExistsTextBox()) {
00787                                         vPutSeparatorLine(pAnchor);
00788                                         OUTPUT_LINE();
00789                                 }
00790                                 break;
00791                         case textbox_list:
00792                                 eListID = hdrtextbox_list;
00793                                 if (bExistsHdrTextBox()) {
00794                                         vPutSeparatorLine(pAnchor);
00795                                         OUTPUT_LINE();
00796                                 }
00797                                 break;
00798                         case hdrtextbox_list:
00799                         default:
00800                                 eListID = end_of_lists;
00801                                 break;
00802                         }
00803                         if (eListID == end_of_lists) {
00804                                 break;
00805                         }
00806                         continue;
00807                 }
00808 
00809                 if (ulChar == UNKNOWN_NOTE_CHAR) {
00810                         switch (eListID) {
00811                         case footnote_list:
00812                                 ulChar = FOOTNOTE_CHAR;
00813                                 break;
00814                         case endnote_list:
00815                                 ulChar = ENDNOTE_CHAR;
00816                                 break;
00817                         default:
00818                                 break;
00819                         }
00820                 }
00821 
00822                 if (bStartRow) {
00823                         /* Begin of a tablerow found */
00824                         if (bOutputContainsText(pAnchor)) {
00825                                 OUTPUT_LINE();
00826                         } else {
00827                                 RESET_LINE();
00828                         }
00829                         fail(pAnchor != pOutput);
00830                         if (bTableFontClosed) {
00831                                 /* Start special table font */
00832                                 vCloseFont();
00833                                 /*
00834                                  * Compensate for the fact that Word uses
00835                                  * proportional fonts for its tables and we
00836                                  * only one fixed-width font
00837                                  */
00838                                 uiTmp = ((UINT)usFontSize * 5 + 3) / 6;
00839                                 if (uiTmp < MIN_TABLEFONT_SIZE) {
00840                                         uiTmp = MIN_TABLEFONT_SIZE;
00841                                 } else if (uiTmp > MAX_TABLEFONT_SIZE) {
00842                                         uiTmp = MAX_TABLEFONT_SIZE;
00843                                 }
00844                                 pOutput->usFontSize = (USHORT)uiTmp;
00845                                 pOutput->tFontRef =
00846                                         tOpenTableFont(pOutput->usFontSize);
00847                                 pOutput->usFontStyle = FONT_REGULAR;
00848                                 pOutput->ucFontColor = FONT_COLOR_BLACK;
00849                                 bTableFontClosed = FALSE;
00850                         }
00851                         bIsTableRow = TRUE;
00852                         bStartRow = FALSE;
00853                 }
00854 
00855                 if (bWasTableRow &&
00856                     !bIsTableRow &&
00857                     ulChar != PAR_END &&
00858                     ulChar != HARD_RETURN &&
00859                     ulChar != PAGE_BREAK &&
00860                     ulChar != COLUMN_FEED) {
00861                         /*
00862                          * The end of a table should be followed by an
00863                          * empty line, like the end of a paragraph
00864                          */
00865                         OUTPUT_LINE();
00866                         vEndOfParagraph(pDiag,
00867                                         pOutput->tFontRef,
00868                                         pOutput->usFontSize,
00869                                         (long)pOutput->usFontSize * 600);
00870                 }
00871 
00872                 switch (ulChar) {
00873                 case PAGE_BREAK:
00874                 case COLUMN_FEED:
00875                         if (bIsTableRow) {
00876                                 /* Ignore when in a table */
00877                                 break;
00878                         }
00879                         if (bOutputContainsText(pAnchor)) {
00880                                 OUTPUT_LINE();
00881                         } else {
00882                                 RESET_LINE();
00883                         }
00884                         if (ulChar == PAGE_BREAK) {
00885                                 vEndOfPage(pDiag, lAfterIndentation,
00886                                                 pSection != pSectionNext);
00887                         } else {
00888                                 vEndOfParagraph(pDiag,
00889                                         pOutput->tFontRef,
00890                                         pOutput->usFontSize,
00891                                         lAfterIndentation);
00892                         }
00893                         break;
00894                 default:
00895                         break;
00896                 }
00897 
00898                 if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {
00899                         /* Begin of a font found */
00900                         if (bStartFont) {
00901                                 /* bStartFont takes priority */
00902                                 fail(pFontInfo == NULL);
00903                                 pFontTmp = pFontInfo;
00904                         } else {
00905                                 pFontTmp = &tFontNext;
00906                         }
00907                         bAllCapitals = bIsCapitals(pFontTmp->usFontStyle);
00908                         bHiddenText = bIsHidden(pFontTmp->usFontStyle);
00909                         bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle);
00910                         usTmp = pFontTmp->usFontStyle &
00911                                 (FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|
00912                                  FONT_STRIKE|FONT_MARKDEL|
00913                                  FONT_SUPERSCRIPT|FONT_SUBSCRIPT);
00914                         if (!bIsTableRow &&
00915                             (usFontSize != pFontTmp->usFontSize ||
00916                              ucFontNumber != pFontTmp->ucFontNumber ||
00917                              usFontStyleMinimal != usTmp ||
00918                              ucFontColor != pFontTmp->ucFontColor)) {
00919                                 pOutput = pStartNextOutput(pOutput);
00920                                 vCloseFont();
00921                                 pOutput->ucFontColor = pFontTmp->ucFontColor;
00922                                 pOutput->usFontStyle = pFontTmp->usFontStyle;
00923                                 pOutput->usFontSize = pFontTmp->usFontSize;
00924                                 pOutput->tFontRef = tOpenFont(
00925                                                 pFontTmp->ucFontNumber,
00926                                                 pFontTmp->usFontStyle,
00927                                                 pFontTmp->usFontSize);
00928                                 fail(!bCheckDoubleLinkedList(pAnchor));
00929                         }
00930                         ucFontNumber = pFontTmp->ucFontNumber;
00931                         usFontSize = pFontTmp->usFontSize;
00932                         ucFontColor = pFontTmp->ucFontColor;
00933                         usFontStyle = pFontTmp->usFontStyle;
00934                         usFontStyleMinimal = usTmp;
00935                         if (bStartFont) {
00936                                 /* Get the next font info */
00937                                 pFontInfo = pGetNextFontInfoListItem(pFontInfo);
00938                                 NO_DBG_HEX_C(pFontInfo != NULL,
00939                                                 pFontInfo->ulFileOffset);
00940                                 DBG_MSG_C(pFontInfo == NULL, "No more fonts");
00941                         }
00942                         bStartFont = FALSE;
00943                         bStartFontNext = FALSE;
00944                 }
00945 
00946                 if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {
00947                         bFirstLine = TRUE;
00948                         /* Begin of a style found */
00949                         if (bStartStyle) {
00950                                 /* bStartStyle takes priority */
00951                                 fail(pStyleInfo == NULL);
00952                                 pStyleTmp = pStyleInfo;
00953                         } else {
00954                                 pStyleTmp = &tStyleNext;
00955                         }
00956                         if (!bIsTableRow) {
00957                                 vStoreStyle(pDiag, pOutput, pStyleTmp);
00958                         }
00959                         usIstdNext = pStyleTmp->usIstdNext;
00960                         lBeforeIndentation =
00961                                 lTwips2MilliPoints(pStyleTmp->usBeforeIndent);
00962                         lAfterIndentation =
00963                                 lTwips2MilliPoints(pStyleTmp->usAfterIndent);
00964                         lLeftIndentation =
00965                                 lTwips2MilliPoints(pStyleTmp->sLeftIndent);
00966                         lLeftIndentation1 =
00967                                 lTwips2MilliPoints(pStyleTmp->sLeftIndent1);
00968                         lRightIndentation =
00969                                 lTwips2MilliPoints(pStyleTmp->sRightIndent);
00970                         bInList = bStyleImpliesList(pStyleTmp, iWordVersion);
00971                         bNoMarks = !bInList || pStyleTmp->bNumPause;
00972                         ucNFC = pStyleTmp->ucNFC;
00973                         szListChar = pStyleTmp->szListChar;
00974                         ucAlignment = pStyleTmp->ucAlignment;
00975                         if (bInList && !bWasInList) {
00976                                 /* Start of a list */
00977                                 iListSeqNumber++;
00978                                 vStartOfList(pDiag, ucNFC,
00979                                                 bWasTableRow && !bIsTableRow);
00980                         }
00981                         if (!bInList && bWasInList) {
00982                                 /* End of a list */
00983                                 vEndOfList(pDiag);
00984                         }
00985                         bWasInList = bInList;
00986                         if (bStartStyle) {
00987                                 pStyleInfo = pGetNextTextStyle(pStyleInfo);
00988                                 NO_DBG_HEX_C(pStyleInfo != NULL,
00989                                                 pStyleInfo->ulFileOffset);
00990                                 DBG_MSG_C(pStyleInfo == NULL,
00991                                                 "No more styles");
00992                         }
00993                         bStartStyle = FALSE;
00994                         bStartStyleNext = FALSE;
00995                 }
00996 
00997                 if (bWasEndOfParagraph) {
00998                         vStartOfParagraph1(pDiag, lBeforeIndentation);
00999                 }
01000 
01001                 if (!bIsTableRow &&
01002                     lTotalStringWidth(pAnchor) == 0) {
01003                         if (!bNoMarks) {
01004                                 usListNumber = usGetListValue(iListSeqNumber,
01005                                                         iWordVersion,
01006                                                         pStyleTmp);
01007                         }
01008                         if (bInList && bFirstLine) {
01009                                 vStartOfListItem(pDiag, bNoMarks);
01010                         }
01011                         vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine,
01012                                         usListNumber, ucNFC, szListChar,
01013                                         lLeftIndentation, lLeftIndentation1);
01014                         bFirstLine = FALSE;
01015                         /* One number or mark per paragraph will do */
01016                         bNoMarks = TRUE;
01017                 }
01018 
01019                 if (bWasEndOfParagraph) {
01020                         vStartOfParagraph2(pDiag);
01021                         bWasEndOfParagraph = FALSE;
01022                 }
01023 
01024                 switch (ulChar) {
01025                 case PICTURE:
01026                         (void)memset(&tImage, 0, sizeof(tImage));
01027                         eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);
01028                         switch (eRes) {
01029                         case image_no_information:
01030                                 bSuccess = FALSE;
01031                                 break;
01032                         case image_minimal_information:
01033                         case image_full_information:
01034 #if 0
01035                                 if (bOutputContainsText(pAnchor)) {
01036                                         OUTPUT_LINE();
01037                                 } else {
01038                                         RESET_LINE();
01039                                 }
01040 #endif
01041                                 bSuccess = bTranslateImage(pDiag, pFile,
01042                                         eRes == image_minimal_information,
01043                                         ulFileOffsetImage, &tImage);
01044                                 break;
01045                         default:
01046                                 DBG_DEC(eRes);
01047                                 bSuccess = FALSE;
01048                                 break;
01049                         }
01050                         if (!bSuccess) {
01051                                 vStoreString("[pic]", 5, pOutput);
01052                         }
01053                         break;
01054                 case FOOTNOTE_CHAR:
01055                         uiFootnoteNumber++;
01056                         if (tOptions.eConversionType == conversion_xml) {
01057                                 vStoreCharacter((ULONG)FOOTNOTE_OR_ENDNOTE,
01058                                                                 pOutput);
01059                                 break;
01060                         }
01061                         vStoreCharacter((ULONG)'[', pOutput);
01062                         vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);
01063                         vStoreCharacter((ULONG)']', pOutput);
01064                         break;
01065                 case ENDNOTE_CHAR:
01066                         uiEndnoteNumber++;
01067                         vStoreCharacter((ULONG)'[', pOutput);
01068                         vStoreNumberAsRoman(uiEndnoteNumber, pOutput);
01069                         vStoreCharacter((ULONG)']', pOutput);
01070                         break;
01071                 case UNKNOWN_NOTE_CHAR:
01072                         vStoreString("[?]", 3, pOutput);
01073                         break;
01074                 case PAR_END:
01075                         if (bIsTableRow) {
01076                                 vStoreCharacter((ULONG)'\n', pOutput);
01077                                 break;
01078                         }
01079                         if (bOutputContainsText(pAnchor)) {
01080                                 OUTPUT_LINE();
01081                         } else {
01082                                 vMove2NextLine(pDiag,
01083                                         pOutput->tFontRef, pOutput->usFontSize);
01084                                 RESET_LINE();
01085                         }
01086                         vEndOfParagraph(pDiag,
01087                                         pOutput->tFontRef,
01088                                         pOutput->usFontSize,
01089                                         lAfterIndentation);
01090                         bWasEndOfParagraph = TRUE;
01091                         break;
01092                 case HARD_RETURN:
01093                         if (bIsTableRow) {
01094                                 vStoreCharacter((ULONG)'\n', pOutput);
01095                                 break;
01096                         }
01097                         if (bOutputContainsText(pAnchor)) {
01098                                 OUTPUT_LINE();
01099                         } else {
01100                                 vMove2NextLine(pDiag,
01101                                         pOutput->tFontRef, pOutput->usFontSize);
01102                                 RESET_LINE();
01103                         }
01104                         break;
01105                 case PAGE_BREAK:
01106                 case COLUMN_FEED:
01107                         pSection = pSectionNext;
01108                         break;
01109                 case TABLE_SEPARATOR:
01110                         if (bIsTableRow) {
01111                                 vStoreCharacter(ulChar, pOutput);
01112                                 break;
01113                         }
01114                         vStoreCharacter((ULONG)' ', pOutput);
01115                         vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput);
01116                         break;
01117                 case TAB:
01118                         if (bIsTableRow ||
01119                             tOptions.eConversionType == conversion_xml) {
01120                                 vStoreCharacter((ULONG)' ', pOutput);
01121                                 break;
01122                         }
01123                         if (tOptions.iParagraphBreak == 0 &&
01124                             (tOptions.eConversionType == conversion_text ||
01125                              tOptions.eConversionType == conversion_fmt_text)) {
01126                                 /* No logical lines, so no tab expansion */
01127                                 vStoreCharacter(TAB, pOutput);
01128                                 break;
01129                         }
01130                         lHalfSpaceWidth = (lComputeSpaceWidth(
01131                                         pOutput->tFontRef,
01132                                         pOutput->usFontSize) + 1) / 2;
01133                         lTmp = lTotalStringWidth(pAnchor);
01134                         lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);
01135                         lTmp /= lDefaultTabWidth;
01136                         do {
01137                                 vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
01138                                 lWidthCurr = lTotalStringWidth(pAnchor);
01139                                 lWidthCurr +=
01140                                         lDrawUnits2MilliPoints(pDiag->lXleft);
01141                         } while (lTmp == lWidthCurr / lDefaultTabWidth &&
01142                                  lWidthCurr < lWidthMax + lRightIndentation);
01143                         break;
01144                 default:
01145                         if (bHiddenText && tOptions.bHideHiddenText) {
01146                                 continue;
01147                         }
01148                         if (bMarkDelText && tOptions.bRemoveRemovedText) {
01149                                 continue;
01150                         }
01151                         if (ulChar == UNICODE_ELLIPSIS &&
01152                             tOptions.eEncoding != encoding_utf_8) {
01153                                 vStoreString("...", 3, pOutput);
01154                         } else {
01155                                 if (bAllCapitals) {
01156                                         ulChar = ulToUpper(ulChar);
01157                                 }
01158                                 vStoreCharacter(ulChar, pOutput);
01159                         }
01160                         break;
01161                 }
01162 
01163                 if (bWasTableRow && !bIsTableRow) {
01164                         /* End of a table */
01165                         vEndOfTable(pDiag);
01166                         /* Resume normal font */
01167                         NO_DBG_MSG("End of table font");
01168                         vCloseFont();
01169                         bTableFontClosed = TRUE;
01170                         pOutput->ucFontColor = ucFontColor;
01171                         pOutput->usFontStyle = usFontStyle;
01172                         pOutput->usFontSize = usFontSize;
01173                         pOutput->tFontRef = tOpenFont(
01174                                         ucFontNumber, usFontStyle, usFontSize);
01175                 }
01176                 bWasTableRow = bIsTableRow;
01177 
01178                 if (bIsTableRow) {
01179                         fail(pAnchor != pOutput);
01180                         if (!bEndRowNorm && !bEndRowFast) {
01181                                 continue;
01182                         }
01183                         /* End of a table row */
01184                         if (bEndRowNorm) {
01185                                 fail(pRowInfo == NULL);
01186                                 vTableRow2Window(pDiag, pAnchor, pRowInfo,
01187                                                 tOptions.eConversionType,
01188                                                 tOptions.iParagraphBreak);
01189                         } else {
01190                                 fail(!bEndRowFast);
01191                         }
01192                         /* Reset */
01193                         pAnchor = pStartNewOutput(pAnchor, NULL);
01194                         pOutput = pAnchor;
01195                         if (bEndRowNorm) {
01196                                 pRowInfo = pGetNextRowInfoListItem();
01197                         }
01198                         bIsTableRow = FALSE;
01199                         bEndRowNorm = FALSE;
01200                         bEndRowFast = FALSE;
01201                         NO_DBG_HEX_C(pRowInfo != NULL,
01202                                                 pRowInfo->ulFileOffsetStart);
01203                         NO_DBG_HEX_C(pRowInfo != NULL,
01204                                                 pRowInfo->ulFileOffsetEnd);
01205                         continue;
01206                 }
01207                 lWidthCurr = lTotalStringWidth(pAnchor);
01208                 lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);
01209                 if (lWidthCurr < lWidthMax + lRightIndentation) {
01210                         continue;
01211                 }
01212                 pLeftOver = pSplitList(pAnchor);
01213                 vJustify2Window(pDiag, pAnchor,
01214                                 lWidthMax, lRightIndentation, ucAlignment);
01215                 pAnchor = pStartNewOutput(pAnchor, pLeftOver);
01216                 for (pOutput = pAnchor;
01217                      pOutput->pNext != NULL;
01218                      pOutput = pOutput->pNext)
01219                         ;       /* EMPTY */
01220                 fail(pOutput == NULL);
01221                 if (lTotalStringWidth(pAnchor) > 0) {
01222                         vSetLeftIndentation(pDiag, lLeftIndentation);
01223                 }
01224         }
01225 
01226         pAnchor = pStartNewOutput(pAnchor, NULL);
01227         pAnchor->szStorage = xfree(pAnchor->szStorage);
01228         pAnchor = xfree(pAnchor);
01229         vCloseFont();
01230         vFreeDocument();
01231         Hourglass_Off();
01232         return TRUE;
01233 } /* end of bWordDecryptor */
01234 
01235 /*
01236  * lLastStringWidth - compute the width of the last part of the output string
01237  */
01238 static long
01239 lLastStringWidth(const output_type *pAnchor)
01240 {
01241         const output_type       *pCurr, *pStart;
01242 
01243         pStart = NULL;
01244         for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
01245                 if (pCurr->tNextFree == 1 &&
01246                     (pCurr->szStorage[0] == PAR_END ||
01247                      pCurr->szStorage[0] == HARD_RETURN)) {
01248                         /* Found a separator. Start after the separator */
01249                         pStart = pCurr->pNext;
01250                 }
01251         }
01252         if (pStart == NULL) {
01253                 /* No separators. Use the whole output string */
01254                 pStart = pAnchor;
01255         }
01256         return lTotalStringWidth(pStart);
01257 } /* end of lLastStringWidth */
01258 
01259 /*
01260  * pHdrFtrDecryptor - turn a header/footer list element to something useful
01261  */
01262 output_type *
01263 pHdrFtrDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
01264 {
01265         output_type     *pAnchor, *pOutput, *pLeftOver;
01266         ULONG   ulChar, ulFileOffset, ulCharPos;
01267         long    lWidthCurr, lWidthMax;
01268         long    lRightIndentation;
01269         USHORT  usChar;
01270         UCHAR   ucAlignment;
01271         BOOL    bSkip;
01272 
01273         fail(iWordVersion < 0);
01274         fail(tOptions.eConversionType == conversion_unknown);
01275         fail(tOptions.eEncoding == 0);
01276 
01277         if (ulCharPosStart == ulCharPosNext) {
01278                 /* There are no bytes to decrypt */
01279                 return NULL;
01280         }
01281 
01282         lRightIndentation = 0;
01283         ucAlignment = ALIGNMENT_LEFT;
01284         bSkip = FALSE;
01285         lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
01286         pAnchor = pStartNewOutput(NULL, NULL);
01287         pOutput = pAnchor;
01288         pOutput->tFontRef = tOpenFont(0, FONT_REGULAR, DEFAULT_FONT_SIZE);
01289         usChar = usToHdrFtrPosition(pFile, ulCharPosStart);
01290         ulCharPos = ulCharPosStart;
01291         ulFileOffset = ulCharPos2FileOffset(ulCharPos);
01292         while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
01293                 /* Skip embedded characters */
01294                 if (usChar == START_EMBEDDED) {
01295                         bSkip = TRUE;
01296                 } else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
01297                         bSkip = FALSE;
01298                 }
01299                 /* Translate character */
01300                 if (bSkip || usChar == END_IGNORE || usChar == END_EMBEDDED) {
01301                         ulChar = IGNORE_CHARACTER;
01302                 } else {
01303                         ulChar = ulTranslateCharacters(usChar,
01304                                         ulFileOffset,
01305                                         iWordVersion,
01306                                         tOptions.eConversionType,
01307                                         tOptions.eEncoding,
01308                                         bOldMacFile);
01309                 }
01310                 /* Process character */
01311                 if (ulChar != IGNORE_CHARACTER) {
01312                         switch (ulChar) {
01313                         case PICTURE:
01314                                 vStoreString("[pic]", 5, pOutput);
01315                                 break;
01316                         case PAR_END:
01317                         case HARD_RETURN:
01318                         case PAGE_BREAK:
01319                         case COLUMN_FEED:
01320                                 /* To the next substring */
01321                                 pOutput = pStartNextOutput(pOutput);
01322                                 vCloseFont();
01323                                 pOutput->tFontRef = tOpenFont(0,
01324                                         FONT_REGULAR, DEFAULT_FONT_SIZE);
01325                                 /* A substring with just one character */
01326                                 if (ulChar == HARD_RETURN) {
01327                                         vStoreCharacter(HARD_RETURN, pOutput);
01328                                 } else {
01329                                         vStoreCharacter(PAR_END, pOutput);
01330                                 }
01331                                 /* To the next substring */
01332                                 pOutput = pStartNextOutput(pOutput);
01333                                 vCloseFont();
01334                                 pOutput->tFontRef = tOpenFont(0,
01335                                         FONT_REGULAR, DEFAULT_FONT_SIZE);
01336                                 fail(!bCheckDoubleLinkedList(pAnchor));
01337                                 break;
01338                         case TABLE_SEPARATOR:
01339                                 vStoreCharacter((ULONG)' ', pOutput);
01340                                 vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR,
01341                                                         pOutput);
01342                                 break;
01343                         case TAB:
01344                                 vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
01345                                 break;
01346                         default:
01347                                 vStoreCharacter(ulChar, pOutput);
01348                                 break;
01349                         }
01350                 }
01351                 lWidthCurr = lLastStringWidth(pAnchor);
01352                 if (lWidthCurr >= lWidthMax + lRightIndentation) {
01353                         pLeftOver = pSplitList(pAnchor);
01354                         for (pOutput = pAnchor;
01355                              pOutput->pNext != NULL;
01356                              pOutput = pOutput->pNext)
01357                                 ;       /* EMPTY */
01358                         fail(pOutput == NULL);
01359                         /* To the next substring */
01360                         pOutput = pStartNextOutput(pOutput);
01361                         /* A substring with just one HARD_RETURN */
01362                         vStoreCharacter(HARD_RETURN, pOutput);
01363                         /* Put the leftover piece(s) at the end */
01364                         pOutput->pNext = pLeftOver;
01365                         if (pLeftOver != NULL) {
01366                                 pLeftOver->pPrev = pOutput;
01367                         }
01368                         fail(!bCheckDoubleLinkedList(pAnchor));
01369                         for (pOutput = pAnchor;
01370                              pOutput->pNext != NULL;
01371                              pOutput = pOutput->pNext)
01372                                 ;       /* EMPTY */
01373                         fail(pOutput == NULL);
01374                 }
01375                 usChar = usNextChar(pFile, hdrftr_list,
01376                                         &ulFileOffset, &ulCharPos, NULL);
01377         }
01378         vCloseFont();
01379         if (bOutputContainsText(pAnchor)) {
01380                 return pAnchor;
01381         }
01382         pAnchor = pStartNewOutput(pAnchor, NULL);
01383         pAnchor->szStorage = xfree(pAnchor->szStorage);
01384         pAnchor = xfree(pAnchor);
01385         return NULL;
01386 } /* end of pHdrFtrDecryptor */
01387 
01388 /*
01389  * pFootnoteDecryptor - turn a footnote text list element into text
01390  */
01391 char *
01392 szFootnoteDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
01393 {
01394         char    *szText;
01395         ULONG   ulChar, ulFileOffset, ulCharPos;
01396         USHORT  usChar;
01397         size_t  tLen, tIndex, tNextFree, tStorageSize;
01398         char    szResult[6];
01399         BOOL    bSkip;
01400 
01401         fail(iWordVersion < 0);
01402         fail(tOptions.eConversionType == conversion_unknown);
01403         fail(tOptions.eEncoding == 0);
01404 
01405         if (ulCharPosStart == ulCharPosNext) {
01406                 /* There are no bytes to decrypt */
01407                 return NULL;
01408         }
01409 
01410         if (tOptions.eConversionType != conversion_xml) {
01411                 /* Only implemented for XML output */
01412                 return NULL;
01413         }
01414 
01415         bSkip = FALSE;
01416 
01417         /* Initialise the text buffer */
01418         tStorageSize = INITIAL_SIZE;
01419         szText = xmalloc(tStorageSize);
01420         tNextFree = 0;
01421         szText[tNextFree] = '\0';
01422 
01423         /* Goto the start */
01424         usChar = usToFootnotePosition(pFile, ulCharPosStart);
01425         ulCharPos = ulCharPosStart;
01426         ulFileOffset = ulCharPos2FileOffset(ulCharPos);
01427         /* Skip the unwanted starting characters */
01428         while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext &&
01429                (usChar == FOOTNOTE_OR_ENDNOTE ||
01430                 usChar == PAR_END ||
01431                 usChar == TAB ||
01432                 usChar == (USHORT)' ')) {
01433                 usChar = usNextChar(pFile, footnote_list,
01434                                         &ulFileOffset, &ulCharPos, NULL);
01435         }
01436         /* Process the footnote text */
01437         while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
01438                 /* Skip embedded characters */
01439                 if (usChar == START_EMBEDDED) {
01440                         bSkip = TRUE;
01441                 } else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
01442                         bSkip = FALSE;
01443                 }
01444                 /* Translate character */
01445                 if (bSkip ||
01446                     usChar == END_IGNORE ||
01447                     usChar == END_EMBEDDED ||
01448                     usChar == FOOTNOTE_OR_ENDNOTE) {
01449                         ulChar = IGNORE_CHARACTER;
01450                 } else {
01451                         ulChar = ulTranslateCharacters(usChar,
01452                                         ulFileOffset,
01453                                         iWordVersion,
01454                                         tOptions.eConversionType,
01455                                         tOptions.eEncoding,
01456                                         bOldMacFile);
01457                 }
01458                 /* Process character */
01459                 if (ulChar == PICTURE) {
01460                         tLen = 5;
01461                         strcpy(szResult, "[pic]");
01462                 } else if (ulChar == IGNORE_CHARACTER) {
01463                         tLen = 0;
01464                         szResult[0] = '\0';
01465                 } else {
01466                         switch (ulChar) {
01467                         case PAR_END:
01468                         case HARD_RETURN:
01469                         case PAGE_BREAK:
01470                         case COLUMN_FEED:
01471                                 ulChar = (ULONG)PAR_END;
01472                                 break;
01473                         case TAB:
01474                                 ulChar = (ULONG)' ';
01475                                 break;
01476                         default:
01477                                 break;
01478                         }
01479                         tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
01480                 }
01481                 /* Add the results to the text */
01482                 if (tNextFree + tLen + 1 > tStorageSize) {
01483                         tStorageSize += EXTENTION_SIZE;
01484                         szText = xrealloc(szText, tStorageSize);
01485                 }
01486                 for (tIndex = 0; tIndex < tLen; tIndex++) {
01487                         szText[tNextFree++] = szResult[tIndex];
01488                 }
01489                 szText[tNextFree] = '\0';
01490                 /* Next character */
01491                 usChar = usNextChar(pFile, footnote_list,
01492                                         &ulFileOffset, &ulCharPos, NULL);
01493         }
01494         /* Remove redundant spaces */
01495         while (tNextFree != 0 && szText[tNextFree - 1] == ' ') {
01496                 szText[tNextFree - 1] = '\0';
01497                 tNextFree--;
01498         }
01499         if (tNextFree == 0) {
01500                 /* No text */
01501                 szText = xfree(szText);
01502                 return NULL;
01503         }
01504         return szText;
01505 } /* end of szFootnoteDecryptor */

Generated by  doxygen 1.6.2