examples/PIPS/antiword/src/wordwin.c

00001 /*
00002  * wordwin.c
00003  * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
00004  *
00005  * Description:
00006  * Deal with the WIN internals of a MS Word file
00007  */
00008 
00009 #include "antiword.h"
00010 
00011 
00012 /*
00013  * bGetDocumentText - make a list of the text blocks of a Word document
00014  *
00015  * Return TRUE when succesful, otherwise FALSE
00016  */
00017 static BOOL
00018 bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
00019 {
00020         text_block_type tTextBlock;
00021         ULONG   ulBeginOfText;
00022         ULONG   ulTextLen, ulFootnoteLen;
00023         ULONG   ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
00024         UINT    uiQuickSaves;
00025         USHORT  usDocStatus;
00026         BOOL    bTemplate, bFastSaved, bEncrypted, bSuccess;
00027 
00028         fail(pFile == NULL);
00029         fail(aucHeader == NULL);
00030 
00031         DBG_MSG("bGetDocumentText");
00032 
00033         /* Get the status flags from the header */
00034         usDocStatus = usGetWord(0x0a, aucHeader);
00035         DBG_HEX(usDocStatus);
00036         bTemplate = (usDocStatus & BIT(0)) != 0;
00037         DBG_MSG_C(bTemplate, "This document is a Template");
00038         bFastSaved = (usDocStatus & BIT(2)) != 0;
00039         uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
00040         DBG_MSG_C(bFastSaved, "This document is Fast Saved");
00041         DBG_DEC_C(bFastSaved, uiQuickSaves);
00042         if (bFastSaved) {
00043                 werr(0, "Word2: fast saved documents are not supported yet");
00044                 return FALSE;
00045         }
00046         bEncrypted = (usDocStatus & BIT(8)) != 0;
00047         if (bEncrypted) {
00048                 werr(0, "Encrypted documents are not supported");
00049                 return FALSE;
00050         }
00051 
00052         /* Get length information */
00053         ulBeginOfText = ulGetLong(0x18, aucHeader);
00054         DBG_HEX(ulBeginOfText);
00055         ulTextLen = ulGetLong(0x34, aucHeader);
00056         ulFootnoteLen = ulGetLong(0x38, aucHeader);
00057         ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
00058         ulMacroLen = ulGetLong(0x40, aucHeader);
00059         ulAnnotationLen = ulGetLong(0x44, aucHeader);
00060         DBG_DEC(ulTextLen);
00061         DBG_DEC(ulFootnoteLen);
00062         DBG_DEC(ulHdrFtrLen);
00063         DBG_DEC(ulMacroLen);
00064         DBG_DEC(ulAnnotationLen);
00065         if (bFastSaved) {
00066                 bSuccess = FALSE;
00067         } else {
00068                 tTextBlock.ulFileOffset = ulBeginOfText;
00069                 tTextBlock.ulCharPos = ulBeginOfText;
00070                 tTextBlock.ulLength = ulTextLen +
00071                                 ulFootnoteLen +
00072                                 ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
00073                 tTextBlock.bUsesUnicode = FALSE;
00074                 tTextBlock.usPropMod = IGNORE_PROPMOD;
00075                 bSuccess = bAdd2TextBlockList(&tTextBlock);
00076                 DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
00077                 DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
00078                 DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
00079                 DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
00080                 DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
00081         }
00082 
00083         if (bSuccess) {
00084                 vSplitBlockList(pFile,
00085                                 ulTextLen,
00086                                 ulFootnoteLen,
00087                                 ulHdrFtrLen,
00088                                 ulMacroLen,
00089                                 ulAnnotationLen,
00090                                 0,
00091                                 0,
00092                                 0,
00093                                 FALSE);
00094         } else {
00095                 vDestroyTextBlockList();
00096                 werr(0, "I can't find the text of this document");
00097         }
00098         return bSuccess;
00099 } /* end of bGetDocumentText */
00100 
00101 /*
00102  * vGetDocumentData - make a list of the data blocks of a Word document
00103  */
00104 static void
00105 vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
00106 {
00107         data_block_type tDataBlock;
00108         options_type    tOptions;
00109         ULONG   ulEndOfText, ulBeginCharInfo;
00110         BOOL    bFastSaved, bHasImages, bSuccess;
00111         USHORT  usDocStatus;
00112 
00113         /* Get the options */
00114         vGetOptions(&tOptions);
00115 
00116         /* Get the status flags from the header */
00117         usDocStatus = usGetWord(0x0a, aucHeader);
00118         DBG_HEX(usDocStatus);
00119         bFastSaved = (usDocStatus & BIT(2)) != 0;
00120         bHasImages = (usDocStatus & BIT(3)) != 0;
00121 
00122         if (!bHasImages ||
00123             tOptions.eConversionType == conversion_text ||
00124             tOptions.eConversionType == conversion_fmt_text ||
00125             tOptions.eConversionType == conversion_xml ||
00126             tOptions.eImageLevel == level_no_images) {
00127                 /*
00128                  * No images in the document or text-only output or
00129                  * no images wanted, so no data blocks will be needed
00130                  */
00131                 vDestroyDataBlockList();
00132                 return;
00133         }
00134 
00135         if (bFastSaved) {
00136                 bSuccess = FALSE;
00137         } else {
00138                 /* This datablock is too big, but it contains all images */
00139                 ulEndOfText = ulGetLong(0x1c, aucHeader);
00140                 DBG_HEX(ulEndOfText);
00141                 ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
00142                 DBG_HEX(ulBeginCharInfo);
00143                 if (ulBeginCharInfo > ulEndOfText) {
00144                         tDataBlock.ulFileOffset = ulEndOfText;
00145                         tDataBlock.ulDataPos = ulEndOfText;
00146                         tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
00147                         bSuccess = bAdd2DataBlockList(&tDataBlock);
00148                         DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
00149                         DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
00150                         DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
00151                 } else {
00152                         bSuccess = ulBeginCharInfo == ulEndOfText;
00153                 }
00154         }
00155 
00156         if (!bSuccess) {
00157                 vDestroyDataBlockList();
00158                 werr(0, "I can't find the data of this document");
00159         }
00160 } /* end of vGetDocumentData */
00161 
00162 /*
00163  * iInitDocumentWIN - initialize an WIN document
00164  *
00165  * Returns the version of Word that made the document or -1
00166  */
00167 int
00168 iInitDocumentWIN(FILE *pFile, long lFilesize)
00169 {
00170         int     iWordVersion;
00171         BOOL    bSuccess;
00172         USHORT  usIdent;
00173         UCHAR   aucHeader[384];
00174 
00175         fail(pFile == NULL);
00176 
00177         if (lFilesize < 384) {
00178                 return -1;
00179         }
00180 
00181         /* Read the headerblock */
00182         if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
00183                 return -1;
00184         }
00185         /* Get the "magic number" from the header */
00186         usIdent = usGetWord(0x00, aucHeader);
00187         DBG_HEX(usIdent);
00188         fail(usIdent != 0xa59b &&       /* WinWord 1.x */
00189                 usIdent != 0xa5db);     /* WinWord 2.0 */
00190         iWordVersion = iGetVersionNumber(aucHeader);
00191         if (iWordVersion != 1 && iWordVersion != 2) {
00192                 werr(0, "This file is not from ''Win Word 1 or 2'.");
00193                 return -1;
00194         }
00195         bSuccess = bGetDocumentText(pFile, aucHeader);
00196         if (bSuccess) {
00197                 vGetDocumentData(pFile, aucHeader);
00198                 vGetPropertyInfo(pFile, NULL,
00199                                 NULL, 0, NULL, 0,
00200                                 aucHeader, iWordVersion);
00201                 vSetDefaultTabWidth(pFile, NULL,
00202                                 NULL, 0, NULL, 0,
00203                                 aucHeader, iWordVersion);
00204                 vGetNotesInfo(pFile, NULL,
00205                                 NULL, 0, NULL, 0,
00206                                 aucHeader, iWordVersion);
00207         }
00208         return bSuccess ? iWordVersion : -1;
00209 } /* end of iInitDocumentWIN */

Generated by  doxygen 1.6.2