00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "antiword.h"
00010
00011
00012
00013
00014
00015
00016
00017 static BOOL
00018 bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
00019 {
00020 text_block_type tTextBlock;
00021 ULONG ulBeginOfText;
00022 ULONG ulTextLen, ulFootnoteLen;
00023 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
00024 UINT uiQuickSaves;
00025 USHORT usDocStatus;
00026 BOOL bTemplate, bFastSaved, bEncrypted, bSuccess;
00027
00028 fail(pFile == NULL);
00029 fail(aucHeader == NULL);
00030
00031 DBG_MSG("bGetDocumentText");
00032
00033
00034 usDocStatus = usGetWord(0x0a, aucHeader);
00035 DBG_HEX(usDocStatus);
00036 bTemplate = (usDocStatus & BIT(0)) != 0;
00037 DBG_MSG_C(bTemplate, "This document is a Template");
00038 bFastSaved = (usDocStatus & BIT(2)) != 0;
00039 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
00040 DBG_MSG_C(bFastSaved, "This document is Fast Saved");
00041 DBG_DEC_C(bFastSaved, uiQuickSaves);
00042 if (bFastSaved) {
00043 werr(0, "Word2: fast saved documents are not supported yet");
00044 return FALSE;
00045 }
00046 bEncrypted = (usDocStatus & BIT(8)) != 0;
00047 if (bEncrypted) {
00048 werr(0, "Encrypted documents are not supported");
00049 return FALSE;
00050 }
00051
00052
00053 ulBeginOfText = ulGetLong(0x18, aucHeader);
00054 DBG_HEX(ulBeginOfText);
00055 ulTextLen = ulGetLong(0x34, aucHeader);
00056 ulFootnoteLen = ulGetLong(0x38, aucHeader);
00057 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
00058 ulMacroLen = ulGetLong(0x40, aucHeader);
00059 ulAnnotationLen = ulGetLong(0x44, aucHeader);
00060 DBG_DEC(ulTextLen);
00061 DBG_DEC(ulFootnoteLen);
00062 DBG_DEC(ulHdrFtrLen);
00063 DBG_DEC(ulMacroLen);
00064 DBG_DEC(ulAnnotationLen);
00065 if (bFastSaved) {
00066 bSuccess = FALSE;
00067 } else {
00068 tTextBlock.ulFileOffset = ulBeginOfText;
00069 tTextBlock.ulCharPos = ulBeginOfText;
00070 tTextBlock.ulLength = ulTextLen +
00071 ulFootnoteLen +
00072 ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
00073 tTextBlock.bUsesUnicode = FALSE;
00074 tTextBlock.usPropMod = IGNORE_PROPMOD;
00075 bSuccess = bAdd2TextBlockList(&tTextBlock);
00076 DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
00077 DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
00078 DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
00079 DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
00080 DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
00081 }
00082
00083 if (bSuccess) {
00084 vSplitBlockList(pFile,
00085 ulTextLen,
00086 ulFootnoteLen,
00087 ulHdrFtrLen,
00088 ulMacroLen,
00089 ulAnnotationLen,
00090 0,
00091 0,
00092 0,
00093 FALSE);
00094 } else {
00095 vDestroyTextBlockList();
00096 werr(0, "I can't find the text of this document");
00097 }
00098 return bSuccess;
00099 }
00100
00101
00102
00103
00104 static void
00105 vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
00106 {
00107 data_block_type tDataBlock;
00108 options_type tOptions;
00109 ULONG ulEndOfText, ulBeginCharInfo;
00110 BOOL bFastSaved, bHasImages, bSuccess;
00111 USHORT usDocStatus;
00112
00113
00114 vGetOptions(&tOptions);
00115
00116
00117 usDocStatus = usGetWord(0x0a, aucHeader);
00118 DBG_HEX(usDocStatus);
00119 bFastSaved = (usDocStatus & BIT(2)) != 0;
00120 bHasImages = (usDocStatus & BIT(3)) != 0;
00121
00122 if (!bHasImages ||
00123 tOptions.eConversionType == conversion_text ||
00124 tOptions.eConversionType == conversion_fmt_text ||
00125 tOptions.eConversionType == conversion_xml ||
00126 tOptions.eImageLevel == level_no_images) {
00127
00128
00129
00130
00131 vDestroyDataBlockList();
00132 return;
00133 }
00134
00135 if (bFastSaved) {
00136 bSuccess = FALSE;
00137 } else {
00138
00139 ulEndOfText = ulGetLong(0x1c, aucHeader);
00140 DBG_HEX(ulEndOfText);
00141 ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
00142 DBG_HEX(ulBeginCharInfo);
00143 if (ulBeginCharInfo > ulEndOfText) {
00144 tDataBlock.ulFileOffset = ulEndOfText;
00145 tDataBlock.ulDataPos = ulEndOfText;
00146 tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
00147 bSuccess = bAdd2DataBlockList(&tDataBlock);
00148 DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
00149 DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
00150 DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
00151 } else {
00152 bSuccess = ulBeginCharInfo == ulEndOfText;
00153 }
00154 }
00155
00156 if (!bSuccess) {
00157 vDestroyDataBlockList();
00158 werr(0, "I can't find the data of this document");
00159 }
00160 }
00161
00162
00163
00164
00165
00166
00167 int
00168 iInitDocumentWIN(FILE *pFile, long lFilesize)
00169 {
00170 int iWordVersion;
00171 BOOL bSuccess;
00172 USHORT usIdent;
00173 UCHAR aucHeader[384];
00174
00175 fail(pFile == NULL);
00176
00177 if (lFilesize < 384) {
00178 return -1;
00179 }
00180
00181
00182 if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
00183 return -1;
00184 }
00185
00186 usIdent = usGetWord(0x00, aucHeader);
00187 DBG_HEX(usIdent);
00188 fail(usIdent != 0xa59b &&
00189 usIdent != 0xa5db);
00190 iWordVersion = iGetVersionNumber(aucHeader);
00191 if (iWordVersion != 1 && iWordVersion != 2) {
00192 werr(0, "This file is not from ''Win Word 1 or 2'.");
00193 return -1;
00194 }
00195 bSuccess = bGetDocumentText(pFile, aucHeader);
00196 if (bSuccess) {
00197 vGetDocumentData(pFile, aucHeader);
00198 vGetPropertyInfo(pFile, NULL,
00199 NULL, 0, NULL, 0,
00200 aucHeader, iWordVersion);
00201 vSetDefaultTabWidth(pFile, NULL,
00202 NULL, 0, NULL, 0,
00203 aucHeader, iWordVersion);
00204 vGetNotesInfo(pFile, NULL,
00205 NULL, 0, NULL, 0,
00206 aucHeader, iWordVersion);
00207 }
00208 return bSuccess ? iWordVersion : -1;
00209 }