00001
00002
00003
00004
00005
00006
00007
00008
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include "antiword.h"
00012
00013
00014
00015
00016
00017
00018
00019 BOOL
00020 bAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength,
00021 BOOL bUsesUnicode, USHORT usPropMod,
00022 ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen)
00023 {
00024 text_block_type tTextBlock;
00025 ULONG ulCharPos, ulOffset, ulIndex;
00026 long lToGo;
00027
00028 fail(ulTotalLength > (ULONG)LONG_MAX / 2);
00029 fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
00030 fail(aulBBD == NULL);
00031
00032 NO_DBG_HEX(ulCharPosFirst);
00033 NO_DBG_DEC(ulTotalLength);
00034
00035 if (bUsesUnicode) {
00036
00037 NO_DBG_MSG("Uses Unicode");
00038 lToGo = (long)ulTotalLength * 2;
00039 } else {
00040
00041 NO_DBG_MSG("Uses ASCII");
00042 lToGo = (long)ulTotalLength;
00043 }
00044
00045 ulCharPos = ulCharPosFirst;
00046 ulOffset = ulCharPosFirst;
00047 for (ulIndex = ulStartBlock;
00048 ulIndex != END_OF_CHAIN && lToGo > 0;
00049 ulIndex = aulBBD[ulIndex]) {
00050 if (ulIndex >= (ULONG)tBBDLen) {
00051 DBG_DEC(ulIndex);
00052 DBG_DEC(tBBDLen);
00053 werr(1, "The Big Block Depot is damaged");
00054 }
00055 if (ulOffset >= BIG_BLOCK_SIZE) {
00056 ulOffset -= BIG_BLOCK_SIZE;
00057 continue;
00058 }
00059 tTextBlock.ulFileOffset =
00060 (ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset;
00061 tTextBlock.ulCharPos = ulCharPos;
00062 tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset,
00063 (ULONG)lToGo);
00064 tTextBlock.bUsesUnicode = bUsesUnicode;
00065 tTextBlock.usPropMod = usPropMod;
00066 ulOffset = 0;
00067 if (!bAdd2TextBlockList(&tTextBlock)) {
00068 DBG_HEX(tTextBlock.ulFileOffset);
00069 DBG_HEX(tTextBlock.ulCharPos);
00070 DBG_DEC(tTextBlock.ulLength);
00071 DBG_DEC(tTextBlock.bUsesUnicode);
00072 DBG_DEC(tTextBlock.usPropMod);
00073 return FALSE;
00074 }
00075 ulCharPos += tTextBlock.ulLength;
00076 lToGo -= (long)tTextBlock.ulLength;
00077 }
00078 DBG_DEC_C(lToGo != 0, lToGo);
00079 return lToGo == 0;
00080 }
00081
00082
00083
00084
00085
00086
00087
00088
00089 BOOL
00090 bGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock,
00091 const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader)
00092 {
00093 UCHAR *aucBuffer;
00094 ULONG ulBeginTextInfo, ulTextOffset, ulTotLength;
00095 size_t tTextInfoLen;
00096 int iIndex, iType, iOff, iLen, iPieces;
00097 USHORT usPropMod;
00098
00099 DBG_MSG("bGet6DocumentText");
00100
00101 fail(pFile == NULL);
00102 fail(aulBBD == NULL);
00103 fail(aucHeader == NULL);
00104
00105 ulBeginTextInfo = ulGetLong(0x160, aucHeader);
00106 DBG_HEX(ulBeginTextInfo);
00107 tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader);
00108 DBG_DEC(tTextInfoLen);
00109
00110 aucBuffer = xmalloc(tTextInfoLen);
00111 if (!bReadBuffer(pFile, ulStartBlock,
00112 aulBBD, tBBDLen, BIG_BLOCK_SIZE,
00113 aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
00114 aucBuffer = xfree(aucBuffer);
00115 return FALSE;
00116 }
00117 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
00118
00119 iOff = 0;
00120 while ((size_t)iOff < tTextInfoLen) {
00121 iType = (int)ucGetByte(iOff, aucBuffer);
00122 iOff++;
00123 if (iType == 0) {
00124 DBG_FIXME();
00125 iOff++;
00126 continue;
00127 }
00128 if (iType == 1) {
00129 iLen = (int)usGetWord(iOff, aucBuffer);
00130 vAdd2PropModList(aucBuffer + iOff);
00131 iOff += iLen + 2;
00132 continue;
00133 }
00134 if (iType != 2) {
00135 werr(0, "Unknown type of 'fastsaved' format");
00136 aucBuffer = xfree(aucBuffer);
00137 return FALSE;
00138 }
00139
00140 iLen = (int)usGetWord(iOff, aucBuffer);
00141 NO_DBG_DEC(iLen);
00142 iOff += 4;
00143 iPieces = (iLen - 4) / 12;
00144 DBG_DEC(iPieces);
00145 for (iIndex = 0; iIndex < iPieces; iIndex++) {
00146 ulTextOffset = ulGetLong(
00147 iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,
00148 aucBuffer);
00149 usPropMod = usGetWord(
00150 iOff + (iPieces + 1) * 4 + iIndex * 8 + 6,
00151 aucBuffer);
00152 ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4,
00153 aucBuffer) -
00154 ulGetLong(iOff + iIndex * 4,
00155 aucBuffer);
00156 NO_DBG_HEX_C(usPropMod != 0, usPropMod);
00157 if (!bAddTextBlocks(ulTextOffset, ulTotLength,
00158 bUsesUnicode, usPropMod,
00159 ulStartBlock,
00160 aulBBD, tBBDLen)) {
00161 aucBuffer = xfree(aucBuffer);
00162 return FALSE;
00163 }
00164 }
00165 break;
00166 }
00167 aucBuffer = xfree(aucBuffer);
00168 return TRUE;
00169 }
00170
00171
00172
00173
00174
00175
00176 BOOL
00177 bGet8DocumentText(FILE *pFile, const pps_info_type *pPPS,
00178 const ULONG *aulBBD, size_t tBBDLen,
00179 const ULONG *aulSBD, size_t tSBDLen,
00180 const UCHAR *aucHeader)
00181 {
00182 const ULONG *aulBlockDepot;
00183 UCHAR *aucBuffer;
00184 ULONG ulTextOffset, ulBeginTextInfo;
00185 ULONG ulTotLength, ulLen;
00186 long lIndex, lPieces, lOff;
00187 size_t tTextInfoLen, tBlockDepotLen, tBlockSize;
00188 int iType, iLen;
00189 BOOL bUsesUnicode;
00190 USHORT usPropMod;
00191
00192 DBG_MSG("bGet8DocumentText");
00193
00194 fail(pFile == NULL || pPPS == NULL);
00195 fail(aulBBD == NULL || aulSBD == NULL);
00196 fail(aucHeader == NULL);
00197
00198 ulBeginTextInfo = ulGetLong(0x1a2, aucHeader);
00199 DBG_HEX(ulBeginTextInfo);
00200 tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader);
00201 DBG_DEC(tTextInfoLen);
00202
00203 DBG_DEC(pPPS->tTable.ulSB);
00204 DBG_HEX(pPPS->tTable.ulSize);
00205 if (pPPS->tTable.ulSize == 0) {
00206 return FALSE;
00207 }
00208
00209 if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) {
00210
00211 aulBlockDepot = aulSBD;
00212 tBlockDepotLen = tSBDLen;
00213 tBlockSize = SMALL_BLOCK_SIZE;
00214 } else {
00215
00216 aulBlockDepot = aulBBD;
00217 tBlockDepotLen = tBBDLen;
00218 tBlockSize = BIG_BLOCK_SIZE;
00219 }
00220 aucBuffer = xmalloc(tTextInfoLen);
00221 if (!bReadBuffer(pFile, pPPS->tTable.ulSB,
00222 aulBlockDepot, tBlockDepotLen, tBlockSize,
00223 aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
00224 aucBuffer = xfree(aucBuffer);
00225 return FALSE;
00226 }
00227 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
00228
00229 lOff = 0;
00230 while (lOff < (long)tTextInfoLen) {
00231 iType = (int)ucGetByte(lOff, aucBuffer);
00232 lOff++;
00233 if (iType == 0) {
00234 DBG_FIXME();
00235 lOff++;
00236 continue;
00237 }
00238 if (iType == 1) {
00239 iLen = (int)usGetWord(lOff, aucBuffer);
00240 vAdd2PropModList(aucBuffer + lOff);
00241 lOff += (long)iLen + 2;
00242 continue;
00243 }
00244 if (iType != 2) {
00245 werr(0, "Unknown type of 'fastsaved' format");
00246 aucBuffer = xfree(aucBuffer);
00247 return FALSE;
00248 }
00249
00250 ulLen = ulGetLong(lOff, aucBuffer);
00251 if (ulLen < 4) {
00252 DBG_DEC(ulLen);
00253 return FALSE;
00254 }
00255 lOff += 4;
00256 lPieces = (long)((ulLen - 4) / 12);
00257 DBG_DEC(lPieces);
00258 for (lIndex = 0; lIndex < lPieces; lIndex++) {
00259 ulTextOffset = ulGetLong(
00260 lOff + (lPieces + 1) * 4 + lIndex * 8 + 2,
00261 aucBuffer);
00262 usPropMod = usGetWord(
00263 lOff + (lPieces + 1) * 4 + lIndex * 8 + 6,
00264 aucBuffer);
00265 ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4,
00266 aucBuffer) -
00267 ulGetLong(lOff + lIndex * 4,
00268 aucBuffer);
00269 if ((ulTextOffset & BIT(30)) == 0) {
00270 bUsesUnicode = TRUE;
00271 } else {
00272 bUsesUnicode = FALSE;
00273 ulTextOffset &= ~BIT(30);
00274 ulTextOffset /= 2;
00275 }
00276 NO_DBG_HEX_C(usPropMod != 0, usPropMod);
00277 if (!bAddTextBlocks(ulTextOffset, ulTotLength,
00278 bUsesUnicode, usPropMod,
00279 pPPS->tWordDocument.ulSB,
00280 aulBBD, tBBDLen)) {
00281 aucBuffer = xfree(aucBuffer);
00282 return FALSE;
00283 }
00284 }
00285 break;
00286 }
00287 aucBuffer = xfree(aucBuffer);
00288 return TRUE;
00289 }