%{
/*
 * Yacc grammar for a forgiving HTML document parser.
 *
 * The grammar recognises an optional <html> wrapper, an optional <head>
 * (from which the <title> text is captured into currentTitle), and a body
 * whose plain text and comments are handed to indexbodytext() and
 * indexcomment().  Start and end tags in the body are accepted in any
 * order; no nesting is enforced.
 *
 * ignoreWhitespace, currentLine, indexbodytext() and indexcomment() are
 * not declared in this file -- presumably they come from parse.h, as does
 * the semantic value type with its `text` and `yytoken` members.
 */
#include "parse.h"
#include <stdio.h>   /* snprintf */
#include <string.h>  /* strcpy, strcat */

/*
 * headcontent and headtext are right-recursive, so the parser stack grows
 * with the length of the input; presumably that is why the stack limit is
 * raised here.
 */
#define YYMAXDEPTH 100000

/* Text of the most recently parsed <title>; reset when a head is entered. */
char currentTitle[1024];
%}

%token START_COMMENT
%token CLOSE_TAG PLAIN_TEXT TAG_TEXT TAG_ID
%token CHARACTER_ENTITY EQUALS COMMENT_TEXT NEWLINE
%token START_HTML END_HTML START_HEAD END_HEAD START_BODY END_BODY
%token ISINDEX START_TITLE END_TITLE
%token START_UNKNOWN END_UNKNOWN
%token START_H1 END_H1 START_H2 END_H2 START_H3 END_H3
%token START_H4 END_H4 START_H5 END_H5 START_H6 END_H6
%token START_A END_A START_I END_I START_B END_B START_U END_U
%token START_S END_S START_SUP END_SUP START_SUB END_SUB
%token START_TT END_TT START_EM END_EM START_STRONG END_STRONG
%token START_PRE END_PRE START_LIT END_LIT START_QUOTE END_QUOTE
%token START_ABSTRACT END_ABSTRACT START_BYLINE END_BYLINE START_NOTE END_NOTE
%token START_ADDRESS END_ADDRESS START_BLOCKQUOTE END_BLOCKQUOTE
%token START_OL END_OL START_UL END_UL START_LI END_LI
%token START_MENU END_MENU START_DIR END_DIR START_CITE END_CITE
%token START_DL END_DL START_DT END_DT START_DD END_DD START_FONT END_FONT
%token START_TABLE END_TABLE START_TH END_TH START_TD END_TD
%token START_TR END_TR START_TB END_TB START_CAPTION END_CAPTION
%token HR BR P TAB IMG IMAGE

%%

/* A document is any number of leading comments followed by the content. */
document
    : doccomments documentstart
    ;

doccomments
    : /* empty */
    | doccomments comment
    ;

/* The <html> ... </html> wrapper is optional. */
documentstart
    : starthtml doccontent endhtml
    | doccontent
    ;

starthtml
    : START_HTML optattributes CLOSE_TAG
    ;

endhtml
    : /* empty */
    | END_HTML CLOSE_TAG
        { ignoreWhitespace = 1; }
    ;

doccontent
    : head body
    ;

/* The title buffer is cleared before any head content is parsed. */
head
    :   { currentTitle[0] = '\0'; }
      headcontent
    | starthead
        { currentTitle[0] = '\0'; }
      headcontent endhead
    ;

starthead
    : START_HEAD optattributes CLOSE_TAG
    ;

endhead
    : END_HEAD CLOSE_TAG
    ;

headcontent
    : /* empty */
    | headterm headcontent
    ;

/*
 * Mid-rule actions occupy a position of their own, so in the second
 * alternative $1 is headtag, $3 is headtext and $5 is endheadtag.
 */
headterm
    : simpleheadtag
    | headtag
        { ignoreWhitespace = 0; }
      headtext
        { ignoreWhitespace = 1; }
      endheadtag
        {
            if ($1.yytoken == START_TITLE) {
                /* Bounded copy: an over-long title is truncated rather
                 * than written past the end of currentTitle. */
                snprintf(currentTitle, sizeof currentTitle, "%s", $3.text);
            }
        }
    ;

/*
 * Text between a head start tag and its end tag.  Pieces are concatenated
 * left to right; each NEWLINE contributes a single space.
 *
 * NOTE(review): the strcpy/strcat calls below are not bounded by the size
 * of the `text` member, which is declared outside this file -- confirm
 * that the lexer limits the total length, or bound these copies too.
 */
headtext
    : /* empty */
        { $$.text[0] = '\0'; }
    | PLAIN_TEXT headtext
        {
            strcpy($$.text, $1.text);
            strcat($$.text, $2.text);
        }
    | NEWLINE headtext
        {
            strcpy($$.text, " ");
            strcat($$.text, $2.text);
        }
    ;

simpleheadtag
    : ISINDEX optattributes CLOSE_TAG
    | comment
    ;

/* Records which head tag was opened so that headterm can test for it. */
headtag
    : START_TITLE optattributes CLOSE_TAG
        { $$.yytoken = START_TITLE; }
    ;

endheadtag
    : END_TITLE CLOSE_TAG
    | END_UNKNOWN CLOSE_TAG
    ;

/* Both <body> and </body> are optional. */
body
    : startbody
        { ignoreWhitespace = 0; }
      bodycontent endbody
    |   { ignoreWhitespace = 0; }
      bodycontent
    ;

startbody
    : START_BODY optattributes CLOSE_TAG
    ;

endbody
    : /* empty */
    | END_BODY CLOSE_TAG
        { ignoreWhitespace = 1; }
    ;

/* Only plain text triggers an action; tags, newlines and character
 * entities are accepted and otherwise ignored. */
bodycontent
    : /* empty */
    | bodycontent bodytag
    | bodycontent endbodytag
    | bodycontent PLAIN_TEXT
        { indexbodytext($2.text, currentLine); }
    | bodycontent NEWLINE
    | bodycontent CHARACTER_ENTITY
    ;

/* Any start tag (or stand-alone tag, or comment) allowed in the body. */
bodytag
    : START_UNKNOWN optattributes CLOSE_TAG
    | comment
    | START_H1 optattributes CLOSE_TAG
    | START_H2 optattributes CLOSE_TAG
    | START_H3 optattributes CLOSE_TAG
    | START_H4 optattributes CLOSE_TAG
    | START_H5 optattributes CLOSE_TAG
    | START_H6 optattributes CLOSE_TAG
    | START_A optattributes CLOSE_TAG
    | START_I optattributes CLOSE_TAG
    | START_B optattributes CLOSE_TAG
    | START_U optattributes CLOSE_TAG
    | START_S optattributes CLOSE_TAG
    | START_SUP optattributes CLOSE_TAG
    | START_SUB optattributes CLOSE_TAG
    | START_TT optattributes CLOSE_TAG
    | START_EM optattributes CLOSE_TAG
    | START_STRONG optattributes CLOSE_TAG
    | START_PRE optattributes CLOSE_TAG
    | START_LIT optattributes CLOSE_TAG
    | START_QUOTE optattributes CLOSE_TAG
    | START_ABSTRACT optattributes CLOSE_TAG
    | START_BYLINE optattributes CLOSE_TAG
    | START_NOTE optattributes CLOSE_TAG
    | START_ADDRESS optattributes CLOSE_TAG
    | START_BLOCKQUOTE optattributes CLOSE_TAG
    | START_CITE optattributes CLOSE_TAG
    | START_OL optattributes CLOSE_TAG
    | START_UL optattributes CLOSE_TAG
    | START_LI optattributes CLOSE_TAG
    | START_MENU optattributes CLOSE_TAG
    | START_DIR optattributes CLOSE_TAG
    | START_DL optattributes CLOSE_TAG
    | START_DT optattributes CLOSE_TAG
    | START_DD optattributes CLOSE_TAG
    | START_FONT optattributes CLOSE_TAG
    | START_TABLE optattributes CLOSE_TAG
    | START_TH optattributes CLOSE_TAG
    | START_TD optattributes CLOSE_TAG
    | START_TR optattributes CLOSE_TAG
    | START_TB optattributes CLOSE_TAG
    | START_CAPTION optattributes CLOSE_TAG
    | HR optattributes CLOSE_TAG
    | BR optattributes CLOSE_TAG
    | P optattributes CLOSE_TAG
    | TAB optattributes CLOSE_TAG
    | IMG optattributes CLOSE_TAG
    | IMAGE optattributes CLOSE_TAG
    ;

optcomment
    : /* empty */
    | COMMENT_TEXT
    ;

/* A complete comment; its text is passed to indexcomment(). */
comment
    : START_COMMENT COMMENT_TEXT CLOSE_TAG
        { indexcomment($2.text); }
    ;

/* Attribute list inside a tag, optionally followed by comment text. */
optattributes
    : attributes optcomment
    ;

/* Zero or more attributes: a bare name, name="text", or name=name. */
attributes
    : /* empty */
    | TAG_ID attributes
    | TAG_ID EQUALS TAG_TEXT attributes
    | TAG_ID EQUALS TAG_ID attributes
    ;

/* Any end tag allowed in the body. */
endbodytag
    : END_UNKNOWN CLOSE_TAG
    | END_H1 CLOSE_TAG
    | END_H2 CLOSE_TAG
    | END_H3 CLOSE_TAG
    | END_H4 CLOSE_TAG
    | END_H5 CLOSE_TAG
    | END_H6 CLOSE_TAG
    | END_A CLOSE_TAG
    | END_I CLOSE_TAG
    | END_B CLOSE_TAG
    | END_U CLOSE_TAG
    | END_S CLOSE_TAG
    | END_SUP CLOSE_TAG
    | END_SUB CLOSE_TAG
    | END_TT CLOSE_TAG
    | END_EM CLOSE_TAG
    | END_STRONG CLOSE_TAG
    | END_PRE CLOSE_TAG
    | END_LIT CLOSE_TAG
    | END_QUOTE CLOSE_TAG
    | END_ABSTRACT CLOSE_TAG
    | END_BYLINE CLOSE_TAG
    | END_NOTE CLOSE_TAG
    | END_ADDRESS CLOSE_TAG
    | END_BLOCKQUOTE CLOSE_TAG
    | END_CITE CLOSE_TAG
    | END_OL CLOSE_TAG
    | END_UL CLOSE_TAG
    | END_LI CLOSE_TAG
    | END_MENU CLOSE_TAG
    | END_DIR CLOSE_TAG
    | END_DL CLOSE_TAG
    | END_DT CLOSE_TAG
    | END_DD CLOSE_TAG
    | END_FONT CLOSE_TAG
    | END_TABLE CLOSE_TAG
    | END_TH CLOSE_TAG
    | END_TD CLOSE_TAG
    | END_TR CLOSE_TAG
    | END_TB CLOSE_TAG
    | END_CAPTION CLOSE_TAG
    ;

%%