Main Page   Data Structures   File List   Data Fields   Globals  

nccparser.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2006 by André Lindhjem <belgarat@sdf.lonestar.org>,     *
00003  *                         Kjetil Holien <kjetil.holien@gmail.com>,        *
00004  *                         Terje Risa <terje.risa@gmail.com> &             *
00005  *                         Øyvind Nerbråten <oyvind@nerbraten.com>         *
00006  *                                                                         *
00007  *   This program is free software; you can redistribute it and/or modify  *
00008  *   it under the terms of the GNU General Public License as published by  *
00009  *   the Free Software Foundation; either version 2 of the License, or     *
00010  *   (at your option) any later version.                                   *
00011  *                                                                         *
00012  *   This program is distributed in the hope that it will be useful,       *
00013  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00014  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00015  *   GNU General Public License for more details.                          *
00016  *                                                                         *
00017  *   You should have received a copy of the GNU General Public License     *
00018  *   along with this program; if not, write to the                         *
00019  *   Free Software Foundation, Inc.,                                       *
00020  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00021  ***************************************************************************
00022  *
00023  * \file nccparser.c
00024  * \author André Lindhjem, Kjetil Holien, Terje Risa & Øyvind Nerbråten
00025  * \date 16.02.2006
00026  *
00027  * NCC parser for the Daisy 2.02 standard. Collects data and stores it in a linked
00028  * list in the main data structure. This linked SMIL list represents the spine of
00029  * the Daisy DTB.
00030  */
00031 
00032 
00033 #include <stdio.h>
00034 #include <stdlib.h>
00035 #include <string.h>
00036 #include <libxml2/libxml/xmlreader.h>
00037 
00038 #include "common.h"
00039 #include "nccparser.h"
00040 #include "control.h"
00041 #include "report.h"
00042 #include "snprintf/snprintf.h"
00043 
00044 /* Test if the xml reader is present */
00045 #ifdef LIBXML_READER_ENABLED
00046 
00047 
00048 
00049 /* ************************************************** *
00050  *          Static function declarations              *
00051  * ************************************************** */
00052 
00059 static int parseNccHead (xmlTextReaderPtr reader, struct DaisyData *daisydata);
00060 
00067 static int parseNccTitle (xmlTextReaderPtr reader, struct DaisyData *daisydata);
00068 
00075 static int parseNccMeta (xmlTextReaderPtr reader, struct DaisyData *daisydata);
00076 
00083 static int parseNccBody (xmlTextReaderPtr reader, struct DaisyData *daisydata);
00084 
00091 static int parseNccHx (xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode);
00092 
00099 static int parseNccSpan (xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode);
00100 
00107 static int parseNccDiv (xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode);
00108 
00115 static int parseNccA (xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode);
00116 
00117 
00118 
00119 /* ************************************************** *
00120  *          Global function definitions               *
00121  * ************************************************** */
00122  
00131 int parseNCC (struct DaisyData *daisydata, char *filename)
00132 {
00133     const xmlChar *name = NULL;
00134     char *nameChar = NULL;
00135     xmlTextReaderPtr reader = NULL;
00136     int ret = 1;
00137     
00138     /* open the ncc file */
00139     reader = xmlReaderForFile (filename, NULL, 0);
00140     if (reader == NULL)
00141     {
00142         char error[STRLEN];
00143         snprintf (error, sizeof(error), "Failed to open NCC file: %s", filename);
00144         report (error, REP_WARNING);
00145         return -1;
00146     }
00147     else 
00148     {
00149         if (DEBUG_NCCPARSER)
00150         {
00151             char error[STRLEN];
00152             snprintf (error, sizeof(error), "Parsing NCC file: %s", filename);
00153             report (error, REP_DEBUG);
00154         }
00155         /* read the first tag */
00156         ret = xmlTextReaderRead(reader);
00157         if (ret != 1)
00158         {
00159             char error[STRLEN];
00160             snprintf (error, sizeof(error), "Failed to parse NCC file: %s", filename);
00161             report (error, REP_WARNING);
00162             xmlFreeTextReader (reader);
00163             return -1;
00164         }
00165         /* while there are more tags, parse the next */
00166         while(ret == 1)
00167         {
00168             /* get name of tag */
00169             name = xmlTextReaderConstName (reader);
00170             if (name == NULL) return 0;
00171             nameChar = tolowercase((const char *)name);
00172             if (nameChar == NULL) return 0;
00173             /* parse tag if <head> */
00174             if (!strcmp (nameChar, "head"))
00175             { 
00176                 ret = parseNccHead (reader, daisydata);
00177                 if (ret != 1)
00178                 {
00179                     char error[STRLEN];
00180                     snprintf (error, sizeof(error), "Failed to parse NCC file: %s", filename);
00181                     report (error, REP_WARNING);
00182                     xmlFreeTextReader (reader);
00183                     return -1;
00184                 }
00185             }
00186             /* parse tag if <body> */
00187             else if (!strcmp (nameChar, "body"))
00188             { 
00189                 ret = parseNccBody (reader, daisydata);
00190                 if (ret != 1)
00191                 {
00192                     char error[STRLEN];
00193                     snprintf (error, sizeof(error), "Failed to parse NCC file: %s", filename);
00194                     report (error, REP_WARNING);
00195                     xmlFreeTextReader (reader);
00196                     return -1;
00197                 }
00198             }
00199             ret = xmlTextReaderRead (reader);
00200         }
00201         xmlFreeTextReader (reader);
00202         if (ret != 0)
00203         {
00204             char error[STRLEN];
00205             snprintf (error, sizeof(error), "Failed to parse NCC file: %s", filename);
00206             report (error, REP_WARNING);
00207             return -1;
00208         }
00209     }
00210     return 1;
00211 }
00212 
00213 
00214 
00215 /* ************************************************** *
00216  *          Static function definitions               *
00217  * ************************************************** */
00218 
00225 static int parseNccHead (xmlTextReaderPtr reader, struct DaisyData *daisydata)
00226 {
00227     const xmlChar *name = NULL;
00228     char *nameChar = NULL;
00229     int ret, type;
00230     ret = type = 1;
00231     
00232     if (DEBUG_NCCPARSER) report ("Parsing NCC <HEAD>", REP_DEBUG);
00233     type = xmlTextReaderNodeType(reader);
00234     if (type != 1) 
00235     {   if (DEBUG_NCXPARSER) report ("Failed NCC <HEAD>", REP_ERROR);
00236         return -1;
00237     }
00238     ret = xmlTextReaderRead (reader);
00239     if (ret != 1) return ret;
00240     name = xmlTextReaderConstName (reader);
00241     if (name == NULL) return 0;
00242     nameChar = tolowercase((const char *)name);
00243     if (nameChar == NULL) return 0;
00244     /* while we haven't come to the head closetag */
00245     while (strcmp (nameChar, "head"))
00246     {   
00247         /* parse tag if <title> */
00248         if (!strcmp ((const char *)nameChar, "title")) ret = parseNccTitle (reader, daisydata);
00249         /* parse tag if <meta> * */
00250         else if (!strcmp (nameChar, "meta")) ret = parseNccMeta (reader, daisydata);
00251         if (ret != 1) return ret;
00252         ret = xmlTextReaderRead (reader);
00253         if (ret != 1) return ret;
00254         name = xmlTextReaderConstName (reader);
00255         if (name == NULL) return 0;
00256         nameChar = tolowercase((const char *)name);
00257         if (nameChar == NULL) return 0;
00258     }   
00259     type = xmlTextReaderNodeType(reader);
00260     if (type != 15) 
00261     {   if (DEBUG_NCXPARSER) report ("Failed parsing NCC <HEAD>, endtag expected", REP_ERROR);
00262         return -1;
00263     }   
00264     return ret;
00265 }
00266 
00273 static int parseNccTitle (xmlTextReaderPtr reader, struct DaisyData *daisydata)
00274 {
00275     xmlChar *value = NULL;
00276     char *tmp = NULL;
00277     int ret = 1;
00278     
00279     if (DEBUG_NCCPARSER) report ("Parsing NCC <TITLE>", REP_DEBUG);
00280     ret = xmlTextReaderRead (reader);
00281     if (ret != 1) return ret;
00282     /* if the title tag has a value  */
00283     if (xmlTextReaderHasValue (reader))
00284     {
00285         value = xmlTextReaderValue (reader);
00286         tmp = removewhitespaces ((char *)value);
00287         /* storing the books title in the datastucture */
00288         daisydata->bookInfo->titleText = tmp;
00289     }
00290     if (value != NULL) xmlFree (value); value = NULL;
00291     ret = xmlTextReaderRead (reader);
00292     return ret;
00293 }
00294 
00301 static int parseNccMeta (xmlTextReaderPtr reader, struct DaisyData *daisydata)
00302 {
00303     int ret = 1;
00304     xmlChar *attrvalue = NULL, *attrcontent = NULL;
00305     
00306     if (DEBUG_NCCPARSER) report ("Parsing NCC <META>", REP_DEBUG);
00307     attrvalue = xmlTextReaderGetAttribute (reader, (xmlChar *)"name");
00308     if (attrvalue != NULL)
00309     {
00310         attrcontent = xmlTextReaderGetAttribute (reader, (xmlChar *)"content");
00311         if (attrcontent != NULL)
00312         {   
00313             /* extract the title */
00314             if (!strcmp ((char *) attrvalue, "dc:title"))
00315             {   
00316                 /* if we haven't parsed a title tag */
00317                 if (daisydata->bookInfo->titleText == NULL)
00318                 {
00319                     daisydata->bookInfo->titleText = (char *) attrcontent;
00320                 }
00321             }
00322             /* extract an author */
00323             else if (!strcmp ((char *) attrvalue, "dc:creator"))
00324             {
00325                 (void) addNewAuthor (daisydata);
00326                 daisydata->bookInfo->author->name = (char *) attrcontent;
00327             }
00328             /* extract the total time of the book */
00329             else if (!strcmp ((char *) attrvalue, "ncc:totalTime"))
00330             {
00331                 daisydata->bookInfo->totalTime = (char *) attrcontent;
00332             }
00333                         
00334         }
00335     }
00336     ret = xmlTextReaderRead (reader);
00337     return ret; 
00338 }
00339 
00346 static int parseNccBody (xmlTextReaderPtr reader, struct DaisyData *daisydata)
00347 {
00348     const xmlChar *name = NULL;
00349     char *nameChar = NULL;
00350     int ret, type;
00351     ret = type = 1;
00352     
00353     if (DEBUG_NCCPARSER) report ("Parsing NCC <BODY>", REP_DEBUG);
00354     type = xmlTextReaderNodeType(reader);
00355     if (type != 1) 
00356     {   if (DEBUG_NCXPARSER) report ("Failed NCC <BODY>", REP_ERROR);
00357         return -1;
00358     }
00359     ret = xmlTextReaderRead (reader);
00360     if (ret != 1) return ret;
00361     name = xmlTextReaderConstName (reader);
00362     if (name == NULL) return 0;
00363     nameChar = tolowercase((const char *)name);
00364     if (nameChar == NULL) return 0;
00365     /* while we haven't come to the body closetag */
00366     while (strcmp (nameChar, "body"))
00367     {   
00368         /* parse tag if <h1> + <h2-h6> * */
00369         if (!strcmp (nameChar, "h1") ||  !strcmp (nameChar, "h2") || !strcmp (nameChar, "h3") ||  !strcmp (nameChar, "h4") 
00370          || !strcmp (nameChar, "h5") ||  !strcmp (nameChar, "h6"))
00371         {
00372             struct SmilNode *tmpSmilNode = addNewSmilNode(daisydata);
00373             ret = parseNccHx (reader, tmpSmilNode);
00374             if (ret != 1) return ret;
00375         }
00376         /* parse tag if <span> * */
00377         else if (!strcmp (nameChar, "span")) 
00378         {   
00379             struct SmilNode *tmpSmilNode = addNewSmilNode(daisydata);
00380             ret = parseNccSpan (reader, tmpSmilNode);
00381             if (ret != 1) return ret;
00382         }
00383         /* parse tag if <div> * */
00384         else if (!strcmp (nameChar, "div"))
00385         {
00386             struct SmilNode *tmpSmilNode = addNewSmilNode(daisydata);
00387             ret = parseNccDiv (reader, tmpSmilNode);
00388             if (ret != 1) return ret;
00389         }
00390         ret = xmlTextReaderRead (reader);
00391         if (ret != 1) return ret;
00392         name = xmlTextReaderConstName (reader);
00393         if (name == NULL) return 0;
00394         nameChar = tolowercase((const char *)name);
00395         if (nameChar == NULL) return 0;
00396     }   
00397     type = xmlTextReaderNodeType(reader);
00398     if (type != 15) 
00399     {   if (DEBUG_NCXPARSER) report ("Failed parsing NCC <BODY>, endtag expected", REP_ERROR);
00400         return -1;
00401     }   
00402     return ret;
00403 }
00404 
00411 static int parseNccHx(xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode)
00412 {
00413 /*TODO: Hx must contain (attributes) id, h1 must contain class, h2-6 might contain class. Not taken care of now */
00414     const xmlChar *name = NULL;
00415     char *nameChar = NULL;
00416     xmlChar *attrvalue = NULL;
00417     int ret = 1;
00418     
00419     if (DEBUG_NCCPARSER) report ("Parsing NCC <Hx>", REP_DEBUG);
00420     /* getting value if the id attribute */
00421     attrvalue = xmlTextReaderGetAttribute (reader, (xmlChar *)"id");
00422     /* storing the value of the id attribute in the smilNode datastructure */
00423     if (attrvalue != NULL) tmpSmilNode->id = (char *)attrvalue;
00424     /* fetching next tag */
00425     ret = xmlTextReaderRead (reader);
00426     if (ret != 1) return ret;
00427     /* getting the name of the tag */
00428     name = xmlTextReaderConstName (reader);
00429     if (name == NULL) return 0;
00430     nameChar = tolowercase((const char *)name);
00431     if (nameChar == NULL) return 0;
00432     /* parse tag if <a> else return error */
00433     if (!strcmp (nameChar, "a"))
00434     {
00435         ret = parseNccA (reader, tmpSmilNode);
00436     }
00437     else
00438         return -1;
00439     return ret;
00440 }
00441 
00448 static int parseNccSpan(xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode)
00449 {
00450 /* Span must contain id and class attributes */
00451     const xmlChar *name = NULL;
00452     char *nameChar = NULL;
00453     xmlChar *attrvalue = NULL;
00454     int ret = 1;
00455     
00456     if (DEBUG_NCCPARSER) report ("Parsing NCC <SPAN>", REP_DEBUG);
00457     /* getting value if the id attribute */
00458     attrvalue = xmlTextReaderGetAttribute (reader, (xmlChar *)"id");
00459     /* storing the value of the id attribute in the smilNode datastructure */
00460     if (attrvalue != NULL) tmpSmilNode->id = (char *)attrvalue;
00461     /* fetching next tag */
00462     ret = xmlTextReaderRead (reader);
00463     if (ret != 1) return ret;
00464     name = xmlTextReaderConstName (reader);
00465     if (name == NULL) return 0;
00466     nameChar = tolowercase((const char *)name);
00467     if (nameChar == NULL) return 0;
00468     /* parse tag if <a> else return error */
00469     if (!strcmp (nameChar, "a"))
00470     {   
00471         ret = parseNccA (reader, tmpSmilNode);
00472     }
00473     else
00474         return -1;
00475     return ret;
00476 }
00477 
00484 static int parseNccDiv(xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode)
00485 {
00486 /* Span must contain id and class attributes */
00487     const xmlChar *name = NULL;
00488     char *nameChar = NULL;
00489     xmlChar *attrvalue = NULL;
00490     int ret = 1;
00491     
00492     if (DEBUG_NCCPARSER) report ("Parsing NCC <DIV>", REP_DEBUG);
00493     /* getting value if the id attribute */
00494     attrvalue = xmlTextReaderGetAttribute (reader, (xmlChar *)"id");
00495     /* storing the value of the id attribute in the smilNode datastructure */
00496     if (attrvalue != NULL) tmpSmilNode->id = (char *)attrvalue;
00497     /* fetching next tag */
00498     ret = xmlTextReaderRead (reader);
00499     if (ret != 1) return ret;
00500     name = xmlTextReaderConstName (reader);
00501     if (name == NULL) return 0;
00502     nameChar = tolowercase((const char *)name);
00503     if (nameChar == NULL) return 0;
00504     /* parse tag if <a> else return error */
00505     if (!strcmp (nameChar, "a"))
00506     {
00507         ret = parseNccA (reader, tmpSmilNode);
00508     }
00509     else
00510         return -1;
00511     return ret;
00512 }
00513 
00520 static int parseNccA (xmlTextReaderPtr reader, struct SmilNode *tmpSmilNode)
00521 {
00522     xmlChar *smilsrc, *tmp, *value;
00523     char *tempChar = NULL;
00524     int ret = 1;
00525     smilsrc = tmp = value = NULL;
00526     
00527     if (DEBUG_NCCPARSER) report ("Parsing NCC <A>", REP_DEBUG);
00528     
00529     /* gets the href attribute. */
00530     smilsrc = xmlTextReaderGetAttribute (reader, (xmlChar *)"href");    /*TODO: what if attribute does not exist? */
00531     
00532     /* splits the text on #. */
00533     tmp = (xmlChar *)strtok ((char *)smilsrc, "#");
00534     tempChar = (char *) malloc (strlen ((char *)tmp)+1);
00535     strcpy(tempChar, (char *)tmp);
00536     
00537     if (DEBUG_NCCPARSER)
00538     {
00539         char error[STRLEN];
00540         snprintf (error, sizeof(error), "Parsing NCC <A> File anchor: %s", tmp);
00541         report (error, REP_DEBUG);
00542     }
00543     
00544     /* put the anchor in the node */
00545     tmpSmilNode->anchor = tempChar; tempChar = NULL; 
00546     /* get the fragment identifier */
00547     tmp = (xmlChar *)strtok (NULL, "#");
00548     tempChar = (char *) malloc (strlen ((char *)tmp)+1);
00549     strcpy(tempChar, (char *)tmp);
00550     
00551     if (DEBUG_NCCPARSER)
00552     {
00553         char error[STRLEN];
00554         snprintf (error, sizeof(error), "Parsing NCC <A> Fragment Identifier: %s", tmp);
00555         report (error, REP_DEBUG);
00556     }
00557     
00558     /* put the fragment identifier in the node */
00559     tmpSmilNode->fragmentIdentifier = tempChar; tempChar = NULL;
00560     ret = xmlTextReaderRead (reader);
00561     if (ret != 1) return ret;
00562     
00563     if (smilsrc != NULL) xmlFree (smilsrc); smilsrc = NULL;
00564     
00565     /* get the text value of the node */
00566     if (xmlTextReaderHasValue (reader))
00567     {
00568         value = xmlTextReaderValue (reader);
00569         tempChar = removewhitespaces ((char *)value);
00570         
00571         if (DEBUG_NCCPARSER)
00572         {
00573             char error[STRLEN];
00574             snprintf (error, sizeof(error), "Parsing NCC <A> Heading: %s", value);
00575             report (error, REP_DEBUG);
00576         }
00577         /* put the text in the node */
00578         tmpSmilNode->header = tempChar; tempChar = NULL;
00579         if (value != NULL) xmlFree (value); value = NULL;
00580     }
00581     ret = xmlTextReaderRead (reader);
00582     if (ret != 1) return ret;
00583     ret = xmlTextReaderRead (reader);
00584     return ret;
00585 }
00586 
00587 
00588 #endif

Generated on Tue Sep 5 12:14:07 2006 for libdaisy by doxygen1.2.15