- From: Arvid Ephraim Picciani <aep@ibcsolutions.de>
- Date: Thu, 5 Jun 2008 00:29:30 +0200
- To: hiba hussain <hiba117_37@hotmail.com>
- Cc: html-tidy@w3.org
On Wednesday 04 June 2008 10:33:30 you wrote:
> I have developed an application with Qt4 for constructing a DOM tree of
> an HTML page.
>
> I'm using the WebKit library to load the HTML page like this:
> ( QString frameText = page.mainFrame()->toHtml();)

If you follow the qt-interest mailing list, you might have seen my posting about Qt and tiny ;)
I'm running patternist on HTML pages for automatic XQuery data extraction from an arbitrary HTML page.

Here's what I do with tidy. Customize those flags to your needs.

static QByteArray tidy(QByteArray input)
{
    QByteArray ret;
    TidyBuffer output;
    tidyBufInit(&output);
    int rc = -1;
    Bool ok;
    TidyDoc tdoc = tidyCreate(); // Initialize "document"
    tidySetReportFilter (tdoc, tinyreportcallback);
    ok = tidyOptSetBool( tdoc, TidyXmlOut, yes ); // Convert to XHTML
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyXmlDecl, no );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyDropPropAttrs, yes );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyMakeBare, yes );
    if ( ok ) ok = tidyOptSetValue( tdoc, TidyBodyOnly, "yes" );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyDropFontTags, yes );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyFixComments, yes );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyEscapeCdata, yes );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyJoinStyles, yes );
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyEscapeCdata, yes);
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyHideComments, yes);
    if ( ok ) ok = tidyOptSetBool( tdoc, TidyForceOutput, yes);
    tidySetCharEncoding ( tdoc, "utf8");
    if ( ok ) rc = tidyParseString( tdoc, input.data()); // Parse the input
    if ( rc >= 0 ) rc = tidyCleanAndRepair( tdoc ); // Tidy it up!
    if ( rc >= 0 ) rc = tidySaveBuffer( tdoc, &output ); // Pretty Print
    if ( rc > 0 ) ret=QByteArray(reinterpret_cast<char*>(output.bp),output.size);
    if(!ok || rc<0) qFatal("tidy failed");
    tidyBufFree( &output );
    tidyRelease( tdoc );
    return ret;
}

--
best regards/Mit freundlichen Grüßen
Arvid Ephraim Picciani
Received on Wednesday, 4 June 2008 22:34:16 UTC