- From: Arvid Ephraim Picciani <aep@ibcsolutions.de>
- Date: Thu, 5 Jun 2008 00:29:30 +0200
- To: hiba hussain <hiba117_37@hotmail.com>
- Cc: html-tidy@w3.org
On Wednesday 04 June 2008 10:33:30 you wrote:
> I have developed an application with Qt4 for constructing a DOM tree of
> an HTML page
>
> I'm using the WebKit library to load the HTML page like this:
> ( QString frameText = page.mainFrame()->toHtml();)
If you follow the qt-interest mailing list, you might have seen my posting
about Qt and tidy ;)
I'm running patternist on HTML pages for automatic XQuery data extraction from
an arbitrary HTML page.
Here's what I do with tidy. Customize those flags to your needs.
/**
 * Runs HTML Tidy over @p input and returns the cleaned-up markup.
 *
 * The document is configured for XML output without an XML declaration,
 * body-only, with proprietary attributes, font tags and comments stripped
 * (see the individual option calls below), and UTF-8 as character encoding.
 *
 * @param input  Markup to clean. It is handed to tidyParseString() as a
 *               NUL-terminated C string, so anything after an embedded NUL
 *               byte is presumably ignored by tidy.
 * @return The serialized output of tidySaveBuffer(). Diagnostics are routed
 *         to tinyreportcallback and are not part of the returned data.
 *
 * If any option cannot be set, or parsing/cleaning/saving reports a negative
 * status, qFatal("tidy failed") is called.
 */
static QByteArray tidy(QByteArray input)
{
    QByteArray ret;
    TidyBuffer output;
    tidyBufInit(&output);
    int rc = -1;
    Bool ok;

    TidyDoc tdoc = tidyCreate(); // Initialize "document"
    tidySetReportFilter(tdoc, tinyreportcallback);

    // Each option is only attempted while all previous ones succeeded, so
    // `ok` ends up `no` as soon as a single option is rejected.
    ok = tidyOptSetBool(tdoc, TidyXmlOut, yes); // Emit XML output
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyXmlDecl, no);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyDropPropAttrs, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyMakeBare, yes);
    if (ok)
        ok = tidyOptSetValue(tdoc, TidyBodyOnly, "yes");
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyDropFontTags, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyFixComments, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyEscapeCdata, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyJoinStyles, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyHideComments, yes);
    if (ok)
        ok = tidyOptSetBool(tdoc, TidyForceOutput, yes);

    tidySetCharEncoding(tdoc, "utf8");

    if (ok)
        rc = tidyParseString(tdoc, input.data()); // Parse the input
    if (rc >= 0)
        rc = tidyCleanAndRepair(tdoc);            // Tidy it up!
    if (rc >= 0)
        rc = tidySaveBuffer(tdoc, &output);       // Pretty print

    // Any non-negative status carries usable output: 0 means "no warnings",
    // positive values mean warnings/errors were reported. Testing `rc > 0`
    // here would throw away the result for perfectly clean documents.
    if (rc >= 0)
        ret = QByteArray(reinterpret_cast<char*>(output.bp), output.size);

    if (!ok || rc < 0)
        qFatal("tidy failed");

    tidyBufFree(&output);
    tidyRelease(tdoc);
    return ret;
}
--
best regards/Mit freundlichen Grüßen
Arvid Ephraim Picciani
Received on Wednesday, 4 June 2008 22:34:16 UTC