W3C home > Mailing lists > Public > www-validator-cvs@w3.org > February 2009

validator/httpd/cgi-bin check,1.646,1.647

From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
Date: Fri, 13 Feb 2009 21:04:17 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1LY5Cr-0001Wx-Tt@lionel-hutz.w3.org>

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv5853/httpd/cgi-bin

Modified Files:
	check 
Log Message:
Use fuzzy matching to suggest a replacement when an element or attribute is unknown or wrongly cased - http://www.w3.org/Bugs/Public/show_bug.cgi?id=4412. Requires the String::Approx Perl library for Levenshtein edit-distance matching and computation.

Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.646
retrieving revision 1.647
diff -u -d -r1.646 -r1.647
--- check	10 Feb 2009 13:53:01 -0000	1.646
+++ check	13 Feb 2009 21:04:15 -0000	1.647
@@ -15,7 +15,6 @@
 #     http://www.w3.org/Consortium/Legal/copyright-software
 #
 # $Id$
-
 #
 # Disable buffering on STDOUT!
 $| = 1;
@@ -63,6 +62,7 @@
 use URI                       qw();
 use URI::Escape               qw(uri_escape);
 use XML::LibXML               qw();
+use String::Approx            qw(amatch adist adistr);
 
 ###############################################################################
 #### Constant definitions. ####################################################
@@ -2992,6 +2992,38 @@
     # in that case the error message will be #344
   }
 
+  if ( ($err->{num} eq '108') or ($err->{num} eq '76') )
+  # element or attribute does not exist? Let's try and fuzzy-match it
+  {
+      my $bogus_elt_attr = $err->{msg};
+      $bogus_elt_attr =~ s/.*"(.+)".*/$1/;
+      
+      if (
+          ((exists $self->{CFG}->{Attributes}->{lc($bogus_elt_attr)}) and ($err->{num} eq '108'))
+          or
+           ((exists $self->{CFG}->{Elements}->{lc($bogus_elt_attr)}) and ($err->{num} eq '76'))
+          )
+      { 
+          $err->{msg} .= '. Maybe you meant "'.lc($bogus_elt_attr).'"?';
+      }
+      else {
+          my @matches;
+          @matches = String::Approx::amatch($bogus_elt_attr, ["3i"], keys %{$self->{CFG}->{Attributes}}); 
+          if (@matches){
+              my %distances;
+              @distances{@matches} = map { abs } String::Approx::adistr(lc($bogus_elt_attr), @matches);
+              my @matches_sorted = sort { $distances{$a} <=> $distances{$b} } @matches;
+               if (@matches > 1){
+                  $err->{msg} .= '. Maybe you meant "'.$matches_sorted[0].'" or "'.$matches_sorted[1].'"?';
+              
+              }
+              else {
+                  $err->{msg} .= '. Maybe you meant "'.$matches_sorted[0].'"?';
+              }
+          }
+      }
+  }
+
    if (($err->{num} eq '113') and ($err->{msg} =~ /xml:space/)) {
      # FIXME
      # this is a problem with some of the "flattened" W3C DTDs, filtering them out to not confuse users.
Received on Friday, 13 February 2009 21:04:26 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:55:08 GMT