Friday, March 30, 2012

Re: ajax-crawling approach to make gwt app crawlable works on default gwt example, but not on my app

Do your existing app's URLs conform to the spec at
https://developers.google.com/webmasters/ajax-crawling/docs/specification
? More specifically, do all your history tokens start with an
exclamation mark?

On Fri, Mar 30, 2012 at 9:31 AM, erebrus <erebrus@gmail.com> wrote:
> Hi all,
> I was reading https://developers.google.com/webmasters/ajax-crawling/
> on how to make ajax apps (consequently gwt apps) crawlable.
> I took the code from Google (summarized in point 3 of "How to create
> an HTML snapshot?") to create a filter (that returns HTML from Ajax
> using HtmlUnit) and changed the web.xml accordingly. I created a new
> GWT project with example code and applied the filter and the web.xml
> there. It worked directly.
> However, I did exactly the same on the GWT app I want to make
> searchable and it doesn't work. For some reason, the only requests the
> filter gets are the ones for the RPC.
> I think I must be missing a terribly simple detail, but I'm a bit lost
> on where to go from here.
>
>
> Below you can see the code for the filter (CrawlServlet) and the
> web.xml.
>
> package crawltest.server;
>
> import com.gargoylesoftware.htmlunit.BrowserVersion;
> import com.gargoylesoftware.htmlunit.WebClient;
> import com.gargoylesoftware.htmlunit.html.HtmlPage;
>
> import java.io.IOException;
> import java.io.PrintWriter;
> import java.io.UnsupportedEncodingException;
> import java.net.URLDecoder;
> import java.util.logging.Logger;
>
> import javax.servlet.Filter;
> import javax.servlet.FilterChain;
> import javax.servlet.FilterConfig;
> import javax.servlet.ServletException;
> import javax.servlet.ServletRequest;
> import javax.servlet.ServletResponse;
> import javax.servlet.http.HttpServletRequest;
> import javax.servlet.http.HttpServletResponse;
>
> /**
>  * Servlet that makes this application crawlable
>  *
>  * <p>Despite the name, this class is a servlet {@link Filter}. For a request
>  * whose query string contains {@code _escaped_fragment_}, it rebuilds the
>  * corresponding {@code #!} URL, loads that URL with an HtmlUnit
>  * {@link WebClient} with JavaScript enabled, and writes a banner followed by
>  * {@code page.asXml()} to the response. Every other request is handed to the
>  * next element of the filter chain.
>  */
> public final class CrawlServlet implements Filter {
>
>        private static final Logger logger =
> Logger.getLogger(CrawlServlet.class
>                        .getName()); // declared but not referenced in this class; the debug output below goes to System.out
>  private static String rewriteQueryString(String queryString) throws
> UnsupportedEncodingException { // turns the _escaped_fragment_ parameter of queryString back into a "#!" fragment
>    StringBuilder queryStringSb = new StringBuilder(queryString);
>    int i = queryStringSb.indexOf("&_escaped_fragment_"); // case 1: the marker follows other parameters
>    if (i != -1) {
>      StringBuilder tmpSb = new
> StringBuilder(queryStringSb.substring(0, i));
>      tmpSb.append("#!");
>      tmpSb.append(URLDecoder.decode(queryStringSb.substring(i + 20,
> queryStringSb.length()),"UTF-8")); // i + 20 skips the 19-character marker plus one more character (presumably '=')
>      queryStringSb = tmpSb;
>    }
>
>    i = queryStringSb.indexOf("_escaped_fragment_"); // case 2: the marker without a leading '&'
>    if (i != -1) {
>      StringBuilder tmpSb = new
> StringBuilder(queryStringSb.substring(0, i));
>      tmpSb.append("#!");
>      tmpSb.append(URLDecoder.decode(queryStringSb.substring(i + 19,
> queryStringSb.length()), "UTF-8")); // i + 19 skips the 18-character marker plus one more character (presumably '=')
>      queryStringSb = tmpSb;
>    }
>    if (queryStringSb.indexOf("#!") != 0) { // text remains in front of the fragment, so it gets a leading '?'
>      queryStringSb.insert(0, '?');
>    }
>    queryString = queryStringSb.toString();
>
>
>
>    return queryString;
>  }
>
>  private FilterConfig filterConfig = null;
>
>  /**
>   * Destroys the filter configuration
>   *
>   * <p>Sets the stored configuration back to {@code null}; after that,
>   * {@code doFilter} returns without doing anything.
>   */
>  public void destroy() {
>    this.filterConfig = null;
>  }
>
>  /**
>   * Filters all requests and invokes headless browser if necessary
>   *
>   * <p>If no filter configuration is stored, the method returns immediately:
>   * nothing is written to the response and the chain is not invoked. A
>   * {@code ServletException} thrown by the chain is caught and printed with
>   * {@code printStackTrace()}; it is not rethrown.
>   */
>  public void doFilter(ServletRequest request, ServletResponse
> response,
>      FilterChain chain) throws IOException {
>          System.out.println("crawl"); // debug trace: first statement, so it prints for every request that reaches the filter
>    if (filterConfig == null) {
>      return;
>    }
>    System.out.println("crawl");
>    HttpServletRequest req = (HttpServletRequest) request;
>    HttpServletResponse res = (HttpServletResponse) response;
>    String queryString = req.getQueryString();
>    System.out.println("query:"+queryString);
>    System.out.println("param:"+req.getParameterMap().toString());
>    System.out.println("req:"+req);
>    if ((queryString != null) &&
> (queryString.contains("_escaped_fragment_"))) {
>        System.out.println("in!!");
>      StringBuilder pageNameSb = new StringBuilder("http://"); // scheme is hard-coded; the request's own scheme is not consulted
>      pageNameSb.append(req.getServerName());
>      if (req.getServerPort() != 0) {
>        pageNameSb.append(":");
>        pageNameSb.append(req.getServerPort());
>      }
>      pageNameSb.append(req.getRequestURI());
>      queryString = rewriteQueryString(queryString);
>      pageNameSb.append(queryString);
>
>      final WebClient webClient = new
> WebClient(BrowserVersion.FIREFOX_3);
>      webClient.setJavaScriptEnabled(true);
>      String pageName = pageNameSb.toString();
>      HtmlPage page = webClient.getPage(pageName);
>      webClient.waitForBackgroundJavaScriptStartingBefore(2000); // NOTE(review): presumably a delay in milliseconds that lets background scripts run; confirm against the HtmlUnit docs
>
>      res.setContentType("text/html;charset=UTF-8");
>      PrintWriter out = res.getWriter();
>      out.println("<hr>");
>      out.println("<center><h3>You are viewing a non-interactive page
> that is intended for the crawler.  You probably want to see this page:
> <a href=\""
>          + pageName + "\">" + pageName + "</a></h3></center>"); // NOTE(review): the literal above is split across lines in this listing (apparently by mail wrapping) and must be rejoined to compile; pageName is written into the HTML without escaping
>      out.println("<hr>");
>
>      out.println(page.asXml());
>      webClient.closeAllWindows(); // not reached if getPage or one of the writes above throws (there is no try/finally)
>      out.close();
>
>    } else {
>      try {
>        chain.doFilter(request, response);
>      } catch (ServletException e) {
>        e.printStackTrace();
>      }
>    }
>  }
>
>  /**
>   * Initializes the filter configuration
>   *
>   * <p>Stores the given configuration; {@code doFilter} does no work while
>   * the stored value is {@code null}.
>   */
>  public void init(FilterConfig filterConfig) {
>    this.filterConfig = filterConfig;
>  }
>
> }
>
>
> web.xml:
>
> <?xml version="1.0" encoding="UTF-8"?>
> <web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>        xsi:schemaLocation="http://java.sun.com/xml/ns/javaee
>              http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
>        version="2.5" xmlns="http://java.sun.com/xml/ns/javaee">
>
>        <filter> <!-- registers the filter class under the name CrawlServlet -->
>                <filter-name>CrawlServlet</filter-name>
>                <filter-class>crawltest.server.CrawlServlet</filter-class>
>        </filter>
>
>        <filter-mapping> <!-- url-pattern /* applies the filter to every request path -->
>                <filter-name>CrawlServlet</filter-name>
>                <url-pattern>/*</url-pattern>
>        </filter-mapping>
>
>        <!-- Servlets (none are declared in this listing) -->
>
>        <!-- Default page to serve -->
>        <welcome-file-list>
>                <welcome-file>CrawlTest.html</welcome-file>
>        </welcome-file-list>
>
> </web-app>
>
>
>
> --
> You received this message because you are subscribed to the Google Groups "Google Web Toolkit" group.
> To post to this group, send email to google-web-toolkit@googlegroups.com.
> To unsubscribe from this group, send email to google-web-toolkit+unsubscribe@googlegroups.com.
> For more options, visit this group at http://groups.google.com/group/google-web-toolkit?hl=en.
>

--
You received this message because you are subscribed to the Google Groups "Google Web Toolkit" group.
To post to this group, send email to google-web-toolkit@googlegroups.com.
To unsubscribe from this group, send email to google-web-toolkit+unsubscribe@googlegroups.com.
For more options, visit this group at http://groups.google.com/group/google-web-toolkit?hl=en.

No comments:

Post a Comment