Source: site.view [edit]
Function name: scrapeCartoons
Arguments:
Description: Construct a sample dataset of cartoons by scraping CondenastStore. Try to guess categories by using keyword spotting.
Page type: webl
Render function:  
Module: perfectCartoon

Page source:

// GetAuthor: look up the artist name that belongs to one thumbnail image.
//
//   P        page that contains the thumbnail
//   img      the thumbnail <img> piece whose author is wanted
//   nextImg  piece that bounds the search from above, or nil to search
//            every anchor that follows img on the page
//
// Walks the <a> elements located after img (and before nextImg when one is
// given) and returns the trimmed text of the first anchor whose class
// attribute contains "artistName". Returns the literal "NoAuthor" when no
// such anchor is found.
var GetAuthor = fun(P, img, nextImg)
   var A;
   if (nextImg != nil) then
      A = Elem(P, "a") after img before nextImg
   else
      A = Elem(P, "a") after img
   end;
   every a in A do
      // Fall back to an empty class when a.class cannot be read, so the
      // substring test below always receives a string.
      var c = a.class ? "";
      // A match at position 0 (class is exactly "artistName", or starts
      // with it) is still a match, hence >= 0 rather than > 0.
      if (Str_IndexOf("artistName", c) >= 0) then
         return Str_Trim(Text(a))
      end
   end;
   return "NoAuthor"
end;

// Scrape gallery pages 1..29 of the CondenastStore cartoon listing.
//
// For every <img> whose class is "thmbd" the script records:
//   - an object [. imgUrl, pageUrl, title, text .] appended to res
//   - one TSV line appended to tsv, with the columns
//       pageUrl <TAB> imgUrl <TAB> title <TAB> text <TAB> author <TAB> tags
//     (title and text are both the image's alt text)
//
// The accumulated TSV is stored in the exec field of the function info for
// "perfectCartoon.scrapedCartoons". The value of the script is the number of
// cartoons collected.
var res = [];
var tsv = "";
var pageNum = 1;
while pageNum < 30 do
   var P = GetURL("http://www.condenaststore.com/gallery.asp?cat=146230&c=c&title=Cartoons-by-Artist-Prints&cid=02EC0207D6FA432F9D56CDB3000B4C5E&isAjax=true&_=1371317483994&startat=/GetThumb.asp&Search=146230&page=" + ToString(pageNum));

   var Imgs = Elem(P, "img");
   var count = Size(Imgs);
   var i = 0;
   // Visit every image, including the last one on the page; a bound of
   // count-1 would silently drop a thumbnail in the final position.
   while (i < count) do
      var img = Imgs[i];
      // Bound the author search by the image six positions further on.
      // NOTE(review): presumably each gallery cell contributes about six
      // images, so this keeps the search inside the current cell -- confirm
      // against the live markup.
      var nextImg = nil;
      if (i + 6 < count) then
         nextImg = Imgs[i+6]
      end;
      // Images without a class attribute are treated as non-thumbnails.
      var c = img.class ? nil;
      if (c == "thmbd") then
         // The thumbnail's parent is read as the link to the cartoon's page.
         var a = Parent(img);
         var pgUrl = a.href;
         var label = ExpandCharEntities(img.alt);
         res = res + [ [. imgUrl=img.src, pageUrl=pgUrl, title=label, text=label .] ];
         // Category guesses come from the generateCategories Wub function.
         var tags = WubCall("generateCategories", label);
         var author = GetAuthor(P, img, nextImg);
         tsv = tsv + pgUrl + "\t" + img.src + "\t" + label + "\t" + label + "\t" + author + "\t" + tags + "\n"
      end;
      i = i + 1
   end;

   pageNum = pageNum + 1
end;

// Persist the scraped dataset as the body of perfectCartoon.scrapedCartoons.
var fi = Wub_GetFunctionInfo("perfectCartoon.scrapedCartoons");
fi.exec = tsv;
Wub_SaveFunctionInfo(fi);

Size(res);