Source: site.view [edit]
Function name: testBuildSearchEngine
Arguments:
Description: Compile the exported dataset into a search index.
Page type: webl
Render function:  
Module: perfectCartoon

Page source:

   // Look up the stored "cartoonDBSettings" function and evaluate its body;
   // the result is used below as the settings object.
   var fi = Wub_GetFunctionInfo("cartoonDBSettings");
   var settings = WubEval(fi.exec);

//   var writer = Wub_NewLuceneIndex(settings.staging, true, settings.stemmer);

   // Published spreadsheet export (TSV) holding the dataset.
   var datasetUrl = "https://docs.google.com/spreadsheets/d/e/2PACX-1vS6fVF6rf-5wt0NpFcqN5YZZ3NYlyZGPOrmHbIYtP5Be3UoYpHwDMmdTtOI5T27yrIPbtX30ZLFTXNk/pub?gid=0&single=true&output=tsv";

   // Fetch the dataset, falling back to a second and then a third identical
   // request through the `?` combinator if the previous attempt fails.
   var P = GetURL(datasetUrl, nil, nil, [. mimetype="text/plain" .]) ?
           GetURL(datasetUrl, nil, nil, [. mimetype="text/plain" .]) ?
           GetURL(datasetUrl, nil, nil, [. mimetype="text/plain" .]);
   
   // Synonym table: keyed by the lower-cased first cell of each CSV row, the
   // value is the remaining non-empty cells of that row joined by spaces.
   var synonyms = [. .];
   var SynP = GetURL("https://docs.google.com/spreadsheet/pub?key=0AsUaQihpzloZdERRWXg5QVZwYUVkbV91UTdmcTRuQ2c&output=csv", nil, nil, [. mimetype="text/plain" .]);
   every row in Str_Split(Markup(SynP), "\n") do
      // Gather the row's cells, dropping double quotes and empty cells.
      var cells = [];
      every rawCell in Str_Split(row, ",") do
         var cell = Str_Trim(Wub_ReplaceAll(rawCell, `"`, ""));
         if cell != "" then
            cells = cells + [cell]
         end
      end;
      // A row needs a head term plus at least one synonym to be recorded.
      if Size(cells) > 1 then
         var joined = "";
         every word in Rest(cells) do
            joined = joined + " " + word
         end;
         var head = Str_ToLowerCase(First(cells));
         synonyms[head] := Str_Trim(joined)
      end
   end;


   // Running totals for the row loop further down. numDocs is incremented for
   // every accepted row; `ok` is a leftover from the disabled Lucene writer
   // path and is only referenced by commented-out code in the visible script.
   var numDocs = 0;
   var ok = true; // (writer != nil);

   // Occurrence counters, keyed by cleaned tag / French tag / author name.
   // scoreSet is only touched by commented-out code in the visible script.
   var tagSet = [. .];
   var tagSetFr = [. .];
   var authorSet = [. .];
   var scoreSet = [. .];

   // Zero-based column positions inside one tab-separated dataset row.
   // The row loop only accepts rows with exactly 8 columns (0..7).
   var URL=0;
   var IMG=1;
   var CAPTION=2;
   var AUTHOR=3;
   var TAGS=4;
   var SCORE=5;
   var FRTAGS=6;
   var FRTEXT=7;

   // cln3(s): whitelist filter. Trims s, expands character entities, then
   // returns only those characters that appear in the allowed set (ASCII
   // letters, digits, space, hyphen and apostrophe), in their original order.
   var cln3 = fun(s)
      var allowed = "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ-'1234567890";
      var text = ExpandCharEntities(Str_Trim(s));
      var kept = "";
      var pos = 0;
      while pos < Size(text) do
         // One-character slice at the current position.
         var ch = Select(text, pos, pos + 1);
         if (Str_IndexOf(ch, allowed) >= 0) then
            kept = kept + ch
         end;
         pos = pos + 1
      end;
      return kept
   end;

   // cln2(s): light clean-up of one dataset cell. Trims s, expands character
   // entities, then applies a fixed, ordered list of text replacements.
   var cln2 = fun(s)
      var out = ExpandCharEntities(Str_Trim(s));
      // [search, replacement] pairs, applied top to bottom. Order matters:
      // "???" has to be handled before "??".
      var swaps = [
         ["/>", ""],
         ["\t", " "],
         ["\\", ""],
         [`"`, "'"],
         ["???", ""],
         ["??", ""]
      ];
      every swap in swaps do
         out = Wub_ReplaceAll(out, swap[0], swap[1])
      end;
      // Disabled: out = Wub_ReplaceAll(out, ",", " ");
      return out
   end;
  
   // cln(s): the cleaner applied to every dataset cell; currently it simply
   // hands the value to cln2 and returns that result.
   var cln = fun(s)
      var cleaned = cln2(s);
      return cleaned
   end;

   // Walk the fetched dataset one line at a time. Each line is split on tabs
   // and only lines with exactly 8 columns are looked at. For every accepted
   // row the loop bumps goodCount/numDocs and the per-tag, per-French-tag and
   // per-author counters.
   //
   // nn collects caption + image URL (one entry per line) of the 8-column rows
   // that fail the required-field check, so they can be inspected afterwards.
   var nn = "";
   var goodCount = 0;
   every tr in Str_Split(Markup(P), "\n") do
     var tds = Str_Split(tr, "\t");
     if Size(tds) == 8 then

        // Clean each cell; a cell whose cleaning fails falls back to "".
        var fImg = cln(tds[IMG]) ? "";
        var fPageUrl = cln(tds[URL]) ? "";
        var fCaption = cln(tds[CAPTION]) ? "";
        var fAuthor = cln(tds[AUTHOR]) ? "";
        var fTags = cln(tds[TAGS]) ? "";
        var fScore = cln(tds[SCORE]) ? "";
        var fCaptionFr = cln(tds[FRTEXT]) ? "";
        var fTagsFr = cln(tds[FRTAGS]) ? "";

        // Image URL, page URL and caption are required. A page URL equal to
        // "URL" is rejected as well (presumably the sheet's header row).
        // if (Size(tds) == TAGS+1) or (Size(tds) == SCORE+1) or (Size(tds) == SCORE+2) then
        if (fImg != "") and (fPageUrl != "") and (fCaption != "") and (fPageUrl != "URL") then

           // A page URL equal to "pageUrl" is skipped silently: the row is
           // neither counted nor reported in nn.
           if (fPageUrl != "pageUrl") then

  //            var doc = Wub_NewLuceneDocument();

              // Flag images served from the allposters image host. Any match
              // position counts, including a match at index 0.
              var isNewYorker = "no";
              if (Str_IndexOf("imgc.allpostersimages.com", fImg) >= 0) then
                 isNewYorker = "yes"
              end;

              // Gather the synonyms of every tag on this row. Each tag's
              // synonym string is appended after a space so that the words of
              // neighbouring tags do not run together.
              var syns = "";
              every cat in Str_Split(fTags, ",") do
                 var key = Str_ToLowerCase(cln3(cat));
                 if key member synonyms then
                    syns = syns + " " + synonyms[key]
                 end
              end;
              var synText = cln3(syns);

              // Combined search text. The tags appear three times (presumably
              // to weight them more heavily than caption and author).
              var allFields = fTags + " " + fTags + " " + fTags + " " +
                              fCaption + " " + fAuthor + " " + synText;
              allFields = Str_Trim(allFields);
              var allFieldsFr = fTagsFr + " " + fTagsFr + " " + fTagsFr + " " +
                                fCaptionFr + " " + fAuthor + " " + synText;
              allFieldsFr = Str_Trim(allFieldsFr);

              // Add fields to doc
  //            ok = (doc != nil) and Wub_AddLuceneField(doc, "pageUrl", fPageUrl, true, false, 0);
  //            ok = Wub_AddLuceneField(doc, "imgUrl", fImg, true, false, 0);
  //            if fCaption != "" then
  //               ok = Wub_AddLuceneField(doc, "text", fCaption, true, true, 0);
  //            end;
  //            if fCaptionFr != "" then
  //               ok = Wub_AddLuceneField(doc, "textfr", fCaptionFr, true, true, 0);
  //            end;
  //            if fAuthor != "" then
  //               ok = Wub_AddLuceneField(doc, "author", fAuthor, true, true, 0);
  //            end;
  //            if fTags != "" then
  //               ok = Wub_AddLuceneField(doc, "tags", fTags, true, true, 0);
  //            end;
  //            if fTagsFr != "" then
  //               ok = Wub_AddLuceneField(doc, "tagsfr", fTagsFr, true, true, 0);
  //            end;
  //            if fScore != "" then
  //               ok = Wub_AddLuceneField(doc, "handScore", fScore, true, true, 0);
  //            end;
  //            ok = Wub_AddLuceneField(doc, "search", allFields, false, true, 0);
  //            ok = Wub_AddLuceneField(doc, "searchFr", allFieldsFr, false, true, 0);
  //            ok = Wub_AddLuceneField(doc, "newYorker", isNewYorker, true, true, 0);
  //
              var shouldAdd = true;

              // if (Size(tds) == SCORE+1) or (Size(tds) == SCORE+2) then
              if (fScore != "") then
                 // A score that cannot be parsed as a real counts as 2.0.
                 var f = (ToReal(fScore) ? 2.0);

                 // Score = -1 has special meaning: don't add
                 if (f == -1.0) then
                    shouldAdd = false
                 end;

                 // Scores above 1.0 get a bonus depending on which marker
                 // (gcoat, fcoat, ecoat) occurs in the lower-cased tags; the
                 // first marker that matches wins.
                 if (f > 1.0) then
                    var lowTags = Str_ToLowerCase(fTags);
                    if (Str_IndexOf("gcoat", lowTags) >= 0) then
                       f = f + 90.0
                    elsif (Str_IndexOf("fcoat", lowTags) >= 0) then
                       f = f + 70.0
                    elsif (Str_IndexOf("ecoat", lowTags) >= 0) then
                       f = f + 50.0
                    end;

                    /*
                    var prv = scoreSet[val] ? nil;
                    if (prv == nil) then
                       scoreSet[val] := [cln(tds[TEXT])]
                    else
                       scoreSet[val] := scoreSet[val] + [cln(tds[TEXT])]
                    end;  */

                    // Will use post ranking by handScore instead
   //                    doc.setBoost(f)
                 end
              end;

   //           ok = (doc != nil) and shouldAdd and Wub_AddLuceneDocument(writer, doc);

              if shouldAdd then
                 goodCount = goodCount + 1;

                 // Count each cleaned tag; tags that clean down to an empty
                 // string are not counted.
                 every tag in Str_Split(fTags, ",") do
                    var tagName = Str_Trim(cln3(tag));
                    if tagName != "" then
                       var num = tagSet[ tagName ] ? 0;
                       tagSet[ tagName ] := num + 1
                    end
                 end;

                 // Same counting for the French tags.
                 every tag in Str_Split(fTagsFr, ",") do
                    var tagName = Str_Trim(cln3(tag));
                    if tagName != "" then
                       var num = tagSetFr[ tagName ] ? 0;
                       tagSetFr[ tagName ] := num + 1
                    end
                 end;

                 var author = fAuthor;
                 if (author != "") then
                    var num = authorSet[ author ] ? 0;
                    authorSet[ author ] := num + 1
                 end;

                 numDocs = numDocs + 1
              end
           end
        else
          // Required field missing (or header row): remember it for the report.
          nn = nn + fCaption + fImg + "\n"
        end
     end
   end;
           
// Disabled: optimizing and closing the Lucene index writer.
//   ok = ok and Wub_OptimizeLucene(writer);
//   if (writer != nil) then
//      Wub_CloseLuceneIndex(writer);
//   end;

   // Attach the collected tag / French tag / author counts to the settings
   // object. The lines that would write the settings back are commented out
   // below, so in the visible script these assignments are not persisted.
   settings.tagSet := tagSet;
   settings.tagSetFr := tagSetFr;
   settings.authorSet := authorSet;           
//   fi.exec := ToString(settings);
//   Wub_SaveFunctionInfo(fi);
           
//   WubCall("adminConsole", ["Index created on STAGING!" + ToString(goodCount)]);
   
// Final expression of the script: the accumulated list of rejected rows
// (caption + image URL, one per line).
nn;