Source: site.view [edit]
Function name: buildSearchEngine
Arguments:
Description: Compile the exported dataset into a search index.
Page type: webl
Render function:  
Module: perfectCartoon

Page source:

   // Load the persisted settings object of this module; it is written back
   // (with refreshed tag/author statistics) once the index has been built.
   var fi = Wub_GetFunctionInfo("cartoonDBSettings");
   var settings = WubEval(fi.exec);

   // Index writer for the staging location named in the settings.
   var writer = Wub_NewLuceneIndex(settings.staging, true, settings.stemmer);

   // Download the exported dataset. Each `?` falls back to the next attempt
   // when the previous one fails; note the last attempt turns autoredirect off.
   var P = GetURL(settings.dbUrl, nil, nil, [. mimetype="text/plain", autoredirect=true .]) ? 
           GetURL(settings.dbUrl, nil, nil, [. mimetype="text/plain", autoredirect=true .]) ? 
           GetURL(settings.dbUrl, nil, nil, [. mimetype="text/plain", autoredirect=false .]);

   // Synonym table: lower-cased first column of every CSV row -> remaining
   // non-empty columns joined with single spaces. A failed download yields
   // an empty table instead of aborting the build.
   var synonyms = [. .];
   var synonymsUrl = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSJQvhH7JF3V6CgIopHLNrgON7h7OIFhx30dq0cZAg2ukOUgWlsNOolD9803OLWNrohcsWdwAtb0cFG/pub?gid=0&single=true&output=csv";
   var SynP = Markup(GetURL(synonymsUrl, nil, nil, [. mimetype="text/plain", autoredirect=true .])) ? "";
   every row in Str_Split(SynP, "\n") do
      // Keep only the cells that still hold text once quotes and
      // surrounding blanks are removed.
      var cells = [];
      every cell in Str_Split(row, ",") do
         var text = Str_Trim(Wub_ReplaceAll(cell, `"`, ""));
         if text != "" then
            cells = cells + [text]
         end
      end;
      // A row needs a key plus at least one synonym to be useful.
      if Size(cells) > 1 then
         var joined = "";
         every word in Rest(cells) do
            joined = joined + " " + word
         end;
         synonyms[Str_ToLowerCase(First(cells))] := Str_Trim(joined)
      end
   end;


   // Column positions inside one tab separated row of the export.
   var URL=0;
   var IMG=1;
   var CAPTION=2;
   var AUTHOR=3;
   var TAGS=4;
   var SCORE=5;
   var FRTAGS=6;
   var FRTEXT=7;

   // Build state shared with the indexing loop.
   var numDocs = 0;
   var outS = "?";
   var ok = (writer != nil);
   var addDebug = true;

   // Occurrence counters collected while indexing.
   var tagSet = [. .];
   var tagSetFr = [. .];
   var authorSet = [. .];
   var scoreSet = [. .];

   // cln3(s): trims s, expands its character entities and returns only the
   // characters that are ASCII letters, digits, blanks, '-' or "'".
   var cln3 = fun(s)
      var allowed = "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ-'1234567890";
      var text = ExpandCharEntities(Str_Trim(s));
      var length = Size(text);
      var kept = "";
      var pos = 0;
      while pos < length do
         // One-character slice at the current position.
         var ch = Select(text, pos, pos + 1);
         if Str_IndexOf(ch, allowed) >= 0 then
            kept = kept + ch
         end;
         pos = pos + 1
      end;
      return kept
   end;

   // cln2(s): light clean-up of one raw column value. The value is trimmed,
   // its character entities are expanded and a fixed list of substitutions
   // is applied in order (the order matters: "???" must go before "??").
   var cln2 = fun(s)
      var text = ExpandCharEntities(Str_Trim(s));
      // [search, replacement] pairs, applied top to bottom.
      var substitutions = [
         ["/>", ""],
         ["\t", " "],
         ["\\", ""],
         [`"`, "'"],
         ["???", ""],
         ["??", ""]
      ];
      every pair in substitutions do
         text = Wub_ReplaceAll(text, pair[0], pair[1])
      end;
      return text
   end;
  
   // cln(s): the cleaner applied to every column read from the export.
   // It currently forwards to cln2 unchanged.
   var cln = fun(s)
      var cleaned = cln2(s);
      return cleaned
   end;
   
   // ------------------------------------------------------------------
   // Indexing pass: turn every usable row of the export into one Lucene
   // document, collect tag/author statistics, then optimize and close the
   // index, persist the statistics and report to the admin console.
   //
   // `ok` starts as (writer != nil) and is only ever lowered by a real
   // failure (no document object, a field or document that could not be
   // added). Rows that are skipped on purpose do not affect it.
   // ------------------------------------------------------------------

   // Returns the cleaned synonym words configured for the tags of a comma
   // separated tag list ("" when no tag has synonyms).
   var synonymsForTags = fun(tagList)
      var words = "";
      every rawTag in Str_Split(tagList, ",") do
         var key = Str_ToLowerCase(cln3(rawTag));
         if key member synonyms then
            // Keep a blank between entries so the last word of one synonym
            // list does not fuse with the first word of the next one.
            words = words + " " + synonyms[key]
         end
      end;
      return cln3(words)
   end;

   // Adds one to the counter of every tag found in a comma separated list.
   var countTags = fun(counts, tagList)
      every rawTag in Str_Split(tagList, ",") do
         var tagName = Str_Trim(cln3(rawTag));
         var seen = counts[ tagName ] ? 0;
         counts[ tagName ] := seen + 1
      end;
      return counts
   end;

   // Document boost for a hand score above 1.0: the score itself plus a
   // bonus for the highest coat tier mentioned in the tags.
   var boostForScore = fun(score, tagList)
      var lowered = Str_ToLowerCase(tagList);
      var bonus = 0.0;
      if (Str_IndexOf("gcoat", lowered) >= 0) then
         bonus = 90.0
      elsif (Str_IndexOf("fcoat", lowered) >= 0) then
         bonus = 70.0
      elsif (Str_IndexOf("ecoat", lowered) >= 0) then
         bonus = 50.0
      end;
      return score + bonus
   end;

   every tr in Str_Split(Markup(P), "\n") do
      var tds = Str_Split(tr, "\t");

      // Rows need at least the columns up to SCORE to be considered.
      if Size(tds) > 5 then

         // Columns a row does not have (the French ones are optional) fall
         // back to the empty string.
         var fImg = cln(tds[IMG]) ? "";
         var fPageUrl = cln(tds[URL]) ? "";
         var fCaption = cln(tds[CAPTION]) ? "";
         var fAuthor = cln(tds[AUTHOR]) ? "";
         var fTags = cln(tds[TAGS]) ? "";
         var fScore = cln(tds[SCORE]) ? "";
         var fCaptionFr = cln(tds[FRTEXT]) ? "";
         var fTagsFr = cln(tds[FRTAGS]) ? "";

         // "SiteUrl" in the URL column marks the header row of the export.
         if (fImg != "") and (fPageUrl != "") and (fCaption != "") and (fPageUrl != "SiteUrl") then

            // Decide from the hand score whether the row is indexed at all
            // and with which boost, before any document is built.
            // addDebug is the global switch that can stop indexing, e.g.
            //    if numDocs > 40 then addDebug = false end
            var shouldAdd = addDebug;
            var useBoost = false;
            var boost = 1.0;
            if (fScore != "") then
               // Non numeric scores are treated as 2.0.
               var f = (ToReal(fScore) ? 2.0);

               // Score = -1 has special meaning: don't add
               if (f == -1.0) then
                  shouldAdd = false
               end;

               if (f > 1.0) then
                  // Will use post ranking by handScore instead
                  boost = boostForScore(f, fTags);
                  useBoost = true
               end
            end;

            if shouldAdd then
               var doc = Wub_NewLuceneDocument();

               if (doc == nil) then
                  ok = false
               else
                  var isNewYorker = "no";
                  // >= 0 so a URL that starts with the host name matches too.
                  if (Str_IndexOf("imgc.allpostersimages.com", fImg) >= 0) then
                     isNewYorker = "yes"
                  end;

                  // Catch-all search fields; tags are repeated three times,
                  // presumably to weight them above caption and author.
                  var syns = synonymsForTags(fTags);
                  var allFields = Str_Trim(fTags + " " + fTags + " " + fTags + " " +
                                           fCaption + " " + fAuthor + " " + syns);
                  var allFieldsFr = Str_Trim(fTagsFr + " " + fTagsFr + " " + fTagsFr + " " +
                                             fCaptionFr + " " + fAuthor + " " + syns);

                  // Add fields to doc. Every call is made regardless of
                  // earlier results; rowOk remembers whether any failed.
                  var rowOk = true;
                  rowOk = Wub_AddLuceneField(doc, "pageUrl", fPageUrl, true, false, 0) and rowOk;
                  rowOk = Wub_AddLuceneField(doc, "imgUrl", fImg, true, false, 0) and rowOk;
                  if fCaption != "" then
                     rowOk = Wub_AddLuceneField(doc, "text", fCaption, true, true, 0) and rowOk
                  end;
                  if fCaptionFr != "" then
                     rowOk = Wub_AddLuceneField(doc, "textfr", fCaptionFr, true, true, 0) and rowOk
                  end;
                  if fAuthor != "" then
                     rowOk = Wub_AddLuceneField(doc, "author", fAuthor, true, true, 0) and rowOk
                  end;
                  if fTags != "" then
                     rowOk = Wub_AddLuceneField(doc, "tags", fTags, true, true, 0) and rowOk
                  end;
                  if fTagsFr != "" then
                     rowOk = Wub_AddLuceneField(doc, "tagsfr", fTagsFr, true, true, 0) and rowOk
                  end;
                  if fScore != "" then
                     rowOk = Wub_AddLuceneField(doc, "handScore", fScore, true, true, 0) and rowOk
                  end;
                  rowOk = Wub_AddLuceneField(doc, "search", allFields, false, true, 0) and rowOk;
                  rowOk = Wub_AddLuceneField(doc, "searchFr", allFieldsFr, false, true, 0) and rowOk;
                  rowOk = Wub_AddLuceneField(doc, "newYorker", isNewYorker, true, true, 0) and rowOk;

                  if useBoost then
                     doc.setBoost(boost)
                  end;

                  // Never hand a missing writer to the index; only documents
                  // the writer accepted are counted.
                  var added = (writer != nil) and Wub_AddLuceneDocument(writer, doc);
                  if added then
                     numDocs = numDocs + 1
                  end;
                  ok = ok and rowOk and added
               end;

               // Statistics are gathered for every row accepted for
               // indexing, independent of the writer's result.
               countTags(tagSet, fTags);
               countTags(tagSetFr, fTagsFr);
               if (fAuthor != "") then
                  var num = authorSet[ fAuthor ] ? 0;
                  authorSet[ fAuthor ] := num + 1
               end
            end
         end
      end
   end;

   // Optimize only after a clean run, but always close an open writer.
   if (writer != nil) then
      ok = ok and Wub_OptimizeLucene(writer);
      Wub_CloseLuceneIndex(writer)
   end;

   // Persist the refreshed statistics together with the settings.
   settings.tagSet := tagSet;
   settings.tagSetFr := tagSetFr;
   settings.authorSet := authorSet;
   fi.exec := ToString(settings);
   Wub_SaveFunctionInfo(fi);

   // Report the outcome; failures are no longer announced as success.
   var report = "";
   if ok then
      report = "Index created on STAGING with " + ToString(numDocs) + " entries!"
   else
      report = "Index build on STAGING reported errors; " + ToString(numDocs) + " entries were added."
   end;
   WubCall("adminConsole", [report]);