// Collaborama page source (site.view)
//   Function name: testBuildSearchEngine
//   Arguments:     (none)
//   Description:   Compile the exported dataset into a search index.
//   Page type:     webl
//   Module:        perfectCartoon
//
// Overview
//   1. Load the settings object by evaluating the "cartoonDBSettings" function.
//   2. Download the published cartoon spreadsheet (TSV) and the synonym
//      spreadsheet (CSV).
//   3. For every 8-column data row, build the combined search text and count
//      tags (English and French) and authors.
//   4. Store the counts on the in-memory settings object.
//
// All Lucene writer calls and the settings save are currently commented out,
// so this "test" variant only parses and counts. The value of the script is
// `nn`: the caption + image URL of every 8-column row that was rejected for
// missing an image, URL or caption (or for being the header row).

var fi = Wub_GetFunctionInfo("cartoonDBSettings");
var settings = WubEval(fi.exec);

// var writer = Wub_NewLuceneIndex(settings.staging, true, settings.stemmer);

// Fetches a URL as plain text.
var fetchText = fun(url)
    return GetURL(url, nil, nil, [. mimetype="text/plain" .])
end;

var dataUrl = "https://docs.google.com/spreadsheets/d/e/2PACX-1vS6fVF6rf-5wt0NpFcqN5YZZ3NYlyZGPOrmHbIYtP5Be3UoYpHwDMmdTtOI5T27yrIPbtX30ZLFTXNk/pub?gid=0&single=true&output=tsv";

// Try it three times to increase robustness: `?` falls through to the next
// attempt only when the previous fetch fails.
var P = fetchText(dataUrl) ? fetchText(dataUrl) ? fetchText(dataUrl);

// synonyms maps a lower-cased term (first CSV column) to a space-separated
// string of its synonyms (remaining non-empty columns).
var synonyms = [. .];
var SynP = fetchText("https://docs.google.com/spreadsheet/pub?key=0AsUaQihpzloZdERRWXg5QVZwYUVkbV91UTdmcTRuQ2c&output=csv");
every line in Str_Split(Markup(SynP), "\n") do
    var cols = [];
    every col in Str_Split(line, ",") do
        // Naive CSV handling: strip the quotes and drop empty cells.
        col = Str_Trim(Wub_ReplaceAll(col, `"`, ""));
        if col != "" then
            cols = cols + [col]
        end
    end;
    if Size(cols) > 1 then
        var syn = "";
        every s in Rest(cols) do
            syn = syn + " " + s
        end;
        synonyms[Str_ToLowerCase(First(cols))] := Str_Trim(syn)
    end
end;

var numDocs = 0;
var ok = true; // (writer != nil);
var tagSet = [. .];     // tag (English)  -> number of accepted rows using it
var tagSetFr = [. .];   // tag (French)   -> number of accepted rows using it
var authorSet = [. .];  // author         -> number of accepted rows
var scoreSet = [. .];   // only referenced by the disabled code further down

// Column indices of the tab-separated data rows.
var URL = 0;
var IMG = 1;
var CAPTION = 2;
var AUTHOR = 3;
var TAGS = 4;
var SCORE = 5;
var FRTAGS = 6;
var FRTEXT = 7;

// Strict cleaner: expands character entities, then keeps only ASCII letters,
// digits, space, '-' and the apostrophe. Used for tag names and synonyms.
// Note that the trim happens BEFORE filtering, so the result can still have
// leading/trailing spaces; callers that need a key trim it again.
var cln3 = fun(s)
    s = ExpandCharEntities(Str_Trim(s));
    var GOODCHARS = "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ-'1234567890";
    var i = 0;
    var r = "";
    while i < Size(s) do
        var c = Select(s, i, i+1);
        if (Str_IndexOf(c, GOODCHARS) >= 0) then
            r = r + c
        end;
        i = i + 1
    end;
    return r
end;

// Light cleaner: expands character entities and removes or replaces a few
// troublesome sequences ("/>", tabs, backslashes, double quotes, runs of '?').
var cln2 = fun(s)
    s = ExpandCharEntities(Str_Trim(s));
    s = Wub_ReplaceAll(s, "/>", "");
    s = Wub_ReplaceAll(s, "\t", " ");
    s = Wub_ReplaceAll(s, "\\", "");
    s = Wub_ReplaceAll(s, `"`, "'");
    s = Wub_ReplaceAll(s, "???", "");
    s = Wub_ReplaceAll(s, "??", "");
    // s = Wub_ReplaceAll(s, ",", " ");
    return s
end;

// Default field cleaner; currently the light one.
var cln = fun(s)
    return cln2(s)
end;

var nn = "";        // report of rejected rows; this is the script's value
var goodCount = 0;  // number of rows accepted

every tr in Str_Split(Markup(P), "\n") do
    var tds = Str_Split(tr, "\t");

    // Only rows with exactly 8 columns are considered at all.
    if Size(tds) == 8 then
        var fImg = cln(tds[IMG]) ? "";
        var fPageUrl = cln(tds[URL]) ? "";
        var fCaption = cln(tds[CAPTION]) ? "";
        var fAuthor = cln(tds[AUTHOR]) ? "";
        var fTags = cln(tds[TAGS]) ? "";
        var fScore = cln(tds[SCORE]) ? "";
        var fCaptionFr = cln(tds[FRTEXT]) ? "";
        var fTagsFr = cln(tds[FRTAGS]) ? "";

        // if (Size(tds) == TAGS+1) or (Size(tds) == SCORE+1) or (Size(tds) == SCORE+2) then
        if (fImg != "") and (fPageUrl != "") and (fCaption != "") and (fPageUrl != "URL") then
            // A row whose URL cell is the literal "pageUrl" is silently
            // skipped (it is NOT reported in nn, matching the original flow).
            if fPageUrl != "pageUrl" then
                // var doc = Wub_NewLuceneDocument();

                // Only consumed by the disabled "newYorker" field below.
                // The test is >= 0 like every other Str_IndexOf test here.
                var isNewYorker = "no";
                if (Str_IndexOf("imgc.allpostersimages.com", fImg) >= 0) then
                    isNewYorker = "yes"
                end;

                // Collect the synonyms of every tag. A separating space is
                // required, otherwise the last synonym of one tag is fused
                // with the first synonym of the next tag.
                var syns = "";
                every cat in Str_Split(fTags, ",") do
                    cat = Str_ToLowerCase(Str_Trim(cln3(cat)));
                    if cat member synonyms then
                        syns = syns + " " + synonyms[cat]
                    end
                end;

                // Tags are repeated three times in the combined search text
                // (presumably to weight them more heavily -- confirm).
                var allFields = fTags + " " + fTags + " " + fTags + " " + fCaption + " " + fAuthor + " " + cln3(syns);
                allFields = Str_Trim(allFields);

                var allFieldsFr = fTagsFr + " " + fTagsFr + " " + fTagsFr + " " + fCaptionFr + " " + fAuthor + " " + cln3(syns);
                allFieldsFr = Str_Trim(allFieldsFr);

                // Add fields to doc
                // ok = (doc != nil) and Wub_AddLuceneField(doc, "pageUrl", fPageUrl, true, false, 0);
                // ok = Wub_AddLuceneField(doc, "imgUrl", fImg, true, false, 0);
                // if fCaption != "" then
                //     ok = Wub_AddLuceneField(doc, "text", fCaption, true, true, 0);
                // end;
                // if fCaptionFr != "" then
                //     ok = Wub_AddLuceneField(doc, "textfr", fCaptionFr, true, true, 0);
                // end;
                // if fAuthor != "" then
                //     ok = Wub_AddLuceneField(doc, "author", fAuthor, true, true, 0);
                // end;
                // if fTags != "" then
                //     ok = Wub_AddLuceneField(doc, "tags", fTags, true, true, 0);
                // end;
                // if fTagsFr != "" then
                //     ok = Wub_AddLuceneField(doc, "tagsfr", fTagsFr, true, true, 0);
                // end;
                // if fScore != "" then
                //     ok = Wub_AddLuceneField(doc, "handScore", fScore, true, true, 0);
                // end;
                // ok = Wub_AddLuceneField(doc, "search", allFields, false, true, 0);
                // ok = Wub_AddLuceneField(doc, "searchFr", allFieldsFr, false, true, 0);
                // ok = Wub_AddLuceneField(doc, "newYorker", isNewYorker, true, true, 0);

                // Must be a live declaration: it is assigned and tested below.
                var shouldAdd = true;

                // if (Size(tds) == SCORE+1) or (Size(tds) == SCORE+2) then
                if (fScore != "") then
                    // An unparsable score falls back to 2.0.
                    var f = (ToReal(fScore) ? 2.0);

                    // Score = -1 has special meaning: don't add
                    if (f == -1.0) then
                        shouldAdd = false
                    end;

                    if (f > 1.0) then
                        // Boost by "coat" tag. The boosted value is currently
                        // unused because doc.setBoost below is disabled.
                        if (Str_IndexOf("gcoat", Str_ToLowerCase(fTags)) >= 0) then
                            f = f + 90.0
                        elsif (Str_IndexOf("fcoat", Str_ToLowerCase(fTags)) >= 0) then
                            f = f + 70.0
                        elsif (Str_IndexOf("ecoat", Str_ToLowerCase(fTags)) >= 0) then
                            f = f + 50.0
                        end;

                        /*
                        var prv = scoreSet[val] ? nil;
                        if (prv == nil) then
                            scoreSet[val] := [cln(tds[TEXT])]
                        else
                            scoreSet[val] := scoreSet[val] + [cln(tds[TEXT])]
                        end;
                        */

                        // Will use post ranking by handScore instead
                        // doc.setBoost(f)
                    end
                end;

                // ok = (doc != nil) and shouldAdd and Wub_AddLuceneDocument(writer, doc);

                if shouldAdd then
                    goodCount = goodCount + 1;

                    // Count tags; names that clean down to "" are skipped so
                    // the counts never get an empty key.
                    every tag in Str_Split(fTags, ",") do
                        var tagName = Str_Trim(cln3(tag));
                        if tagName != "" then
                            var num = tagSet[ tagName ] ? 0;
                            tagSet[ tagName ] := num + 1
                        end
                    end;

                    every tag in Str_Split(fTagsFr, ",") do
                        var tagName = Str_Trim(cln3(tag));
                        if tagName != "" then
                            var num = tagSetFr[ tagName ] ? 0;
                            tagSetFr[ tagName ] := num + 1
                        end
                    end;

                    if (fAuthor != "") then
                        var num = authorSet[ fAuthor ] ? 0;
                        authorSet[ fAuthor ] := num + 1
                    end;

                    numDocs = numDocs + 1
                end
            end
        else
            // Rejected row: record what we have so it can be inspected.
            nn = nn + fCaption + fImg + "\n"
        end
    end
end;

// ok = ok and Wub_OptimizeLucene(writer);
// if (writer != nil) then
//     Wub_CloseLuceneIndex(writer);
// end;

// Updated in memory only; the save below is disabled.
settings.tagSet := tagSet;
settings.tagSetFr := tagSetFr;
settings.authorSet := authorSet;

// fi.exec := ToString(settings);
// Wub_SaveFunctionInfo(fi);
// WubCall("adminConsole", ["Index created on STAGING!" + ToString(goodCount)]);

nn;