Ticket #223: 0001-WebSearch-Verbosity-of-the-search-engine-via-CLI.patch

File 0001-WebSearch-Verbosity-of-the-search-engine-via-CLI.patch, 20.9 KB (added by bthiell, 4 years ago)

Patch

  • modules/websearch/lib/search_engine.py

    From 1fec6dce6638bb019206c205f719d504061656e3 Mon Sep 17 00:00:00 2001
    From: Benoit Thiell <bthiell@cfa.harvard.edu>
    Date: Mon, 2 Aug 2010 07:26:30 -0400
    Subject: [PATCH] WebSearch: Verbosity of the search engine via CLI
    
    * The debug messages are now written to stdout if no
      request object is passed to the functions.
    
    (fixes #223)
    ---
     modules/websearch/lib/search_engine.py |  105 ++++++++++++++++---------------
     1 files changed, 54 insertions(+), 51 deletions(-)
    
    diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
    index 9d83b5c..d6d288d 100644
    a b def create_basic_search_units(req, p, f, m=None, of='hb'): 
    597597                else: 
    598598                    opfts.append(['|', word, f, 'w']) # '|' in further units 
    599599        else: 
    600             if of.startswith("h"): 
     600            if of.startswith("h") or req is None: 
    601601                print_warning(req, "Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning") 
    602602            opfts.append(['+', "%" + p + "%", f, 'w']) 
    603603    else: 
    def create_basic_search_units(req, p, f, m=None, of='hb'): 
    688688        try: 
    689689            pi = opfts[i][1] 
    690690            if pi == '*': 
    691                 if of.startswith("h"): 
     691                if of.startswith("h") or req is None: 
    692692                    print_warning(req, "Ignoring standalone wildcard word.", "Warning") 
    693693                del opfts[i] 
    694694            if pi == '' or pi == ' ': 
    695695                fi = opfts[i][2] 
    696696                if fi: 
    697                     if of.startswith("h"): 
     697                    if of.startswith("h") or req is None: 
    698698                        print_warning(req, "Ignoring empty <em>%s</em> search term." % fi, "Warning") 
    699699                del opfts[i] 
    700700        except: 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    17411741        # no pattern, so return all universe 
    17421742        return hitset_full 
    17431743    # search stage 1: break up arguments into basic search units: 
    1744     if verbose and of.startswith("h"): 
     1744    if verbose and (of.startswith("h") or req is None): 
    17451745        t1 = os.times()[4] 
    17461746    basic_search_units = create_basic_search_units(req, p, f, m, of) 
    1747     if verbose and of.startswith("h"): 
     1747    if verbose and (of.startswith("h") or req is None): 
    17481748        t2 = os.times()[4] 
    17491749        print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units))) 
    17501750        print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1)) 
    17511751    # search stage 2: do search for each search unit and verify hit presence: 
    1752     if verbose and of.startswith("h"): 
     1752    if verbose and (of.startswith("h") or req is None): 
    17531753        t1 = os.times()[4] 
    17541754    basic_search_units_hitsets = [] 
    17551755    #prepare hiddenfield-related.. 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    17731773                if samelenfield == htag: #user searches by a hidden tag 
    17741774                    #we won't show you anything.. 
    17751775                    basic_search_unit_hitset = HitSet() 
    1776                     if verbose >= 9 and of.startswith("h"): 
     1776                    if verbose >= 9 and (of.startswith("h") or req is None): 
    17771777                        print_warning(req, "Pattern %s hitlist omitted since \ 
    17781778                                            it queries in a hidden tag %s" % 
    17791779                                      (repr(bsu_p), repr(myhiddens))) 
    17801780                    display_nearest_terms_box=False #..and stop spying, too. 
    17811781 
    1782         if verbose >= 9 and of.startswith("h"): 
     1782        if verbose >= 9 and (of.startswith("h") or req is None): 
    17831783            print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset)) 
    17841784        if len(basic_search_unit_hitset) > 0 or \ 
    17851785           ap==0 or \ 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    17981798                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p) 
    17991799                else: # it is WRD query 
    18001800                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p) 
    1801                 if verbose and of.startswith('h') and req: 
     1801                if verbose and (of.startswith('h') or req is None): 
    18021802                    print_warning(req, "Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m))) 
    1803                 basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln) 
     1803                basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, 
     1804                        f=bsu_f, m=bsu_m, of="id", verbose=verbose, ln=ln) 
    18041805                if len(basic_search_unit_hitset) > 0: 
    18051806                    # we retain the new unit instead 
    1806                     if of.startswith('h'): 
     1807                    if of.startswith('h') or req is None: 
    18071808                        print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \ 
    18081809                                      {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>", 
    18091810                                       'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}) 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    18111812                    basic_search_units_hitsets.append(basic_search_unit_hitset) 
    18121813                else: 
    18131814                    # stage 2-3: no hits found either, propose nearest indexed terms: 
    1814                     if of.startswith('h') and display_nearest_terms_box: 
    1815                         if req: 
    1816                             if bsu_f == "recid": 
    1817                                 print_warning(req, _("Requested record does not seem to exist.")) 
    1818                             else: 
    1819                                 print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) 
    1820                     return hitset_empty 
    1821             else: 
    1822                 # stage 2-3: no hits found either, propose nearest indexed terms: 
    1823                 if of.startswith('h') and display_nearest_terms_box: 
    1824                     if req: 
     1815                    if req is not None and of.startswith('h') and display_nearest_terms_box: 
    18251816                        if bsu_f == "recid": 
    18261817                            print_warning(req, _("Requested record does not seem to exist.")) 
    18271818                        else: 
    18281819                            print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) 
     1820                    return hitset_empty 
     1821            else: 
     1822                # stage 2-3: no hits found either, propose nearest indexed terms: 
     1823                if req is not None and of.startswith('h') and display_nearest_terms_box: 
     1824                    if bsu_f == "recid": 
     1825                        print_warning(req, _("Requested record does not seem to exist.")) 
     1826                    else: 
     1827                        print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) 
    18291828                return hitset_empty 
    1830     if verbose and of.startswith("h"): 
     1829    if verbose and (of.startswith("h") or req is None): 
    18311830        t2 = os.times()[4] 
    18321831        for idx_unit in range(0, len(basic_search_units)): 
    18331832            print_warning(req, "Search stage 2: basic search unit %s gave %d hits." % 
    18341833                          (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit]))) 
    18351834        print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1)) 
    18361835    # search stage 3: apply boolean query for each search unit: 
    1837     if verbose and of.startswith("h"): 
     1836    if verbose and (of.startswith("h") or req is None): 
    18381837        t1 = os.times()[4] 
    18391838    # let the initial set be the complete universe: 
    18401839    hitset_in_any_collection = HitSet(trailing_bits=1) 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    18491848        elif this_unit_operation == '|': 
    18501849            hitset_in_any_collection.union_update(this_unit_hitset) 
    18511850        else: 
    1852             if of.startswith("h"): 
     1851            if of.startswith("h") or req is None: 
    18531852                print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error") 
    18541853    if len(hitset_in_any_collection) == 0: 
    18551854        # no hits found, propose alternative boolean query: 
    def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, l 
    18731872            text = websearch_templates.tmpl_search_no_boolean_hits( 
    18741873                     ln=ln,  nearestterms=nearestterms) 
    18751874            print_warning(req, text) 
    1876     if verbose and of.startswith("h"): 
     1875    if verbose and (of.startswith("h") or req is None): 
    18771876        t2 = os.times()[4] 
    18781877        print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection)) 
    18791878        print_warning(req, "Search stage 3: execution took %.2f seconds." % (t2 - t1)) 
    def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id" 
    19051904 
    19061905        # parse the query. The result is list of [op1, expr1, op2, expr2, ..., opN, exprN] 
    19071906        parsing_result = parser.parse_query(p) 
    1908         if verbose  and of.startswith("h"): 
     1907        if verbose and (of.startswith("h") or req is None): 
    19091908            print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result)) 
    19101909 
    19111910        # go through every pattern 
    def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, 
    22042203    _ = gettext_set_language(ln) 
    22052204 
    22062205    # search stage 4: intersect with the collection universe: 
    2207     if verbose and of.startswith("h"): 
     2206    if verbose and (of.startswith("h") or req is None): 
    22082207        t1 = os.times()[4] 
    22092208    results = {} 
    22102209    results_nbhits = 0 
    def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, 
    22312230                                     "If you were looking for a non-public document, please choose " 
    22322231                                     "the desired restricted collection first.")) 
    22332232            results = {} 
    2234     if verbose and of.startswith("h"): 
     2233    if verbose and (of.startswith("h") or req is None): 
    22352234        t2 = os.times()[4] 
    2236         print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits) 
     2235        print_warning(req, "Search stage 4: intersecting with collection%s '%s' " 
     2236                "gave %d hits." % (len(colls) > 1 and 's' or '', 
     2237                    ', '.join(colls), results_nbhits)) 
    22372238        print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1)) 
    22382239    return results 
    22392240 
    def get_modification_date(recID, fmt="%Y-%m-%d"): 
    29572958 
    29582959def print_warning(req, msg, type='', prologue='<br />', epilogue='<br />'): 
    29592960    "Prints warning message and flushes output." 
    2960     if req and msg: 
     2961    if req is None: 
     2962        print msg 
     2963    elif msg: 
    29612964        req.write(websearch_templates.tmpl_print_warning( 
    29622965                   msg = msg, 
    29632966                   type = type, 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    43034306                # Print empty, but valid XML 
    43044307                print_records_prologue(req, of) 
    43054308                print_records_epilogue(req, of) 
    4306             elif of.startswith("h"): 
    4307                 if req.header_only: 
     4309            elif of.startswith("h") or req is None: 
     4310                if req is not None and req.header_only: 
    43084311                    raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND 
    43094312                else: 
    43104313                    print_warning(req, _("Requested record does not seem to exist.")) 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    43414344                                        p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) 
    43424345        if record_exists(p[6:]) != 1: 
    43434346            # record does not exist 
    4344             if of.startswith("h"): 
    4345                 if req.header_only: 
     4347            if of.startswith("h") or req is None: 
     4348                if req is not None and req.header_only: 
    43464349                    raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND 
    43474350                else: 
    43484351                    print_warning(req, _("Requested record does not seem to exist.")) 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    43754378                                  results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose) 
    43764379            else: 
    43774380                # rank_records failed and returned some error message to display: 
    4378                 if of.startswith("h"): 
     4381                if of.startswith("h") or req is None: 
    43794382                    print_warning(req, results_similar_relevances_prologue) 
    43804383                    print_warning(req, results_similar_relevances_epilogue) 
    43814384                    print_warning(req, results_similar_comments) 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    43954398        recID = p[12:] 
    43964399        if record_exists(recID) != 1: 
    43974400            # record does not exist 
    4398             if of.startswith("h"): 
     4401            if of.startswith("h") or req is None: 
    43994402                print_warning(req, _("Requested record does not seem to exist.")) 
    44004403            if of == "id": 
    44014404                return [] 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    44234426 
    44244427            else: 
    44254428                # cited rank_records failed and returned some error message to display: 
    4426                 if of.startswith("h"): 
     4429                if of.startswith("h") or req is None: 
    44274430                    print_warning(req, "nothing found") 
    44284431                if of == "id": 
    44294432                    return [] 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    44554458                    if result[1] == None or result[1] == False: 
    44564459                        # these are the searches that returned no or zero results 
    44574460                        if verbose: 
    4458                             print_warning(req, "Hosted collections (perform_search_request): %s returned no results" % result[0][1].name) 
     4461                            print_warning(req, "Hosted collections (perform_request_search): %s returned no results" % result[0][1].name) 
    44594462                    else: 
    44604463                        # these are the searches that actually returned results on time 
    44614464                        hosted_colls_true_results.append(result) 
    44624465                        if verbose: 
    4463                             print_warning(req, "Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) 
     4466                            print_warning(req, "Hosted collections (perform_request_search): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) 
    44644467            else: 
    44654468                if verbose: 
    4466                     print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time") 
     4469                    print_warning(req, "Hosted collections (perform_request_search): there were no hosted collections results to be printed at this time") 
    44674470            if hosted_colls_timeouts: 
    44684471                if verbose: 
    44694472                    for timeout in hosted_colls_timeouts: 
    4470                         print_warning(req, "Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name) 
     4473                        print_warning(req, "Hosted collections (perform_request_search): %s timed out and will be searched again later" % timeout[0][1].name) 
    44714474        # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end 
    44724475        elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")): 
    44734476            (hosted_colls_results, hosted_colls_timeouts) = (None, None) 
    44744477        else: 
    44754478            if verbose: 
    4476                 print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections to be searched") 
     4479                print_warning(req, "Hosted collections (perform_request_search): there were no hosted collections to be searched") 
    44774480 
    44784481        ## let's define some useful boolean variables: 
    44794482        # True means there are actual or potential hosted collections results to be printed 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    45124515                    elif op1 == "n": # not 
    45134516                        results_in_any_collection.difference_update(results_tmp) 
    45144517                    else: 
    4515                         if of.startswith("h"): 
     4518                        if of.startswith("h") or req is None: 
    45164519                            print_warning(req, "Invalid set operation %s." % cgi.escape(op1), "Error") 
    45174520                    if len(results_in_any_collection) == 0: 
    45184521                        if of.startswith("h"): 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    45314534                    elif op2 == "n": # not 
    45324535                        results_in_any_collection.difference_update(results_tmp) 
    45334536                    else: 
    4534                         if of.startswith("h"): 
     4537                        if of.startswith("h") or req is None: 
    45354538                            print_warning(req, "Invalid set operation %s." % cgi.escape(op2), "Error") 
    45364539            except: 
    45374540                register_exception(req=req, alert_admin=True) 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    45504553                # query is in the cache already, so reuse it: 
    45514554                query_in_cache = True 
    45524555                results_in_any_collection = search_results_cache.cache[query_representation_in_cache] 
    4553                 if verbose and of.startswith("h"): 
     4556                if verbose and (of.startswith("h") or req is None): 
    45544557                    print_warning(req, "Search stage 0: query found in cache, reusing cached results.") 
    45554558            else: 
    45564559                try: 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    45804583            if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: 
    45814584                search_results_cache.clear() 
    45824585            search_results_cache.cache[query_representation_in_cache] = results_in_any_collection 
    4583             if verbose and of.startswith("h"): 
     4586            if verbose and (of.startswith("h") or req is None): 
    45844587                print_warning(req, "Search stage 3: storing query results in cache.") 
    45854588 
    45864589        # search stage 4: intersection with collection universe: 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    46104613 
    46114614        # search stage 5: apply search option limits and restrictions: 
    46124615        if datetext1 != "" and results_final != {}: 
    4613             if verbose and of.startswith("h"): 
     4616            if verbose and (of.startswith("h") or req is None): 
    46144617                print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2)) 
    46154618            try: 
    46164619                results_final = intersect_results_with_hitset(req, 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    46374640 
    46384641        if pl and results_final != {}: 
    46394642            pl = wash_pattern(pl) 
    4640             if verbose and of.startswith("h"): 
     4643            if verbose and (of.startswith("h") or req is None): 
    46414644                print_warning(req, "Search stage 5: applying search pattern limit %s..." % cgi.escape(pl)) 
    46424645            try: 
    46434646                results_final = intersect_results_with_hitset(req, 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    47064709 
    47074710        # we continue past this point only if there is a hosted collection that has timed out and might offer potential results 
    47084711        if results_final_nb_total ==0 and not hosted_colls_potential_results_p: 
    4709             if of.startswith("h"): 
     4712            if of.startswith("h") or req is None: 
    47104713                print_warning(req, "No match found, please enter different search terms.") 
    47114714            elif of.startswith("x"): 
    47124715                # Print empty, but valid XML 
    def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CF 
    47664769                                                         rank_records(rm, 0, results_final[coll], 
    47674770                                                                      string.split(p) + string.split(p1) + 
    47684771                                                                      string.split(p2) + string.split(p3), verbose) 
    4769                             if of.startswith("h"): 
     4772                            if of.startswith("h") or req is None: 
    47704773                                print_warning(req, results_final_comments) 
    47714774                            if results_final_recIDs_ranked: 
    47724775                                results_final_recIDs = results_final_recIDs_ranked