From 6fd3a8702de3213169cd56a75632e3faee52e955 Mon Sep 17 00:00:00 2001
From: Samuele Kaplun <samuele.kaplun@cern.ch>
Date: Tue, 27 Jul 2010 16:05:00 +0200
Subject: [PATCH] BibIndex: fix wrong idxPAIR term washing
* fix idxPAIR washing by truncating terms at 100 chars instead of
the 50 chars used for idxWORD. This fixes duplicate-key errors
in idxPAIR term insertion.
---
modules/bibindex/lib/bibindex_engine.py | 55 ++++++++++++++++++++++--------
1 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/modules/bibindex/lib/bibindex_engine.py b/modules/bibindex/lib/bibindex_engine.py
index 6c70473..cef2c86 100644
|
a
|
b
|
def update_index_last_updated(index_id, starting_time=None): |
| 705 | 705 | class WordTable: |
| 706 | 706 | "A class to hold the words table." |
| 707 | 707 | |
| 708 | | def __init__(self, index_id, fields_to_index, table_name_pattern, default_get_words_fnc, tag_to_words_fnc_map, wash_index_terms=True, is_fulltext_index=False): |
| | 708 | def __init__(self, index_id, fields_to_index, table_name_pattern, default_get_words_fnc, tag_to_words_fnc_map, wash_index_terms=50, is_fulltext_index=False): |
| 709 | 709 | """Creates words table instance. |
| 710 | 710 | @param index_id: the index integer identificator |
| 711 | 711 | @param fields_to_index: a list of fields to index |
| … |
… |
class WordTable: |
| 1087 | 1087 | """Adds/deletes a word to the word list.""" |
| 1088 | 1088 | try: |
| 1089 | 1089 | if self.wash_index_terms: |
| 1090 | | word = wash_index_term(word) |
| | 1090 | word = wash_index_term(word, self.wash_index_terms) |
| 1091 | 1091 | if self.value.has_key(word): |
| 1092 | 1092 | # the word 'word' exist already: update sign |
| 1093 | 1093 | self.value[word][recID] = sign |
| … |
… |
def task_run_core(): |
| 1427 | 1427 | fnc_get_words_from_phrase = get_words_from_date_tag |
| 1428 | 1428 | else: |
| 1429 | 1429 | fnc_get_words_from_phrase = get_words_from_phrase |
| 1430 | | wordTable = WordTable(index_id, index_tags, 'idxWORD%02dF', |
| 1431 | | fnc_get_words_from_phrase, |
| 1432 | | {'8564_u': get_words_from_fulltext}) |
| | 1430 | |
| | 1431 | wordTable = WordTable(index_id=index_id, |
| | 1432 | fields_to_index=index_tags, |
| | 1433 | table_name_pattern='idxWORD%02dF', |
| | 1434 | default_get_words_fnc=fnc_get_words_from_phrase, |
| | 1435 | tag_to_words_fnc_map={'8564_u': get_words_from_fulltext}, |
| | 1436 | wash_index_terms=50) |
| 1433 | 1437 | _last_word_table = wordTable |
| 1434 | 1438 | wordTable.report_on_table_consistency() |
| 1435 | 1439 | task_sleep_now_if_required(can_stop_too=True) |
| 1436 | 1440 | |
| 1437 | | wordTable = WordTable(index_id, index_tags, 'idxPAIR%02dF', get_pairs_from_phrase, {'8564_u': get_nothing_from_phrase}, False) |
| | 1441 | wordTable = WordTable(index_id=index_id, |
| | 1442 | fields_to_index=index_tags, |
| | 1443 | table_name_pattern='idxPAIR%02dF', |
| | 1444 | default_get_words_fnc=get_pairs_from_phrase, |
| | 1445 | tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, |
| | 1446 | wash_index_terms=100) |
| 1438 | 1447 | _last_word_table = wordTable |
| 1439 | 1448 | wordTable.report_on_table_consistency() |
| 1440 | 1449 | task_sleep_now_if_required(can_stop_too=True) |
| … |
… |
def task_run_core(): |
| 1445 | 1454 | fnc_get_phrases_from_phrase = get_exact_authors_from_phrase |
| 1446 | 1455 | else: |
| 1447 | 1456 | fnc_get_phrases_from_phrase = get_phrases_from_phrase |
| 1448 | | wordTable = WordTable(index_id, index_tags, 'idxPHRASE%02dF', |
| 1449 | | fnc_get_phrases_from_phrase, |
| 1450 | | {'8564_u': get_nothing_from_phrase}, False) |
| | 1457 | wordTable = WordTable(index_id=index_id, |
| | 1458 | fields_to_index=index_tags, |
| | 1459 | table_name_pattern='idxPHRASE%02dF', |
| | 1460 | default_get_words_fnc=fnc_get_phrases_from_phrase, |
| | 1461 | tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, |
| | 1462 | wash_index_terms=0) |
| 1451 | 1463 | _last_word_table = wordTable |
| 1452 | 1464 | wordTable.report_on_table_consistency() |
| 1453 | 1465 | task_sleep_now_if_required(can_stop_too=True) |
| … |
… |
def task_run_core(): |
| 1466 | 1478 | fnc_get_words_from_phrase = get_words_from_date_tag |
| 1467 | 1479 | else: |
| 1468 | 1480 | fnc_get_words_from_phrase = get_words_from_phrase |
| 1469 | | wordTable = WordTable(index_id, index_tags, reindex_prefix + 'idxWORD%02dF', |
| 1470 | | fnc_get_words_from_phrase, {'8564_u': get_words_from_fulltext}, is_fulltext_index=is_fulltext_index) |
| | 1481 | wordTable = WordTable(index_id=index_id, |
| | 1482 | fields_to_index=index_tags, |
| | 1483 | table_name_pattern=reindex_prefix + 'idxWORD%02dF', |
| | 1484 | default_get_words_fnc=fnc_get_words_from_phrase, |
| | 1485 | tag_to_words_fnc_map={'8564_u': get_words_from_fulltext}, |
| | 1486 | is_fulltext_index=is_fulltext_index, |
| | 1487 | wash_index_terms=50) |
| 1471 | 1488 | _last_word_table = wordTable |
| 1472 | 1489 | wordTable.report_on_table_consistency() |
| 1473 | 1490 | try: |
| … |
… |
def task_run_core(): |
| 1525 | 1542 | task_sleep_now_if_required(can_stop_too=True) |
| 1526 | 1543 | |
| 1527 | 1544 | # Let's work on pairs now |
| 1528 | | wordTable = WordTable(index_id, index_tags, reindex_prefix + 'idxPAIR%02dF', get_pairs_from_phrase, {'8564_u': get_nothing_from_phrase}, False) |
| | 1545 | wordTable = WordTable(index_id=index_id, |
| | 1546 | fields_to_index=index_tags, |
| | 1547 | table_name_pattern=reindex_prefix + 'idxPAIR%02dF', |
| | 1548 | default_get_words_fnc=get_pairs_from_phrase, |
| | 1549 | tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, |
| | 1550 | wash_index_terms=100) |
| 1529 | 1551 | _last_word_table = wordTable |
| 1530 | 1552 | wordTable.report_on_table_consistency() |
| 1531 | 1553 | try: |
| … |
… |
def task_run_core(): |
| 1588 | 1610 | fnc_get_phrases_from_phrase = get_exact_authors_from_phrase |
| 1589 | 1611 | else: |
| 1590 | 1612 | fnc_get_phrases_from_phrase = get_phrases_from_phrase |
| 1591 | | wordTable = WordTable(index_id, index_tags, reindex_prefix + 'idxPHRASE%02dF', |
| 1592 | | fnc_get_phrases_from_phrase, |
| 1593 | | {'8564_u': get_nothing_from_phrase}, False) |
| | 1613 | wordTable = WordTable(index_id=index_id, |
| | 1614 | fields_to_index=index_tags, |
| | 1615 | table_name_pattern=reindex_prefix + 'idxPHRASE%02dF', |
| | 1616 | default_get_words_fnc=fnc_get_phrases_from_phrase, |
| | 1617 | tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, |
| | 1618 | wash_index_terms=0) |
| 1594 | 1619 | _last_word_table = wordTable |
| 1595 | 1620 | wordTable.report_on_table_consistency() |
| 1596 | 1621 | try: |