From 9e021187b6444fa7352905fa795ef1914e8d03c8 Mon Sep 17 00:00:00 2001
From: Benoit Thiell <bthiell@localhost.(none)>
Date: Mon, 5 Jul 2010 09:08:37 -0400
Subject: [PATCH] Miscutil: Separate extraction of fulltext and load-demo-records
* Because text extraction is a long process that is not needed every time
one sets up a demo site, it has been separated from the default
load-demo-records routine. It is now a separate inveniocfg option,
'--extract-text-from-records'.
* This also makes the text extraction capabilities of Invenio more visible
to new users, as the install guide encourages them to run a command
named '--extract-text-from-records'.
---
INSTALL | 7 ++++++
modules/miscutil/lib/inveniocfg.py | 29 ++++++++++++++++++++++--
modules/miscutil/lib/testutils.py | 3 +-
modules/webhelp/web/hacking/test-suite.webdoc | 1 +
4 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/INSTALL b/INSTALL
index f38833c..4db6d75 100644
|
a
|
b
|
Contents |
| 241 | 241 | $ sudo /etc/init.d/apache2 restart |
| 242 | 242 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --create-demo-site |
| 243 | 243 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --load-demo-records |
| | 244 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --extract-text-from-records |
| 244 | 245 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --run-unit-tests |
| 245 | 246 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --run-regression-tests |
| 246 | 247 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --run-web-tests |
| … |
… |
Contents |
| 606 | 607 | indexing and searching of your local CDS Invenio demo |
| 607 | 608 | installation. |
| 608 | 609 | |
| | 610 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --extract-text-from-records |
| | 611 | |
| | 612 | Optionally, populate the fulltext index by extracting text from the |
| | 613 | fulltext files. This also runs OCR (Optical Character Recognition) |
| | 614 | on one document (record ID 97) for demonstration purposes. |
| | 615 | |
| 609 | 616 | $ sudo -u www-data /opt/cds-invenio/bin/inveniocfg --run-unit-tests |
| 610 | 617 | |
| 611 | 618 | Optionally, you can run the unit test suite to verify the |
diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py
index 8f3c6d8..dcf3b63 100644
|
a
|
b
|
def cli_cmd_load_demo_records(conf): |
| 691 | 691 | """Load demo records. Useful for testing purposes.""" |
| 692 | 692 | from invenio.config import CFG_PREFIX |
| 693 | 693 | from invenio.dbquery import run_sql |
| | 694 | |
| | 695 | from invenio.bibindex_engine import get_all_indexes |
| | 696 | all_indexes = get_all_indexes() |
| | 697 | # Run all indexes except fulltext index for speed reasons. |
| | 698 | all_indexes.remove('fulltext') |
| | 699 | |
| 694 | 700 | print ">>> Going to load demo records..." |
| 695 | 701 | run_sql("TRUNCATE schTASK") |
| 696 | 702 | for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX), |
| 697 | 703 | "%s/bin/bibupload 1" % CFG_PREFIX, |
| 698 | | "%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX, |
| 699 | | "%s/bin/bibdocfile --textify --all" % CFG_PREFIX, |
| 700 | | "%s/bin/bibindex -u admin" % CFG_PREFIX, |
| | 704 | "%s/bin/bibindex -w %s -u admin" % (CFG_PREFIX, ','.join(all_indexes)), |
| 701 | 705 | "%s/bin/bibindex 2" % CFG_PREFIX, |
| 702 | 706 | "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX, |
| 703 | 707 | "%s/bin/bibreformat 3" % CFG_PREFIX, |
| … |
… |
def cli_cmd_load_demo_records(conf): |
| 710 | 714 | sys.exit(1) |
| 711 | 715 | print ">>> Demo records loaded successfully." |
| 712 | 716 | |
| | 717 | def cli_cmd_extract_text_from_records(conf): |
| | 718 | """Extracts the text from the records and populates the fulltext index.""" |
| | 719 | from invenio.config import CFG_PREFIX |
| | 720 | from invenio.dbquery import run_sql |
| | 721 | print ">>> Going to extract text from demo records..." |
| | 722 | run_sql("TRUNCATE schTASK") |
| | 723 | # Use (slow) OCR only for one document for demonstration purpose. |
| | 724 | for cmd in ["%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX, |
| | 725 | "%s/bin/bibdocfile --textify --all" % CFG_PREFIX, |
| | 726 | "%s/bin/bibindex -w fulltext -u admin" % CFG_PREFIX, |
| | 727 | "%s/bin/bibindex 2" % CFG_PREFIX]: |
| | 728 | if os.system(cmd): |
| | 729 | print "ERROR: failed execution of", cmd |
| | 730 | sys.exit(1) |
| | 731 | print ">>> Texts extracted successfully." |
| | 732 | |
| 713 | 733 | def cli_cmd_remove_demo_records(conf): |
| 714 | 734 | """Remove demo records. Useful when you are finished testing.""" |
| 715 | 735 | print ">>> Going to remove demo records..." |
| … |
… |
def main(): |
| 1165 | 1185 | elif opt == '--load-demo-records': |
| 1166 | 1186 | cli_cmd_load_demo_records(conf) |
| 1167 | 1187 | done = True |
| | 1188 | elif opt == '--extract-text-from-records': |
| | 1189 | cli_cmd_extract_text_from_records(conf) |
| | 1190 | done = True |
| 1168 | 1191 | elif opt == '--remove-demo-records': |
| 1169 | 1192 | cli_cmd_remove_demo_records(conf) |
| 1170 | 1193 | done = True |
diff --git a/modules/miscutil/lib/testutils.py b/modules/miscutil/lib/testutils.py
index e05ffb8..d8ed2d2 100644
|
a
|
b
|
def warn_user_about_tests(test_suite_type='regression'): |
| 76 | 76 | ** ** |
| 77 | 77 | ** $ inveniocfg --drop-demo-site \ ** |
| 78 | 78 | ** --create-demo-site \ ** |
| 79 | | ** --load-demo-records ** |
| | 79 | ** --load-demo-records \ ** |
| | 80 | ** --extract-text-from-records ** |
| 80 | 81 | ** ** |
| 81 | 82 | ** Note that DOING THE ABOVE WILL ERASE YOUR ENTIRE DATABASE. ** |
| 82 | 83 | ** ** |
diff --git a/modules/webhelp/web/hacking/test-suite.webdoc b/modules/webhelp/web/hacking/test-suite.webdoc
index 7474d2e..c4fa427 100644
|
a
|
b
|
$ /opt/cds-invenio/bin/inveniocfg --run-regression-tests |
| 450 | 450 | ** $ inveniocfg --drop-demo-site \ ** |
| 451 | 451 | ** --create-demo-site \ ** |
| 452 | 452 | ** --load-demo-records ** |
| | 453 | ** --extract-text-from-records ** |
| 453 | 454 | ** ** |
| 454 | 455 | ** Note that DOING THE ABOVE WILL ERASE YOUR ENTIRE DATABASE. ** |
| 455 | 456 | ** ** |