updates to nhs search app

2023-10-11 20:04:54 +03:00 · 2022-07-14 17:26:24 +02:00
parent fe4679f5a8
commit e2676ef671
2 changed files with 590 additions and 901 deletions
--- a/integrations/haystack/nhs-search/notebooks/00_indexing.ipynb
+++ b/integrations/haystack/nhs-search/notebooks/00_indexing.ipynb
--- a/integrations/haystack/nhs-search/notebooks/01_test_pipeline.ipynb
+++ b/integrations/haystack/nhs-search/notebooks/01_test_pipeline.ipynb
@@ -59,12 +59,12 @@
     "text": [
      "INFO - haystack.modeling.utils -  Using devices: CPU\n",
      "INFO - haystack.modeling.utils -  Number of GPUs: 0\n",
-      "INFO - haystack.nodes.retriever.dense -  Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1\n"
+      "INFO - haystack.retriever.dense -  Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1\n"
     ]
    }
   ],
   "source": [
-    "from haystack.nodes.retriever import EmbeddingRetriever\n",
+    "from haystack.retriever.dense import EmbeddingRetriever\n",
    "\n",
    "retriever = EmbeddingRetriever(\n",
    "    document_store=document_store,\n",
@@ -87,118 +87,8 @@
      "INFO - haystack.modeling.model.language_model -  LOADING MODEL\n",
      "INFO - haystack.modeling.model.language_model -  =============\n",
      "INFO - haystack.modeling.model.language_model -  Could not find deepset/roberta-base-squad2-distilled locally.\n",
-      "INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "fae8519fb44d405bacae249f96bb9d69",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/729 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "098a78dd37da48b5bccdf95c85e27bbc",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/473M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO - haystack.modeling.model.language_model -  Loaded deepset/roberta-base-squad2-distilled\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "da97f59cc2854872b11751048ab910bc",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/295 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4670b856e53645eeb59a22922d3d082e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/780k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "938ab35200e54240870e2eee72d6bda2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "33780f9f6e0f495893004257e95d7c5e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bc09919fe2d64cae89b573fc93042b6b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
+      "INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...\n",
+      "INFO - haystack.modeling.model.language_model -  Loaded deepset/roberta-base-squad2-distilled\n",
      "INFO - haystack.modeling.utils -  Using devices: CPU\n",
      "INFO - haystack.modeling.utils -  Number of GPUs: 0\n",
      "INFO - haystack.modeling.infer -  Got ya 9 parallel workers to do inference ...\n",
@@ -213,7 +103,8 @@
    "from haystack.pipelines import ExtractiveQAPipeline\n",
    "\n",
    "reader = FARMReader(\n",
-    "    model_name_or_path='deepset/roberta-base-squad2-distilled'\n",
+    "    model_name_or_path='deepset/roberta-base-squad2-distilled',\n",
+    "    use_gpu=True\n",
    ")\n",
    "pipe = ExtractiveQAPipeline(reader, retriever)"
   ]
@@ -233,7 +124,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "baca227f02f647c8ad8f76bd121a220c",
+       "model_id": "ffea2aea7d8743b4a2eeb63f74ed5f5f",
       "version_major": 2,
       "version_minor": 0
      },
@@ -292,11 +183,11 @@
     "text": [
      "Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]/Users/jamesbriggs/opt/anaconda3/envs/ml/lib/python3.9/site-packages/haystack/modeling/model/prediction_head.py:483: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
      "  start_indices = flat_sorted_indices // max_seq_len\n",
-      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  8.38 Batches/s]\n",
-      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.36 Batches/s]\n",
-      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.45 Batches/s]\n",
-      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.58 Batches/s]\n",
-      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.61 Batches/s]\n"
+      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  7.88 Batches/s]\n",
+      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.03 Batches/s]\n",
+      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.26 Batches/s]\n",
+      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.73 Batches/s]\n",
+      "Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  9.27 Batches/s]\n"
     ]
    }
   ],
@@ -416,7 +307,7 @@
    {
     "data": {
      "text/plain": [
-       "<Answer {'answer': 'pregnant women', 'type': 'extractive', 'score': 0.8104832470417023, 'context': 'atment Complications Pre-eclampsia is a condition that affects some pregnant women, usually during the second half of pregnancy (from 20 weeks) or soo', 'offsets_in_document': [{'start': 140, 'end': 154}], 'offsets_in_context': [{'start': 68, 'end': 82}], 'document_id': '3bc401b213c2720c83ee9bddb0e769b8', 'meta': {'url': 'www.nhs.uk_conditions_pre-eclampsia_.txt'}}>"
+       "<Answer {'answer': 'pregnant women', 'type': 'extractive', 'score': 0.8104832470417023, 'context': 'atment Complications Pre-eclampsia is a condition that affects some pregnant women, usually during the second half of pregnancy (from 20 weeks) or soo', 'offsets_in_document': [{'start': 140, 'end': 154}], 'offsets_in_context': [{'start': 68, 'end': 82}], 'document_id': '3bc401b213c2720c83ee9bddb0e769b8', 'meta': {'url': 'www.nhs.uk/conditions/pre-eclampsia'}}>"
      ]
     },
     "execution_count": 10,
@@ -456,7 +347,7 @@
    {
     "data": {
      "text/plain": [
-       "'www.nhs.uk_conditions_pre-eclampsia_.txt'"
+       "'www.nhs.uk/conditions/pre-eclampsia'"
      ]
     },
     "execution_count": 12,