Skip to content

Commit 42ba23b

Browse files
committed
Small fixes and improvements
1 parent 7bbf435 commit 42ba23b

10 files changed

+572
-659
lines changed

03-Quering-AOpenAI.ipynb

+87-95
Large diffs are not rendered by default.

04-Complex-Docs.ipynb

+148-51
Large diffs are not rendered by default.

07-TabularDataQA.ipynb

+23-36
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,16 @@
8585
"name": "stdout",
8686
"output_type": "stream",
8787
"text": [
88-
"--2024-03-15 09:41:47-- https://covidtracking.com/data/download/all-states-history.csv\n",
89-
"Resolving covidtracking.com (covidtracking.com)... 104.21.64.114, 172.67.183.132, 2606:4700:3032::ac43:b784, ...\n",
90-
"Connecting to covidtracking.com (covidtracking.com)|104.21.64.114|:443... connected.\n",
88+
"--2024-04-19 22:32:36-- https://covidtracking.com/data/download/all-states-history.csv\n",
89+
"Resolving covidtracking.com (covidtracking.com)... 172.67.183.132, 104.21.64.114, 2606:4700:3034::6815:4072, ...\n",
90+
"Connecting to covidtracking.com (covidtracking.com)|172.67.183.132|:443... connected.\n",
9191
"HTTP request sent, awaiting response... 200 OK\n",
9292
"Length: unspecified [text/csv]\n",
93-
"Saving to: ‘./data/all-states-history.csv’\n",
93+
"Saving to: ‘./data/all-states-history.csv.1’\n",
9494
"\n",
95-
"all-states-history. [ <=> ] 2.61M --.-KB/s in 0.05s \n",
95+
"all-states-history. [ <=> ] 2.61M --.-KB/s in 0.02s \n",
9696
"\n",
97-
"2024-03-15 09:41:48 (49.1 MB/s) - ‘./data/all-states-history.csv’ saved [2738601]\n",
97+
"2024-04-19 22:32:36 (106 MB/s) - ‘./data/all-states-history.csv.1’ saved [2738601]\n",
9898
"\n"
9999
]
100100
}
@@ -335,19 +335,6 @@
335335
"execution_count": 5,
336336
"metadata": {},
337337
"output_type": "execute_result"
338-
},
339-
{
340-
"name": "stderr",
341-
"output_type": "stream",
342-
"text": [
343-
"Bad pipe message: %s [b'~z9\\xc7j\\x11F\\xa0xX\\x0f\\xc7\\x83\\xd7\\xbb9\\x0f\\x1e Ar\\xf0\\x18*\\xf2\\x89\\xd6\\xdb^\\n?a\\xe0\\x10(\\x99\\xd7~\\x8e\\x80U\\xb1j\\xb5\\xcdH+\\xfb\\x16\\x9bY\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00']\n",
344-
"Bad pipe message: %s [b\"h\\x80\\xdb\\x00lL\\xcbt\\xb8e\\xb68;e\\xd9\\x8bj\\xa4\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\"]\n",
345-
"Bad pipe message: %s [b'']\n",
346-
"Bad pipe message: %s [b'\\xd7%y\\xa0\\xc1\\xd2\\x87p\\xae\\x9bK\\x01D\\xe9\\xa7\\xa7\\x96\\xb5\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\xc0\\x0f\\xc0\\x05\\x005\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00C\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.']\n",
347-
"Bad pipe message: %s [b'\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00\\t\\x00\\n\\x00#\\x00\\x00\\x00\\x0f\\x00\\x01\\x01']\n",
348-
"Bad pipe message: %s [b'u\\xab\\xc0\\xb9\\xba\\xbfB\\xbd\\xf1qH\\x9a=\\xd9DE\\xa8W\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0\\r\\xc0\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00', b'\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00\\x06\\x00\\x17\\x00\\x03\\xc0\\x10\\xc0\\x06\\xc0\\x15\\xc0\\x0b\\xc0\\x01']\n",
349-
"Bad pipe message: %s [b'\\xba\\x86D\\xb1\\x7fBR\\xd0\\xed1\\xd3\\xf21\\xc3\\xdc\\xdbl\\x8c\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0\\r\\xc0\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00\\x1a\\x00\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00', b'\\x17\\x00\\x03\\xc0\\x10']\n"
350-
]
351338
}
352339
],
353340
"source": [
@@ -483,15 +470,15 @@
483470
"Invoking: `python_repl_ast` with `{'query': \"df[df['date'].str.startswith('2020-07')]['hospitalizedIncrease'].sum()\"}`\n",
484471
"\n",
485472
"\n",
486-
"\u001b[0m\u001b[36;1m\u001b[1;3m63105\u001b[0m\u001b[32;1m\u001b[1;3mIn July 2020, there were 0 patients hospitalized in Texas. Nationwide, the total number of patients hospitalized in July 2020 across all states was 63,105.\u001b[0m\n",
473+
"\u001b[0m\u001b[36;1m\u001b[1;3m63105\u001b[0m\u001b[32;1m\u001b[1;3mIn July 2020, there were 0 patients hospitalized in Texas. Nationwide, the total number of hospitalized patients across all states in July 2020 was 63,105.\u001b[0m\n",
487474
"\n",
488475
"\u001b[1m> Finished chain.\u001b[0m\n"
489476
]
490477
},
491478
{
492479
"data": {
493480
"text/markdown": [
494-
"In July 2020, there were 0 patients hospitalized in Texas. Nationwide, the total number of patients hospitalized in July 2020 across all states was 63,105."
481+
"In July 2020, there were 0 patients hospitalized in Texas. Nationwide, the total number of hospitalized patients across all states in July 2020 was 63,105."
495482
],
496483
"text/plain": [
497484
"<IPython.core.display.Markdown object>"
@@ -504,8 +491,8 @@
504491
"name": "stdout",
505492
"output_type": "stream",
506493
"text": [
507-
"CPU times: user 196 ms, sys: 19.7 ms, total: 216 ms\n",
508-
"Wall time: 2.76 s\n"
494+
"CPU times: user 147 ms, sys: 16.5 ms, total: 163 ms\n",
495+
"Wall time: 1.33 s\n"
509496
]
510497
}
511498
],
@@ -578,7 +565,7 @@
578565
},
579566
{
580567
"cell_type": "code",
581-
"execution_count": 14,
568+
"execution_count": 13,
582569
"id": "c76e0a0c-b615-45d9-991d-035ddb28f09f",
583570
"metadata": {},
584571
"outputs": [
@@ -702,30 +689,30 @@
702689
"2 3380 \n",
703690
"3 0 \n",
704691
"4 45110 \u001b[0m\u001b[32;1m\u001b[1;3m\n",
705-
"Invoking: `python_repl_ast` with `{'query': \"df.loc[(df['date'].str.contains('2020-07')) & (df['state'] == 'TX'), 'hospitalizedIncrease'].sum()\"}`\n",
692+
"Invoking: `python_repl_ast` with `{'query': \"df[df['date'].str.contains('2020-07') & (df['state'] == 'TX')]['hospitalizedIncrease'].sum()\"}`\n",
706693
"\n",
707694
"\n",
708695
"\u001b[0m\u001b[36;1m\u001b[1;3m0\u001b[0m\u001b[32;1m\u001b[1;3m\n",
709-
"Invoking: `python_repl_ast` with `{'query': \"df.loc[df['date'].str.contains('2020-07'), 'hospitalizedIncrease'].sum()\"}`\n",
696+
"Invoking: `python_repl_ast` with `{'query': \"df[df['date'].str.contains('2020-07')]['hospitalizedIncrease'].sum()\"}`\n",
710697
"\n",
711698
"\n",
712-
"\u001b[0m\u001b[36;1m\u001b[1;3m63105\u001b[0m\u001b[32;1m\u001b[1;3mThe number of patients hospitalized during July 2020 in Texas is 0, and nationwide the total hospitalized patients for all states during July 2020 is 63,105.\n",
699+
"\u001b[0m\u001b[36;1m\u001b[1;3m63105\u001b[0m\u001b[32;1m\u001b[1;3mThe number of patients hospitalized in Texas during July 2020 was 0, and the nationwide total of all states was 63,105.\n",
713700
"\n",
714701
"Explanation:\n",
715-
"- To find the number of hospitalized patients in Texas during July 2020, we filtered the dataset for rows where the date contains '2020-07' and the state is 'TX'. Then we summed the values in the 'hospitalizedIncrease' column for Texas, which resulted in 0 hospitalizations.\n",
716-
"- To find the nationwide total of hospitalized patients for all states during July 2020, we filtered the dataset for rows where the date contains '2020-07' and summed the values in the 'hospitalizedIncrease' column, resulting in 63,105 hospitalizations.\u001b[0m\n",
702+
"- To find the number of patients hospitalized in Texas during July 2020, I filtered the dataset based on the date containing '2020-07' and the state being 'TX', then summed the 'hospitalizedIncrease' column. The result was 0.\n",
703+
"- To find the nationwide total for all states during July 2020, I filtered the dataset based on the date containing '2020-07' and summed the 'hospitalizedIncrease' column. The result was 63,105.\u001b[0m\n",
717704
"\n",
718705
"\u001b[1m> Finished chain.\u001b[0m\n"
719706
]
720707
},
721708
{
722709
"data": {
723710
"text/markdown": [
724-
"The number of patients hospitalized during July 2020 in Texas is 0, and nationwide the total hospitalized patients for all states during July 2020 is 63,105.\n",
711+
"The number of patients hospitalized in Texas during July 2020 was 0, and the nationwide total of all states was 63,105.\n",
725712
"\n",
726713
"Explanation:\n",
727-
"- To find the number of hospitalized patients in Texas during July 2020, we filtered the dataset for rows where the date contains '2020-07' and the state is 'TX'. Then we summed the values in the 'hospitalizedIncrease' column for Texas, which resulted in 0 hospitalizations.\n",
728-
"- To find the nationwide total of hospitalized patients for all states during July 2020, we filtered the dataset for rows where the date contains '2020-07' and summed the values in the 'hospitalizedIncrease' column, resulting in 63,105 hospitalizations."
714+
"- To find the number of patients hospitalized in Texas during July 2020, I filtered the dataset based on the date containing '2020-07' and the state being 'TX', then summed the 'hospitalizedIncrease' column. The result was 0.\n",
715+
"- To find the nationwide total for all states during July 2020, I filtered the dataset based on the date containing '2020-07' and summed the 'hospitalizedIncrease' column. The result was 63,105."
729716
],
730717
"text/plain": [
731718
"<IPython.core.display.Markdown object>"
@@ -738,8 +725,8 @@
738725
"name": "stdout",
739726
"output_type": "stream",
740727
"text": [
741-
"CPU times: user 488 ms, sys: 4.79 ms, total: 492 ms\n",
742-
"Wall time: 5.31 s\n"
728+
"CPU times: user 325 ms, sys: 5.25 ms, total: 331 ms\n",
729+
"Wall time: 2.66 s\n"
743730
]
744731
}
745732
],
@@ -762,7 +749,7 @@
762749
},
763750
{
764751
"cell_type": "code",
765-
"execution_count": 15,
752+
"execution_count": 14,
766753
"id": "42209997-aa2a-4b97-b94b-a203bc4c6096",
767754
"metadata": {},
768755
"outputs": [],
@@ -775,7 +762,7 @@
775762
},
776763
{
777764
"cell_type": "code",
778-
"execution_count": 16,
765+
"execution_count": 15,
779766
"id": "349c3020-3383-4ad3-83a4-07c1ead1207d",
780767
"metadata": {},
781768
"outputs": [

0 commit comments

Comments
 (0)