Initial Commit

master
Don Aldrich 3 years ago
commit be8a0afa28
  1. BIN
      .DS_Store
  2. 0
      .gitignore
  3. 8
      .vscode/settings.json
  4. 136
      documentation/weasyprint.ipynb
  5. 138
      playground/nlp/spacy.ipynb
  6. BIN
      unpublished/Unfamiliar tools/.DS_Store
  7. 3068
      unpublished/Unfamiliar tools/URL Watch.ipynb
  8. 0
      unpublished/Unfamiliar tools/kinto/.pypirc
  9. 177
      unpublished/Unfamiliar tools/kinto/Import_Export_Data.ipynb
  10. 35
      unpublished/Unfamiliar tools/kinto/Post_Job_Post.ipynb
  11. 2
      unpublished/Unfamiliar tools/kinto/dump.yaml
  12. 97
      unpublished/Unfamiliar tools/kinto/full_dump.yaml
  13. 10
      unpublished/Unfamiliar tools/kinto/pip.conf
  14. 79
      unpublished/Unfamiliar tools/kinto/record_dump.yaml
  15. 88
      unpublished/Unfamiliar tools/kinto/schema_dump.yaml
  16. 380
      unpublished/Unfamiliar tools/kinto/test_functional.py
  17. 86
      unpublished/Unfamiliar tools/kinto/upload.yaml
  18. 56
      unpublished/Unfamiliar tools/rich.ipynb
  19. 114
      unpublished/Unfamiliar tools/robot_framework/Untitled.ipynb
  20. 65
      unpublished/Unfamiliar tools/toapi.ipynb
  21. 5
      unpublished/Unfamiliar tools/untitled.py
  22. BIN
      unpublished/directus/.DS_Store
  23. 127
      unpublished/directus/Add Container Inventory to Directus.ipynb
  24. 326
      unpublished/directus/Add Homebrew Inventory to Directusa.ipynb
  25. 317
      unpublished/directus/Add Inventories to wiki.ipynb
  26. 193
      unpublished/directus/Add Linkace Inventory to Directus.ipynb
  27. 291
      unpublished/directus/Add traefic routers.ipynb
  28. 1559
      unpublished/directus/Dashmachine Config Template.ipynb
  29. 86
      unpublished/directus/Directus query.ipynb
  30. 101
      unpublished/directus/Untitled-Copy1.ipynb
  31. 353
      unpublished/directus/Untitled1.ipynb
  32. 324
      unpublished/directus/Update wiki-js.ipynb
  33. 110
      unpublished/directus/Wiki-js query.ipynb
  34. 455
      unpublished/directus/directus.ipynb
  35. 88
      unpublished/directus/get_data.ipynb
  36. 106
      unpublished/directus/template.ini
  37. 43
      unpublished/directus/template.test
  38. 3
      unpublished/directus/template.txt
  39. 106
      unpublished/directus/templates/template.ini
  40. 38
      unpublished/directus/templates/template.test
  41. 1190
      unpublished/directus/test.ini
  42. 1862
      unpublished/directus/test.json
  43. 300
      unpublished/directus/upload.ipynb
  44. 1387
      unpublished/docs/Regex_Basics.ipynb
  45. 579
      unpublished/docs/Scrapy.ipynb
  46. 673
      unpublished/docs/Scrapy_1.ipynb
  47. 194
      unpublished/docs/Scrapy_2.ipynb
  48. 246
      unpublished/docs/Scrapy_3.ipynb
  49. 246
      unpublished/docs/Scrapy_31.ipynb
  50. 405
      unpublished/docs/Selenium_1.ipynb
  51. 221
      unpublished/docs/Selenium_2.ipynb
  52. 64
      unpublished/docs/Untitled.ipynb
  53. BIN
      unpublished/job_scrape/.DS_Store
  54. 1712
      unpublished/job_scrape/Linkedin_EasyApply.ipynb
  55. 265
      unpublished/job_scrape/Upload_json.ipynb
  56. 2030
      unpublished/job_scrape/Widget List.ipynb
  57. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/cards_click-checkpoint.png
  58. 325
      unpublished/job_scrape/data/.ipynb_checkpoints/detail_panel-checkpoint.html
  59. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/foo-checkpoint.png
  60. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/full_page-checkpoint.png
  61. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/home-checkpoint.png
  62. 1212
      unpublished/job_scrape/data/.ipynb_checkpoints/jobID_2778653969-checkpoint.html
  63. 2651
      unpublished/job_scrape/data/.ipynb_checkpoints/jobID_2779424097-checkpoint.html
  64. 2648
      unpublished/job_scrape/data/.ipynb_checkpoints/jobID_2779881681-checkpoint.html
  65. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/job_card-checkpoint.png
  66. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/job_detail_page-checkpoint.png
  67. 102
      unpublished/job_scrape/data/.ipynb_checkpoints/jobs-checkpoint.json
  68. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/jobs_details-checkpoint.png
  69. 4261
      unpublished/job_scrape/data/.ipynb_checkpoints/page-checkpoint.html
  70. 6108
      unpublished/job_scrape/data/.ipynb_checkpoints/page1-checkpoint.html
  71. 2265
      unpublished/job_scrape/data/.ipynb_checkpoints/page1_list-checkpoint.html
  72. 4380
      unpublished/job_scrape/data/.ipynb_checkpoints/post_click-checkpoint.html
  73. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/post_click-checkpoint.png
  74. 2820
      unpublished/job_scrape/data/.ipynb_checkpoints/post_login-checkpoint.html
  75. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/post_login-checkpoint.png
  76. 4668
      unpublished/job_scrape/data/.ipynb_checkpoints/post_scrape-checkpoint.html
  77. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/query_results-checkpoint.png
  78. 6059
      unpublished/job_scrape/data/.ipynb_checkpoints/result-checkpoint.html
  79. 4171
      unpublished/job_scrape/data/.ipynb_checkpoints/resultSenior DevOps Engineer-checkpoint.html
  80. 0
      unpublished/job_scrape/data/.ipynb_checkpoints/session_cookies-checkpoint.json
  81. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/submit_apply-checkpoint.png
  82. 83
      unpublished/job_scrape/data/.ipynb_checkpoints/test-checkpoint.html
  83. BIN
      unpublished/job_scrape/data/.ipynb_checkpoints/title-checkpoint.png
  84. BIN
      unpublished/job_scrape/data/cards_click.png
  85. 311
      unpublished/job_scrape/data/detail_panel.html
  86. BIN
      unpublished/job_scrape/data/foo.png
  87. BIN
      unpublished/job_scrape/data/full_page.png
  88. BIN
      unpublished/job_scrape/data/home.png
  89. 2668
      unpublished/job_scrape/data/jobID_2778653969.html
  90. 2651
      unpublished/job_scrape/data/jobID_2779424097.html
  91. 2648
      unpublished/job_scrape/data/jobID_2779881681.html
  92. BIN
      unpublished/job_scrape/data/job_card.png
  93. BIN
      unpublished/job_scrape/data/job_detail_page.png
  94. 102
      unpublished/job_scrape/data/jobs.json
  95. BIN
      unpublished/job_scrape/data/jobs_details.png
  96. 2272
      unpublished/job_scrape/data/list_posts_1.html
  97. 4256
      unpublished/job_scrape/data/page.html
  98. 4623
      unpublished/job_scrape/data/page1.html
  99. 0
      unpublished/job_scrape/data/page1.json
  100. 2265
      unpublished/job_scrape/data/page1_list.html
  101. Some files were not shown because too many files have changed in this diff Show More

BIN
.DS_Store vendored

Binary file not shown.

@ -0,0 +1,8 @@
{
"python.formatting.provider": "black",
"python.testing.pytestArgs": [
"awesome_list"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}

@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "77e0481d-3f52-469d-a7bf-d0e705e5a2a4",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0mCollecting weasyprint\n",
" Downloading weasyprint-55.0-py3-none-any.whl (249 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m249.5/249.5 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: Pillow>=4.0.0 in /opt/homebrew/lib/python3.9/site-packages (from weasyprint) (9.0.1)\n",
"Collecting cssselect2>=0.1\n",
" Downloading cssselect2-0.6.0-py3-none-any.whl (15 kB)\n",
"Collecting pydyf>=0.0.3\n",
" Downloading pydyf-0.2.0-py3-none-any.whl (6.6 kB)\n",
"Collecting Pyphen>=0.9.1\n",
" Downloading pyphen-0.12.0-py3-none-any.whl (2.0 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: tinycss2>=1.0.0 in /opt/homebrew/lib/python3.9/site-packages (from weasyprint) (1.1.1)\n",
"Requirement already satisfied: cffi>=0.6 in /opt/homebrew/lib/python3.9/site-packages (from weasyprint) (1.15.0)\n",
"Collecting html5lib>=1.1\n",
" Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.2/112.2 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting fonttools[woff]>=4.0.0\n",
" Downloading fonttools-4.33.3-py3-none-any.whl (930 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m930.9/930.9 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pycparser in /opt/homebrew/lib/python3.9/site-packages (from cffi>=0.6->weasyprint) (2.21)\n",
"Requirement already satisfied: webencodings in /opt/homebrew/lib/python3.9/site-packages (from cssselect2>=0.1->weasyprint) (0.5.1)\n",
"Collecting brotli>=1.0.1\n",
" Downloading Brotli-1.0.9-cp39-cp39-macosx_10_9_universal2.whl (786 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m786.7/786.7 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting zopfli>=0.1.4\n",
" Downloading zopfli-0.2.1-cp39-cp39-macosx_10_9_universal2.whl (289 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.8/289.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: six>=1.9 in /opt/homebrew/lib/python3.9/site-packages (from html5lib>=1.1->weasyprint) (1.12.0)\n",
"Installing collected packages: brotli, zopfli, Pyphen, pydyf, html5lib, fonttools, cssselect2, weasyprint\n",
"\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n",
"\u001b[0mSuccessfully installed Pyphen-0.12.0 brotli-1.0.9 cssselect2-0.6.0 fonttools-4.33.3 html5lib-1.1 pydyf-0.2.0 weasyprint-55.0 zopfli-0.2.1\n",
"\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n",
"\u001b[0m"
]
}
],
"source": [
"!pip install weasyprint"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "db0a9ff4-403f-472e-8e1e-2fa36062e35d",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '/User/donaldrich/Desktop/weasyprint-website.pdf'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/Users/donaldrich/Projects/secrets/code/Weazyprint/weasyprint.ipynb Cell 2'\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/donaldrich/Projects/secrets/code/Weazyprint/weasyprint.ipynb#ch0000001?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mweasyprint\u001b[39;00m \u001b[39mimport\u001b[39;00m HTML\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/donaldrich/Projects/secrets/code/Weazyprint/weasyprint.ipynb#ch0000001?line=2'>3</a>\u001b[0m HTML(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://weasyprint.org/\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mwrite_pdf(\u001b[39m\"\u001b[39;49m\u001b[39m/User/donaldrich/Desktop/weasyprint-website.pdf\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
"File \u001b[0;32m/opt/homebrew/lib/python3.9/site-packages/weasyprint/__init__.py:177\u001b[0m, in \u001b[0;36mHTML.write_pdf\u001b[0;34m(self, target, stylesheets, zoom, attachments, finisher, presentational_hints, optimize_size, font_config, counter_style, image_cache)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrite_pdf\u001b[39m(\u001b[39mself\u001b[39m, target\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, stylesheets\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, zoom\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m 133\u001b[0m attachments\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, finisher\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, presentational_hints\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 134\u001b[0m optimize_size\u001b[39m=\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mfonts\u001b[39m\u001b[39m'\u001b[39m,), font_config\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 135\u001b[0m counter_style\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, image_cache\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[1;32m 136\u001b[0m \u001b[39m\"\"\"Render the document to a PDF file.\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \n\u001b[1;32m 138\u001b[0m \u001b[39m This is a shortcut for calling :meth:`render`, then\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 174\u001b[0m \n\u001b[1;32m 175\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 177\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrender(\n\u001b[1;32m 178\u001b[0m stylesheets, presentational_hints\u001b[39m=\u001b[39;49mpresentational_hints,\n\u001b[1;32m 179\u001b[0m optimize_size\u001b[39m=\u001b[39;49moptimize_size, font_config\u001b[39m=\u001b[39;49mfont_config,\n\u001b[1;32m 180\u001b[0m counter_style\u001b[39m=\u001b[39;49mcounter_style, image_cache\u001b[39m=\u001b[39;49mimage_cache)\n\u001b[1;32m 181\u001b[0m 
\u001b[39m.\u001b[39;49mwrite_pdf(target, zoom, attachments, finisher))\n",
"File \u001b[0;32m/opt/homebrew/lib/python3.9/site-packages/weasyprint/document.py:853\u001b[0m, in \u001b[0;36mDocument.write_pdf\u001b[0;34m(self, target, zoom, attachments, finisher)\u001b[0m\n\u001b[1;32m 851\u001b[0m shutil\u001b[39m.\u001b[39mcopyfileobj(file_obj, target)\n\u001b[1;32m 852\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 853\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(target, \u001b[39m'\u001b[39;49m\u001b[39mwb\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m fd:\n\u001b[1;32m 854\u001b[0m shutil\u001b[39m.\u001b[39mcopyfileobj(file_obj, fd)\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/User/donaldrich/Desktop/weasyprint-website.pdf'"
]
}
],
"source": [
"from weasyprint import HTML\n",
"\n",
"HTML(\"http://weasyprint.org/\").write_pdf(\"/User/donaldrich/Desktop/weasyprint.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b66cdca8-02c4-4761-84b1-a0a91991fb0a",
"metadata": {},
"outputs": [],
"source": [
"https://selenium-python.readthedocs.io/_/downloads/en/latest/pdf/"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "4b1a1586-0823-4046-ab19-0fa99a0f241b",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: spacy in /opt/conda/lib/python3.10/site-packages (3.3.1)\n",
"Requirement already satisfied: pathy>=0.3.5 in /opt/conda/lib/python3.10/site-packages (from spacy) (0.6.1)\n",
"Requirement already satisfied: thinc<8.1.0,>=8.0.14 in /opt/conda/lib/python3.10/site-packages (from spacy) (8.0.17)\n",
"Requirement already satisfied: numpy>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (1.21.6)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (1.0.2)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (2.28.0)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (3.3.0)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from spacy) (2.0.6)\n",
"Requirement already satisfied: blis<0.8.0,>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (0.7.7)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (1.0.7)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/conda/lib/python3.10/site-packages (from spacy) (3.0.6)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /opt/conda/lib/python3.10/site-packages (from spacy) (2.4.3)\n",
"Requirement already satisfied: packaging>=20.0 in /home/jovyan/.local/lib/python3.10/site-packages (from spacy) (21.3)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /opt/conda/lib/python3.10/site-packages (from spacy) (2.0.7)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /opt/conda/lib/python3.10/site-packages (from spacy) (0.9.1)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (4.64.0)\n",
"Requirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from spacy) (62.3.4)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /opt/conda/lib/python3.10/site-packages (from spacy) (3.0.9)\n",
"Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from spacy) (3.1.2)\n",
"Requirement already satisfied: typer<0.5.0,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from spacy) (0.4.1)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /opt/conda/lib/python3.10/site-packages (from spacy) (1.8.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->spacy) (3.0.9)\n",
"Requirement already satisfied: smart-open<6.0.0,>=5.0.0 in /opt/conda/lib/python3.10/site-packages (from pathy>=0.3.5->spacy) (5.2.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy) (4.2.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2022.6.15)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.0.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy) (1.26.9)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /opt/conda/lib/python3.10/site-packages (from typer<0.5.0,>=0.3.0->spacy) (8.1.3)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->spacy) (2.1.1)\n",
"Collecting en-core-web-sm==3.3.0\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: spacy<3.4.0,>=3.3.0.dev0 in /opt/conda/lib/python3.10/site-packages (from en-core-web-sm==3.3.0) (3.3.1)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.4.3)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (1.8.2)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (4.64.0)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.0.6)\n",
"Requirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (62.3.4)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.0.9)\n",
"Requirement already satisfied: typer<0.5.0,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (0.4.1)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (0.9.1)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (1.0.2)\n",
"Requirement already satisfied: packaging>=20.0 in /home/jovyan/.local/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (21.3)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.3.0)\n",
"Requirement already satisfied: blis<0.8.0,>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (0.7.7)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.28.0)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.0.6)\n",
"Requirement already satisfied: numpy>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (1.21.6)\n",
"Requirement already satisfied: thinc<8.1.0,>=8.0.14 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (8.0.17)\n",
"Requirement already satisfied: pathy>=0.3.5 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (0.6.1)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (1.0.7)\n",
"Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.1.2)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /opt/conda/lib/python3.10/site-packages (from spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.0.7)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.0.9)\n",
"Requirement already satisfied: smart-open<6.0.0,>=5.0.0 in /opt/conda/lib/python3.10/site-packages (from pathy>=0.3.5->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (5.2.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (4.2.0)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.0.12)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2022.6.15)\n",
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (3.3)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (1.26.9)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /opt/conda/lib/python3.10/site-packages (from typer<0.5.0,>=0.3.0->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (8.1.3)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->spacy<3.4.0,>=3.3.0.dev0->en-core-web-sm==3.3.0) (2.1.1)\n",
"Installing collected packages: en-core-web-sm\n",
"Successfully installed en-core-web-sm-3.3.0\n",
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
"You can now load the package via spacy.load('en_core_web_sm')\n"
]
}
],
"source": [
"!pip install -U spacy\n",
"!python -m spacy download en_core_web_sm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0d80ae8-94a9-45b6-af8b-790dbb12e469",
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"from spacy import displacy\n",
"\n",
"text = \"When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.\"\n",
"\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"doc = nlp(text)\n",
"displacy.serve(doc, style=\"ent\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2b9b88c-343b-4839-b56a-48423773a03b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,177 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2ccc0579-93b3-4f31-944e-682159034c96",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"b'ZG9uYWxkcmljaDppYmFuZXo2OQ=='\n"
]
}
],
"source": [
"# <!-- base64(\"username:password\") -->\n",
"\n",
"import base64\n",
"\n",
"encoded = base64.b64encode(b'username:password')\n",
"\n",
"print(encoded)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "537efa07",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: kinto-wizard in /home/donaldrich/.local/lib/python3.8/site-packages (4.0.1)\n",
"Requirement already satisfied: kinto-http in /home/donaldrich/.local/lib/python3.8/site-packages (10.7.0)\n",
"Requirement already satisfied: unidecode in /home/donaldrich/.local/lib/python3.8/site-packages (from kinto-http) (1.2.0)\n",
"Requirement already satisfied: requests>=2.8.1 in /usr/lib/python3/dist-packages (from kinto-http) (2.22.0)\n",
"Requirement already satisfied: ruamel.yaml in /home/donaldrich/.local/lib/python3.8/site-packages (from kinto-wizard) (0.17.16)\n",
"Requirement already satisfied: jsonschema in /usr/lib/python3/dist-packages (from kinto-wizard) (3.2.0)\n",
"Requirement already satisfied: ruamel.yaml.clib>=0.1.2 in /home/donaldrich/.local/lib/python3.8/site-packages (from ruamel.yaml->kinto-wizard) (0.2.6)\n",
"\u001b[33mWARNING: You are using pip version 21.0.1; however, version 21.2.4 is available.\n",
"You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"# import sys\n",
"!{sys.executable} -m pip install kinto-wizard kinto-http\n",
"\n",
"# import builtinsss\n",
"import io\n",
"import os\n",
"# import pytest\n",
"# import unittest\n",
"# import sys\n",
"# from contextlib import contextmanager, redirect_stdout\n",
"import yaml\n",
"import requests\n",
"\n",
"from kinto_http import Client, exceptions\n",
"from kinto_wizard.__main__ import main\n"
]
},
{
"cell_type": "markdown",
"id": "56336648",
"metadata": {},
"source": [
"## Export Schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "334aa655",
"metadata": {},
"outputs": [],
"source": [
"stream = os.popen(\n",
" 'kinto-wizard dump --data --server http://kinto:8888/v1 --auth username:password'\n",
")\n",
"output = stream.read()\n",
"yaml_out = yaml.load(output, Loader=yaml.FullLoader)\n",
"with open(r'dump.yaml', 'w') as file:\n",
" documents = yaml.dump(yaml_out, file)"
]
},
{
"cell_type": "markdown",
"id": "acde56d6",
"metadata": {},
"source": [
"# Upload"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "cbe39405",
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'full.yaml'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-24-13c58aac364e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0myaml\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'full.yaml'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'r'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdocuments\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0myaml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mLoader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0myaml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFullLoader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'full.yaml'"
]
}
],
"source": [
"import yaml\n",
"\n",
"with open(r'full.yaml', 'r') as file:\n",
" documents = yaml.load(file, Loader=yaml.FullLoader)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56c38c26",
"metadata": {},
"outputs": [],
"source": [
"# import sys\n",
"# !{sys.executable} -m pip install kinto-wizard kinto-http\n",
"\n",
"import os\n",
"\n",
"stream = os.popen(\n",
" 'kinto-wizard load --server http://kinto:8888/v1 --auth username:password upload.yaml'\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a70021ec",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,35 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = requests.post('https://json.donavanaldrich.com/v1/buckets', json = [object_list])\n",
"response = requests.post('https://json.donavanaldrich.com/v1/buckets', json = [object_list])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,97 @@
buckets:
configurations:
collections:
editors:
data:
id: editors
last_modified: 1630337131580
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
records: {}
data:
id: configurations
last_modified: 1630337131544
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
grapesjs:
collections:
templates:
data:
id: templates
last_modified: 1630357574437
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
records: {}
data:
id: grapesjs
last_modified: 1630357574392
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
job_posts:
collections:
ziprecruiter:
data:
id: ziprecruiter
last_modified: 1630337131788
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
records:
44c22342-6c54-4777-867e-b4da365a691f:
data:
hi: test
id: 44c22342-6c54-4777-867e-b4da365a691f
last_modified: 1630399491080
permissions: {}
data:
id: job_posts
last_modified: 1630337131648
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich

@ -0,0 +1,10 @@
[global]
index = http://localhost:8081/repository/pypi-all/pypi
index-url = http://localhost:8081/repository/pypi-all/simple
[distutils]
index-servers =
pypi
[pypi]
repository: http://localhost:8081/repository/pypi-internal/
username: admin
password: admin123

@ -0,0 +1,79 @@
buckets:
configurations:
collections:
editors:
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
records: {}
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
grapesjs:
collections:
templates:
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
records: {}
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
job_posts:
collections:
ziprecruiter:
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
records:
44c22342-6c54-4777-867e-b4da365a691f:
data:
hi: test
id: 44c22342-6c54-4777-867e-b4da365a691f
last_modified: 1630399491080
permissions: {}
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich

@ -0,0 +1,88 @@
buckets:
configurations:
collections:
editors:
data:
id: editors
last_modified: 1630337131580
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
data:
id: configurations
last_modified: 1630337131544
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
grapesjs:
collections:
templates:
data:
id: templates
last_modified: 1630357574437
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
data:
id: grapesjs
last_modified: 1630357574392
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
job_posts:
collections:
ziprecruiter:
data:
id: ziprecruiter
last_modified: 1630337131788
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
data:
id: job_posts
last_modified: 1630337131648
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich

@ -0,0 +1,380 @@
import builtins
import io
import os
import pytest
import unittest
import sys
from contextlib import contextmanager, redirect_stdout
import requests
from kinto_http import Client, exceptions
from kinto_wizard.__main__ import main
def load(server, auth, file, bucket=None, collection=None, extra=None):
    """Invoke ``kinto-wizard load`` through its console entry point.

    Builds the command line piece by piece, installs it as ``sys.argv`` and
    calls ``main()`` directly, returning whatever ``main()`` returns.
    """
    pieces = ['kinto-wizard load {}'.format(file),
              '--server={}'.format(server),
              '--auth={}'.format(auth)]
    if bucket:
        pieces.append('--bucket={}'.format(bucket))
    if collection:
        pieces.append('--collection={}'.format(collection))
    if extra:
        # `extra` may hold several space-separated flags; they get re-split below.
        pieces.append(extra)
    load_cmd = ' '.join(pieces)
    sys.argv = load_cmd.strip().split(" ")
    return main()
def dump(server, auth, bucket=None, collection=None):
    """Invoke ``kinto-wizard dump --full`` and return the captured stdout.

    Optional *bucket*/*collection* narrow the dump to that scope.
    """
    args = [f'kinto-wizard dump --full --server={server} --auth={auth}']
    if bucket:
        args.append(f'--bucket={bucket}')
    if collection:
        args.append(f'--collection={collection}')
    sys.argv = ' '.join(args).split(" ")
    # Capture everything main() prints so callers can diff it against a file.
    buffer = io.StringIO()
    with redirect_stdout(buffer):
        main()
    buffer.flush()
    return buffer.getvalue()
def validate(filename):
    """Invoke ``kinto-wizard validate`` on *filename* via the entry point."""
    sys.argv = 'kinto-wizard validate'.split(" ") + [filename]
    return main()
class FunctionalTest(unittest.TestCase):
    """Base class for tests against a live kinto server.

    Server coordinates come from the environment; the methods are thin
    wrappers around the module-level ``load``/``dump``/``validate`` helpers.
    """

    server = os.getenv("SERVER_URL", "http://localhost:8888/v1")
    auth = os.getenv("AUTH", "user:pass")
    file = os.getenv("FILE", "tests/kinto.yaml")

    def setUp(self):
        # Wipe all server state between tests (kinto's test-only endpoint).
        requests.post(self.server + "/__flush__")

    def load(self, bucket=None, collection=None, filename=None, extra=None):
        target = filename if filename else self.file
        return load(self.server, self.auth, target, bucket, collection, extra)

    def dump(self, bucket=None, collection=None):
        return dump(self.server, self.auth, bucket, collection)

    def validate(self, filename=None, code=0):
        """Validate a dump file and expect exit status *code*.

        NOTE(review): if ``validate`` returns without raising SystemExit the
        expected *code* is never checked — preserved as-is, confirm intended.
        """
        target = filename if filename else self.file
        try:
            validate(target)
        except SystemExit as e:
            if e.code != code:
                self.fail(f"Unexpected validation status {e.code} != {code}")
class DryRunLoad(FunctionalTest):
    """A ``--dry-run`` load must not create anything on the server."""

    def test_dry_round_trip(self):
        command = 'kinto-wizard load {} --server={} --auth={} --dry-run'.format(
            self.file, self.server, self.auth)
        sys.argv = command.split(" ")
        main()
        # The fixture's "staging" bucket must not have been created.
        client = Client(server_url=self.server, auth=tuple(self.auth.split(':')))
        with pytest.raises(exceptions.KintoException):
            client.get_bucket(id="staging")
@contextmanager
def mockInput(mock):
    """Temporarily replace ``builtins.input`` so every prompt returns *mock*.

    Fix: the original did not restore ``builtins.input`` when the body
    raised, leaving the mock installed for every subsequent test. The
    restoration now happens in a ``finally`` clause.
    """
    original_input = builtins.input
    builtins.input = lambda _: mock
    try:
        yield
    finally:
        builtins.input = original_input
class SimpleDump(FunctionalTest):
    """Load the fixture, dump it back, and compare byte-for-byte."""

    def test_round_trip(self):
        # Load the fixture onto the server.
        sys.argv = ('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth)).split(" ")
        main()
        # Dump it back, capturing stdout.
        sys.argv = ('kinto-wizard dump --server={} --auth={}'.format(
            self.server, self.auth)).split(" ")
        captured = io.StringIO()
        with redirect_stdout(captured):
            main()
        captured.flush()
        # The round-tripped dump must equal the original file exactly.
        with open(self.file) as f:
            assert f.read() == captured.getvalue()
class FullDump(FunctionalTest):
    """Round-trips of ``kinto-wizard dump --full`` plus forced / client-wins
    loads and ``--delete-records`` confirmation handling."""

    file = os.getenv("FILE", "tests/kinto-full.yaml")

    # Record shipped by the fixture, and a stray one added by the tests.
    _EXISTING = '0831d549-0a69-48dd-b240-feef94688d47'
    _ADDED = '8031d549-0a69-48dd-b240-feef94688d47'

    def _run(self, command):
        # Feed a full command line straight to the console entry point.
        sys.argv = command.split(" ")
        main()

    def _client(self):
        # Authenticated client for direct server manipulation.
        return Client(server_url=self.server, auth=tuple(self.auth.split(':')))

    def test_round_trip(self):
        # Load some data, then dump it back with --full.
        self._run('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth))
        sys.argv = ('kinto-wizard dump --server={} --auth={} --full'.format(
            self.server, self.auth)).split(" ")
        captured = io.StringIO()
        with redirect_stdout(captured):
            main()
        captured.flush()
        # The generated dump must match the fixture byte-for-byte.
        with open(self.file) as f:
            assert f.read() == captured.getvalue()

    def test_round_trip_with_client_wins(self):
        self._run('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth))
        # Wipe a fixture record's attributes directly on the server.
        client = self._client()
        client.update_record(bucket='build-hub', collection='archives',
                             id=self._EXISTING, data={})
        record = client.get_record(bucket='build-hub', collection='archives',
                                   id=self._EXISTING)
        assert set(record['data'].keys()) == {'id', 'last_modified'}
        # A forced client-wins load restores the fixture's attributes.
        self._run('kinto-wizard load {} --server={} -D --auth={} --force'.format(
            self.file, self.server, self.auth))
        record = client.get_record(bucket='build-hub', collection='archives',
                                   id=self._EXISTING)
        assert set(record['data'].keys()) != {'id', 'last_modified'}

    def test_round_trip_with_client_wins_and_delete_missing_records(self):
        self._run('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth))
        # Create a record the fixture does not know about.
        client = self._client()
        client.create_record(bucket='build-hub', collection='archives',
                             id=self._ADDED, data={})
        self._run('kinto-wizard load {} --server={} -D --auth={} --force --delete-records'.format(
            self.file, self.server, self.auth))
        # --delete-records must have removed the stray record.
        with pytest.raises(exceptions.KintoException) as exc:
            client.get_record(bucket='build-hub', collection='archives',
                              id=self._ADDED)
        assert "'Not Found'" in str(exc.value)

    def test_round_trip_with_delete_missing_records_ask_for_confirmation(self):
        self._run('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth))
        client = self._client()
        client.create_record(bucket='build-hub', collection='archives',
                             id=self._ADDED, data={})
        # Without --force, deletion prompts for confirmation; answer "yes".
        command = 'kinto-wizard load {} --server={} -D --auth={} --delete-records'.format(
            self.file, self.server, self.auth)
        sys.argv = command.split(" ")
        with mockInput('yes'):
            main()
        with pytest.raises(exceptions.KintoException) as exc:
            client.get_record(bucket='build-hub', collection='archives',
                              id=self._ADDED)
        assert "'Not Found'" in str(exc.value)

    def test_round_trip_with_delete_missing_records_handle_misconfirmation(self):
        self._run('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth))
        client = self._client()
        client.create_record(bucket='build-hub', collection='archives',
                             id=self._ADDED, data={})
        command = 'kinto-wizard load {} --server={} -D --auth={} --delete-records'.format(
            self.file, self.server, self.auth)
        sys.argv = command.split(" ")
        # Answering "no" must abort the load with SystemExit.
        with mockInput('no'):
            with pytest.raises(SystemExit):
                main()
class DataRecordsDump(FunctionalTest):
    """Round-trip using ``dump --data --records`` against the full fixture."""

    file = os.getenv("FILE", "tests/kinto-full.yaml")

    def test_round_trip(self):
        # Load the full fixture.
        sys.argv = ('kinto-wizard load {} --server={} --auth={}'.format(
            self.file, self.server, self.auth)).split(" ")
        main()
        # Dump with --data --records, capturing stdout.
        sys.argv = ('kinto-wizard dump --server={} --auth={} --data --records'.format(
            self.server, self.auth)).split(" ")
        captured = io.StringIO()
        with redirect_stdout(captured):
            main()
        captured.flush()
        # The dump must match the fixture byte-for-byte.
        with open(self.file) as f:
            assert f.read() == captured.getvalue()
class BucketCollectionSelectionableDump(FunctionalTest):
    """Loads and dumps restricted to a specific bucket and/or collection."""

    file = os.getenv("FILE", "tests/dumps/dump-full.yaml")

    def _expect_dump(self, expected_path, generated):
        # A generated dump must equal the stored expectation exactly.
        with open(expected_path) as f:
            assert f.read() == generated

    def test_validate(self):
        self.validate()

    def test_round_trip_with_bucket_selection_on_load(self):
        self.load(bucket="natim")
        self._expect_dump("tests/dumps/dump-natim.yaml", self.dump())

    def test_round_trip_with_bucket_selection(self):
        self.load()
        self._expect_dump("tests/dumps/dump-natim.yaml", self.dump(bucket="natim"))

    def test_round_trip_with_bucket_collection_selection_on_load(self):
        self.load(bucket="natim", collection="toto")
        self._expect_dump("tests/dumps/dump-natim-toto-groups.yaml", self.dump())

    def test_round_trip_with_bucket_collection_selection(self):
        self.load()
        self._expect_dump("tests/dumps/dump-natim-toto.yaml",
                          self.dump(bucket="natim", collection="toto"))

    def test_round_trip_with_collection_selection_on_load(self):
        self.load(collection="toto")
        self._expect_dump("tests/dumps/dump-toto-groups.yaml", self.dump())

    def test_round_trip_with_collection_selection(self):
        self.load()
        self._expect_dump("tests/dumps/dump-toto.yaml", self.dump(collection="toto"))

    def test_wizard_can_handle_dates(self):
        # Dumps containing date values must survive a round trip.
        self.load(bucket="date")
        self._expect_dump("tests/dumps/dump-date.yaml", self.dump())
class YAMLReferenceSupportTest(FunctionalTest):
    """Dump files may use YAML anchors/references without side effects."""

    file = os.getenv("FILE", "tests/dumps/with-references.yaml")

    def test_validate(self):
        self.validate()

    def test_file_can_have_yaml_references(self):
        self.load()
        client = Client(server_url=self.server, auth=tuple(self.auth.split(':')))
        # Both collections share a schema via a YAML reference.
        for collection_id in ("certificates", "addons"):
            collection = client.get_collection(bucket="main", id=collection_id)
            assert 'url' in collection['data']['schema']['properties']
        # the anchor did not get interpreted as a bucket:
        with self.assertRaises(exceptions.KintoException):
            client.get_collection(bucket="attachment-schema")
class WrongSchemaValidationTest(FunctionalTest):
    # Fixture whose records violate their declared JSON schema.
    file = "tests/dumps/wrong-schema.yaml"
    def test_validate(self):
        # `kinto-wizard validate` must exit with status 1 on the bad dump.
        self.validate(code=1)
class MiscUpdates(FunctionalTest):
    """Schema validation, batch 4xx handling, and record/group updates."""

    def get_client(self):
        # Authenticated client for direct server access.
        user_pass = tuple(self.auth.split(':'))
        return Client(server_url=self.server, auth=user_pass)

    def test_validate(self):
        # This dump's schema requires `title` and a record lacks it -> status 1.
        self.validate(filename="tests/dumps/with-schema-1.yaml", code=1)
        # This dump's schema does not require `title` -> valid.
        self.validate(filename="tests/dumps/with-schema-2.yaml")

    def test_raises_with_4xx_error_in_batch(self):
        # A 4xx inside the batch surfaces as KintoBatchException...
        with pytest.raises(exceptions.KintoBatchException):
            self.load(filename="tests/dumps/with-schema-1.yaml")
        # ...and nothing gets written.
        fetched = self.get_client().get_records(bucket="natim", collection="toto")
        assert len(fetched) == 0

    def test_ignore_batch_4xx_errors_if_specified(self):
        # With --ignore-batch-4xx the same load completes without raising.
        self.load(filename="tests/dumps/with-schema-1.yaml", extra="--ignore-batch-4xx")

    def test_record_updates(self):
        record_id = "e2686bac-c45e-4144-9738-edfeb3d9da6d"
        self.load(filename="tests/dumps/with-schema-1.yaml", extra="--ignore-batch-4xx")
        client = self.get_client()
        # Pre-create the record with a conflicting title and timestamp...
        client.create_record(data={'title': 'titi', 'last_modified': 1496132479110},
                             id=record_id,
                             collection='toto', bucket='natim')
        # ...then re-loading must overwrite it with the fixture's value.
        self.load(filename="tests/dumps/with-schema-2.yaml")
        stored = client.get_record(id=record_id,
                                   collection='toto', bucket='natim')
        assert stored["data"]["title"] == "toto"

    def test_group_updates(self):
        self.load(filename="tests/dumps/with-groups.yaml")
        client = self.get_client()
        # Drift the group membership on the server...
        client.update_group(data={"members": ["alexis", "mathieu", "remy"]},
                            id="toto", bucket="natim")
        # ...then re-loading resets it to the fixture's members.
        self.load(filename="tests/dumps/with-groups.yaml")
        stored = client.get_group(id="toto", bucket='natim')
        assert stored["data"]["members"] == ["alexis", "mathieu"]

@ -0,0 +1,86 @@
buckets:
configurations:
collections:
editors:
data:
id: editors
last_modified: 1630332338655
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
data:
id: configurations
last_modified: 1630332338631
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
job_posts:
collections:
ziprecruiter:
data:
id: ziprecruiter
last_modified: 1630336651455
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
write:
- account:donaldrich
data:
id: job_posts
last_modified: 1630332338677
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
grapesjs:
collections:
templates:
data:
id: templates
permissions:
read:
- account:donaldrich
- system.Everyone
record:create:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone
data:
id: grapesjs
groups: {}
permissions:
collection:create:
- account:donaldrich
group:create:
- account:donaldrich
read:
- account:donaldrich
- system.Everyone
write:
- account:donaldrich
- system.Everyone

@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1933fb9b-3ebd-42d1-a1cd-094127ace37e",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"This example demonstrates how to write a custom highlighter.\n",
"\"\"\"\n",
"\n",
"from random import randint\n",
"\n",
"from rich import print\n",
"from rich.highlighter import Highlighter\n",
"\n",
"\n",
"class RainbowHighlighter(Highlighter):\n",
" def highlight(self, text):\n",
" for index in range(len(text)):\n",
" text.stylize(f\"color({randint(16, 255)})\", index, index + 1)\n",
"\n",
"\n",
"rainbow = RainbowHighlighter()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "db42eb08",
"metadata": {},
"source": [
"# Robot Framework\n",
"\n",
"[docs](https://robocorp.com/docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2f5ca23-5eed-4eb6-9c1b-2152b08009a5",
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mFailed to start the Kernel. \n",
"Kernel Python 3.9.13 64-bit is not usable. Check the Jupyter output tab for more information. \n",
"View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"*** Settings ***\n",
"Documentation Simple example using SeleniumLibrary.\n",
"Library SeleniumLibrary\n",
"\n",
"*** Variables ***\n",
"${LOGIN URL} http://localhost:7272\n",
"${BROWSER} Chrome\n",
"\n",
"*** Test Cases ***\n",
"Valid Login\n",
" Open Browser To Login Page\n",
" Input Username demo\n",
" Input Password mode\n",
" Submit Credentials\n",
" Welcome Page Should Be Open\n",
" [Teardown] Close Browser\n",
"\n",
"*** Keywords ***\n",
"Open Browser To Login Page\n",
" Open Browser ${LOGIN URL} ${BROWSER}\n",
" Title Should Be Login Page\n",
"\n",
"Input Username\n",
" [Arguments] ${username}\n",
" Input Text username_field ${username}\n",
"\n",
"Input Password\n",
" [Arguments] ${password}\n",
" Input Text password_field ${password}\n",
"\n",
"Submit Credentials\n",
" Click Button login_button\n",
"\n",
"Welcome Page Should Be Open\n",
" Title Should Be Welcome Page"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78a7b507-e8f5-4dab-9d38-fb5396e92b42",
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mFailed to start the Kernel. \n",
"Kernel Python 3.9.13 64-bit is not usable. Check the Jupyter output tab for more information. \n",
"View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,65 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "5ed19dc5-4b2a-45b8-8b73-8ad1f787ad84",
"metadata": {},
"outputs": [],
"source": [
"from flask import request\n",
"from htmlparsing import Attr, Text\n",
"from toapi import Api, Item\n",
"\n",
"api = Api()\n",
"\n",
"\n",
"@api.site(\"https://news.ycombinator.com\")\n",
"@api.list(\".athing\")\n",
"@api.route(\"/posts?page={page}\", \"/news?p={page}\")\n",
"@api.route(\"/posts\", \"/news?p=1\")\n",
"class Post(Item):\n",
" url = Attr(\".storylink\", \"href\")\n",
" title = Text(\".storylink\")\n",
"\n",
"\n",
"@api.site(\"https://news.ycombinator.com\")\n",
"@api.route(\"/posts?page={page}\", \"/news?p={page}\")\n",
"@api.route(\"/posts\", \"/news?p=1\")\n",
"class Page(Item):\n",
" next_page = Attr(\".morelink\", \"href\")\n",
"\n",
" def clean_next_page(self, value):\n",
" return api.convert_string(\n",
" \"/\" + value,\n",
" \"/news?p={page}\",\n",
" request.host_url.strip(\"/\") + \"/posts?page={page}\",\n",
" )\n",
"\n",
"\n",
"api.run(debug=True, host=\"0.0.0.0\", port=5123)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,5 @@
# Minimal demo of `tomd`, a third-party HTML-to-Markdown converter.
import tomd
# Object-style API: build a Tomd instance and read its `.markdown` property.
tomd.Tomd('<h1>h1</h1>').markdown
# or
# Function-style API: one-shot conversion of an HTML string.
tomd.convert('<h1>h1</h1>')

Binary file not shown.

@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bec1b04-f1ab-4165-a4ea-cc890cc9e5ec",
"metadata": {},
"outputs": [],
"source": [
"!python -m pip install gql"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dca88210-18db-45ee-9949-7010b92dbae7",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"\n",
"import requests\n",
"from gql import Client, gql\n",
"from gql.transport.requests import RequestsHTTPTransport\n",
"\n",
"cwd = os.getcwd()\n",
"print(cwd)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"with open(\"/home/jovyan/code/directus/docker.json\") as f:\n",
" scrubbed_records = json.load(f)\n",
"\n",
"# print(scrubbed_records[0])\n",
"item = scrubbed_records[0]\n",
"name = item[\"Name\"]\n",
"image = item[\"Config\"][\"Image\"]\n",
"# print(name)\n",
"# print(image)\n",
"image = image.replace(\":latest\", \"\")\n",
"name = name.replace(\"/\", \"\")\n",
"print(name)\n",
"print(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" item = x\n",
" name = item[\"Name\"]\n",
" image = item[\"Config\"][\"Image\"]\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" print(name)\n",
" print(image)\n",
" input_set = {\"id\": name, \"image\": image, \"name\": name, \"raw\": item}\n",
"\n",
" my_headers = {\n",
" \"Authorization\": os.getenv(\"directus_token\"),\n",
" \"Content-Type\": \"application/json\",\n",
" }\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/containers\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"vscode": {
"interpreter": {
"hash": "340e956ee656efd8fdfb480dc033c937d9b626f8b21073bd1b5aa2a469586ea6"
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,326 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bec1b04-f1ab-4165-a4ea-cc890cc9e5ec",
"metadata": {},
"outputs": [],
"source": [
"!python -m pip install gql"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import csv\n",
"\n",
"with open(\"/home/jovyan/code/directus/homebrew.txt\") as f:\n",
" reader = csv.reader(f)\n",
" for row in reader:\n",
" item = row[0]\n",
" print(item)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0367eee-e052-4184-b016-8ecb0dd1aa32",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" data = x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" item = x\n",
" name = item[\"Name\"]\n",
" image = item[\"Config\"][\"Image\"]\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" print(name)\n",
" print(image)\n",
" input_set = {\"id\": name, \"image\": image, \"name\": name, \"raw\": item}\n",
"\n",
" my_headers = {\n",
" \"Authorization\": os.getenv(\"directus_token\"),\n",
" \"Content-Type\": \"application/json\",\n",
" }\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/containers\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4dc5471-3df9-4d52-865e-ca9202d472a3",
"metadata": {},
"outputs": [],
"source": [
"# https://formulae.brew.sh/api/cask.json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "74566291-5fb8-455a-b674-03c0733c6700",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'appcast': None,\n",
" 'artifacts': [['0 A.D..app'],\n",
" {'signal': {},\n",
" 'trash': '~/Library/Saved Application '\n",
" 'State/com.wildfiregames.0ad.savedState'}],\n",
" 'auto_updates': None,\n",
" 'caveats': None,\n",
" 'conflicts_with': None,\n",
" 'container': None,\n",
" 'depends_on': {'macos': {'>=': ['10.12']}},\n",
" 'desc': 'Real-time strategy game',\n",
" 'full_token': '0-ad',\n",
" 'homepage': 'https://play0ad.com/',\n",
" 'installed': None,\n",
" 'name': ['0 A.D.'],\n",
" 'outdated': False,\n",
" 'sha256': 'd1a2073dee3435d8a78bf289206248c9ed6be5e17f4ba1ac8412caf6d0eae0b0',\n",
" 'tap': 'homebrew/cask',\n",
" 'token': '0-ad',\n",
" 'url': 'https://releases.wildfiregames.com/0ad-0.0.25b-alpha-osx64.dmg',\n",
" 'version': '0.0.25b-alpha',\n",
" 'versions': {}}\n"
]
}
],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"\n",
"import requests\n",
"\n",
"my_headers = {\"Content-Type\": \"application/json\"}\n",
"raw_casks = requests.get(\"https://formulae.brew.sh/api/cask.json\")\n",
"\n",
"casks = raw_casks.json()\n",
"\n",
"# pprint.pprint(casks[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b3f64e7-019a-4c21-9de2-5c84bd22640f",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for cask in casks:\n",
" name = cask[\"name\"][0]\n",
" desc = cask[\"desc\"]\n",
" homepage = cask[\"homepage\"]\n",
" url = cask[\"url\"]\n",
" tap = cask[\"tap\"]\n",
" full_name = cask[\"full_token\"]\n",
" print(full_name)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1d6f86f0-91ab-4773-aba3-ace57852f9e4",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'raw' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_9736/2986828884.py\u001b[0m in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mcategory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"cask\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0minput_set\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"id\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfull_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"description\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdesc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"homepage\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mhomepage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"tap\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtap\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"raw\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mraw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"category\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcategory\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m response = requests.post(\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\"https://cms.donavanaldrich.com/items/all_homebrews\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'raw' is not defined"
]
}
],
"source": [
"my_headers = {\"Authorization\": os.getenv(\"directus_token\"), \"Content-Type\": \"application/json\"}\n",
"\n",
"for cask in casks:\n",
" name = cask[\"name\"][0]\n",
" desc = cask[\"desc\"]\n",
" homepage = cask[\"homepage\"]\n",
" url = cask[\"url\"]\n",
" tap = cask[\"tap\"]\n",
" full_name = cask[\"full_token\"]\n",
" category = \"cask\"\n",
" # raw\n",
"\n",
" input_set = {\n",
" \"id\": full_name,\n",
" \"description\": desc,\n",
" \"homepage\": homepage,\n",
" \"tap\": tap,\n",
" \"raw\": cask,\n",
" \"category\": category,\n",
" }\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/all_homebrews\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecab5d16-91a5-4a3d-9772-516410b8ce76",
"metadata": {},
"outputs": [],
"source": [
"# https://formulae.brew.sh/api/formula.json"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "762b9c4d-dbfb-45dc-a24f-ca1a3a3bbbcf",
"metadata": {},
"outputs": [],
"source": [
"my_headers = {\"Content-Type\": \"application/json\"}\n",
"raw_brews = requests.get(\"https://formulae.brew.sh/api/formula.json\")\n",
"\n",
"brews = raw_brews.json()\n",
"\n",
"pprint.pprint(brews[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bb9a11e-4343-421c-820d-ef927e02532f",
"metadata": {},
"outputs": [],
"source": [
"for brew in brews:\n",
" name = brew[\"name\"]\n",
" desc = brew[\"desc\"]\n",
" homepage = brew[\"homepage\"]\n",
" url = brew[\"urls\"][\"stable\"][\"url\"]\n",
" tap = brew[\"tap\"]\n",
" full_name = brew[\"full_name\"]\n",
" print(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9630a738-10ef-40a0-b666-097b0ab02c06",
"metadata": {},
"outputs": [],
"source": [
"my_headers = {\"Authorization\": os.getenv(\"directus_token\"), \"Content-Type\": \"application/json\"}\n",
"\n",
"for brew in brews:\n",
" name = brew[\"name\"]\n",
" desc = brew[\"desc\"]\n",
" homepage = brew[\"homepage\"]\n",
" url = brew[\"urls\"][\"stable\"][\"url\"]\n",
" tap = brew[\"tap\"]\n",
" full_name = brew[\"full_name\"]\n",
" category = \"brew\"\n",
"\n",
" input_set = {\n",
" \"id\": full_name,\n",
" \"description\": desc,\n",
" \"homepage\": homepage,\n",
" \"tap\": tap,\n",
" \"raw\": brew,\n",
" \"category\": category,\n",
" }\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/all_homebrews\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b835d52d-88bd-49f5-a1e3-d2b3c56e4fb2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bec1b04-f1ab-4165-a4ea-cc890cc9e5ec",
"metadata": {},
"outputs": [],
"source": [
"pip install gql"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dca88210-18db-45ee-9949-7010b92dbae7",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"\n",
"import requests\n",
"from gql import Client, gql\n",
"from gql.transport.requests import RequestsHTTPTransport\n",
"\n",
"cwd = os.getcwd()\n",
"print(cwd)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"active_workflow\n",
"automaticmode/active_workflow\n"
]
}
],
"source": [
"with open(\"/home/jovyan/code/directus/docker.json\") as f:\n",
" scrubbed_records = json.load(f)\n",
"\n",
"# print(scrubbed_records[0])\n",
"item = scrubbed_records[0]\n",
"name = item[\"Name\"]\n",
"image = item[\"Config\"][\"Image\"]\n",
"# print(name)\n",
"# print(image)\n",
"image = image.replace(\":latest\", \"\")\n",
"name = name.replace(\"/\", \"\")\n",
"print(name)\n",
"print(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0367eee-e052-4184-b016-8ecb0dd1aa32",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" data = x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" item = x\n",
" name = item[\"Name\"]\n",
" image = item[\"Config\"][\"Image\"]\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" print(name)\n",
" print(image)\n",
" input_set = {\"id\": name, \"image\": image, \"name\": name, \"raw\": item}\n",
"\n",
"    my_headers = {\"Authorization\": \"Bearer \" + os.environ[\"DIRECTUS_TOKEN\"], \"Content-Type\": \"application/json\"}\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/containers\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "927fe89f-9660-4681-884b-f258fc669b88",
"metadata": {},
"outputs": [],
"source": [
"pip install gql[all] aiohttp\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "a4dc5471-3df9-4d52-865e-ca9202d472a3",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'RequestsHTTPTransport' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_7244/3689489481.py\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m sample_transport = RequestsHTTPTransport(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"http://directus:8055/graphql\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0muse_json\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m headers={\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\"Content-type\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"application/json\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'RequestsHTTPTransport' is not defined"
]
}
],
"source": [
"\n",
"sample_transport = RequestsHTTPTransport(\n",
" url=\"http://directus:8055/graphql\",\n",
" use_json=True,\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
"        \"Authorization\": \"Bearer \" + os.environ[\"DIRECTUS_TOKEN\"],\n",
" },\n",
" verify=True,\n",
" retries=3,\n",
")\n",
"\n",
"client = Client(\n",
" transport=sample_transport,\n",
" fetch_schema_from_transport=True,\n",
")\n",
"\n",
"\n",
"query = gql(\n",
" '''\n",
" query {\n",
" pages {\n",
" single(id: 3) {\n",
" id\n",
" path\n",
" locale\n",
" title\n",
" description\n",
" contentType\n",
" isPublished\n",
" isPrivate\n",
" privateNS\n",
" createdAt\n",
" updatedAt\n",
" }\n",
" }\n",
" }\n",
"'''\n",
")\n",
"\n",
"# params = { \"title\": article_attributes_data-posted-on }\n",
"\n",
"result = client.execute(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48935cc6-c7c7-43f7-9fb7-c34bd457c903",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"from gql import gql, Client\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"transport = AIOHTTPTransport(\n",
" url=\"http://wiki:3000/graphql\",\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
"        \"Authorization\": \"Bearer \" + os.environ[\"DIRECTUS_TOKEN\"],\n",
" },\n",
")\n",
"\n",
"# Create a GraphQL client using the defined transport\n",
"# client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"client = Client(transport=transport, fetch_schema_from_transport=False)\n",
"# client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"\n",
"query = gql(\n",
" '''\n",
" query pageList {\n",
" pages {\n",
" list(locale: \"en\") {\n",
" id\n",
" path\n",
" locale\n",
" title\n",
" description\n",
" contentType\n",
" isPublished\n",
" isPrivate\n",
" privateNS\n",
" createdAt\n",
" updatedAt\n",
" tags\n",
" }\n",
" }\n",
" }\n",
"'''\n",
")\n",
"\n",
"\n",
"\n",
"# result = await session.execute(query)\n",
"# print(result)\n",
"result = await client.execute_async(query)\n",
"print(result)\n",
"\n",
" \n",
"# asyncio.run(main())\n",
"# Execute the query on the transport\n",
"# result = await client.execute_async(query)\n",
"# print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfe2738f-7b10-45e2-bb31-38e8820b1ed6",
"metadata": {},
"outputs": [],
"source": [
"from gql import gql, Client\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"# Select your transport with a defined url endpoint\n",
"transport = AIOHTTPTransport(url=\"https://countries.trevorblades.com/\")\n",
"\n",
"# Create a GraphQL client using the defined transport\n",
"client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"\n",
"# Provide a GraphQL query\n",
"query = gql(\n",
" \"\"\"\n",
" query getContinents {\n",
" continents {\n",
" code\n",
" name\n",
" }\n",
" }\n",
"\"\"\"\n",
")\n",
"\n",
"# Execute the query on the transport\n",
"result = client.execute(query)\n",
"print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5a2a0654",
"metadata": {},
"source": [
"curl --request GET \\\n",
" --url https://demo.linkace.org/api/v1/links \\\n",
" --header 'Authorization: Bearer undefined' \\\n",
" --header 'Content-Type: application/json' \\\n",
" --header 'accept: application/json'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60990b52",
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "markdown",
"id": "ebef409e-ddc1-4cc4-bd4d-a4839ba62d66",
"metadata": {},
"source": [
"## Pull links from Linkace"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "02d6d963-538a-47c1-957f-06c7ea7c7497",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"import urllib.parse\n",
"import sys\n",
"import requests\n",
"\n",
"my_headers = {\n",
"    \"Authorization\": \"Bearer \" + os.environ[\"LINKACE_TOKEN\"],\n",
" \"Content-Type\": \"application/json\",\n",
" \"Accept\": \"application/json\",\n",
"}\n",
"\n",
"response = requests.get(\n",
" \"http://linkace/api/v1/links\",\n",
" headers=my_headers,\n",
")\n",
"\n",
"initial = response.json()\n",
"\n",
"last = initial['last_page']\n",
"\n",
"links = []\n",
"\n",
"for x in range(1, last + 1):\n",
" response = requests.get(\n",
" \"http://linkace/api/v1/links?page=\" + str(x),\n",
" headers=my_headers,\n",
" )\n",
" result = response.json()\n",
" data = result['data']\n",
" links = links + data\n",
" \n",
"out_file = open(\"links.json\", \"w\")\n",
"json.dump(links, out_file, indent = 2)\n",
"out_file.close()"
]
},
{
"cell_type": "markdown",
"id": "e6a27401-cc7d-4ffe-834d-2f051dce7025",
"metadata": {},
"source": [
"## Update Links in Directus"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"ename": "JSONDecodeError",
"evalue": "[Errno Expecting value] : 0",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/requests/models.py:910\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 910\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcomplexjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 911\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 912\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 913\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;124;03mcontaining a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [7]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAuthorization\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mos.env(directus_token)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapplication/json\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 11\u001b[0m }\n\u001b[1;32m 12\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpatch(\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttp://directus:8055/items/bookmarks/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(myid),\n\u001b[1;32m 14\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[1;32m 15\u001b[0m json\u001b[38;5;241m=\u001b[39m(link),\n\u001b[1;32m 16\u001b[0m )\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/requests/models.py:917\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 915\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmessage)\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 917\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmsg, e\u001b[38;5;241m.\u001b[39mdoc, e\u001b[38;5;241m.\u001b[39mpos)\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: [Errno Expecting value] : 0"
]
}
],
"source": [
"for link in links:\n",
" url = link['url']\n",
" myid = link.pop(\"id\")\n",
" parsed_url = urllib.parse.urlparse(url)\n",
" domain = parsed_url.netloc\n",
" pdom = domain.replace(\"www.\", \"\")\n",
" link['domain'] = pdom\n",
" headers = {\n",
"        \"Authorization\": \"Bearer \" + os.environ[\"DIRECTUS_TOKEN\"],\n",
" \"Content-Type\": \"application/json\",\n",
" }\n",
" response = requests.patch(\n",
" \"http://directus:8055/items/bookmarks/\" + str(myid),\n",
" headers=headers,\n",
" json=(link),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9d5bb9f-be1e-4db2-b0a3-d0d179e6b6a6",
"metadata": {},
"outputs": [],
"source": [
"import urllib.parse\n",
"import sys\n",
"\n",
"link = links[1]\n",
"url = link['url']\n",
"myid = link.pop(\"id\")\n",
"print(link)\n",
"parsed_url = urllib.parse.urlparse(url)\n",
"domain = parsed_url.netloc\n",
"pdom = domain.replace(\"www.\", \"\")\n",
"print(pdom)\n",
"link['domain'] = pdom\n",
"\n",
"\n",
"print(link)\n",
"\n",
"print(myid)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,291 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e1e6443b-9422-4715-bed9-8c326984b96f",
"metadata": {},
"source": [
"curl --request GET \\\n",
" --url https://demo.linkace.org/api/v1/links \\\n",
" --header 'Authorization: Bearer undefined' \\\n",
" --header 'Content-Type: application/json' \\\n",
" --header 'accept: application/json'"
]
},
{
"cell_type": "markdown",
"id": "ebef409e-ddc1-4cc4-bd4d-a4839ba62d66",
"metadata": {},
"source": [
"## Pull links from Linkace"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "02d6d963-538a-47c1-957f-06c7ea7c7497",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"workflow.donavanaldrich.com\n",
"alertmanager.donavanaldrich.com\n",
"ansible.donavanaldrich.com\n",
"notify.donavanaldrich.com\n",
"authelia.donavanaldrich.com\n",
"password.donavanaldrich.com\n",
"password.donavanaldrich.com\n",
"password.donavanaldrich.com\n",
"blackbox.donavanaldrich.com\n",
"browserless.donavanaldrich.com\n",
"cachet.donavanaldrich.com\n",
"cachet.donavanaldrich.com\n",
"caddy.donavanaldrich.com\n",
"chronograf.donavanaldrich.com\n",
"cloudflare.donavanaldrich.com\n",
"cron.donavanaldrich.com\n",
"cronicle.donavanaldrich.com\n",
"cyberchef.donavanaldrich.com\n",
"menu.donavanaldrich.com\n",
"cms.donavanaldrich.com\n",
"cms.donavanaldrich.com\n",
"dozzle.donavanaldrich.com\n",
"elastic.donavanaldrich.com\n",
"gatus.donavanaldrich.com\n",
"grafana.donavanaldrich.com\n",
"home-config.donavanaldrich.com\n",
"health.donavanaldrich.com\n",
"health.donavanaldrich.com\n",
"health.donavanaldrich.com\n",
"home.donavanaldrich.com\n",
"homebridge.donavanaldrich.com\n",
"homer.donavanaldrich.com\n",
"huginn.donavanaldrich.com\n",
"influx.donavanaldrich.com\n",
"jackett.donavanaldrich.com\n",
"jupyter.donavanaldrich.com\n",
"api.donavanaldrich.com\n",
"kong.donavanaldrich.com\n",
"konga.donavanaldrich.com\n",
"lidarr.donavanaldrich.com\n",
"lidarr.donavanaldrich.com\n",
"bookmarks.donavanaldrich.com\n",
"bookmarks.donavanaldrich.com\n",
"chat.donavanaldrich.com\n",
"analytics.donavanaldrich.com\n",
"minio.donavanaldrich.com\n",
"minio.donavanaldrich.com\n",
"monica.donavanaldrich.com\n",
"netdata.donavanaldrich.com\n",
"cloud.donavanaldrich.com\n",
"dev.donavanaldrich.com\n",
"node.donavanaldrich.com\n",
"nodered.donavanaldrich.com\n",
"droid.donavanaldrich.com\n",
"media.donavanaldrich.com\n",
"docs.donavanaldrich.com\n",
"pdf-render.donavanaldrich.com\n",
"pgadmin.donavanaldrich.com\n",
"pgweb.donavanaldrich.com\n",
"mysql.donavanaldrich.com\n",
"pihole.donavanaldrich.com\n",
"plex.donavanaldrich.com\n",
"portainer.donavanaldrich.com\n",
"prometheus.donavanaldrich.com\n",
"promtail.donavanaldrich.com\n",
"proxmox.donavanaldrich.com\n",
"proxmox.donavanaldrich.com\n",
"pushgateway.donavanaldrich.com\n",
"pyload.donavanaldrich.com\n",
"radarr.donavanaldrich.com\n",
"radarr.donavanaldrich.com\n",
"redis.donavanaldrich.com\n",
"requests.donavanaldrich.com\n",
"requests.donavanaldrich.com\n",
"requestrr.donavanaldrich.com\n",
"requestrr.donavanaldrich.com\n",
"router.donavanaldrich.com\n",
"script.donavanaldrich.com\n",
"disk.donavanaldrich.com\n",
"snmp.donavanaldrich.com\n",
"sonarr.donavanaldrich.com\n",
"sonarr.donavanaldrich.com\n",
"speedtest.donavanaldrich.com\n",
"splash.donavanaldrich.com\n",
"ssh.donavanaldrich.com\n",
"swagger.donavanaldrich.com\n",
"traefik.donavanaldrich.com\n",
"traefik.donavanaldrich.com\n",
"transmission.donavanaldrich.com\n",
"books.donavanaldrich.com\n",
"books.donavanaldrich.com\n",
"vault.donavanaldrich.com\n",
"vault.donavanaldrich.com\n",
"vault.donavanaldrich.com\n",
"vpn.donavanaldrich.com\n",
"code.donavanaldrich.com\n"
]
}
],
"source": [
"import json\n",
"import re\n",
"import requests\n",
"\n",
"my_headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Accept\": \"application/json\",\n",
"}\n",
"\n",
"response = requests.get(\n",
" \"http://traefik:8080/api/http/routers\",\n",
" headers=my_headers,\n",
")\n",
"\n",
"\n",
"initial = response.json()\n",
"\n",
"\n",
"for x in initial:\n",
" service = x[\"rule\"]\n",
"    find = re.findall(r\"Host\\(`([^)]+)`\\)\", service)\n",
"    try:\n",
"        print(find[0])\n",
"    except IndexError:\n",
"        pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f3540d3-d599-4545-937b-50c5a3d9e2a7",
"metadata": {},
"outputs": [],
"source": [
"# alertmanager.donavanaldrich.com\n",
"# analytics.donavanaldrich.com\n",
"# ansible.donavanaldrich.com\n",
"# api.donavanaldrich.com\n",
"# blackbox.donavanaldrich.com\n",
"# bookmarks.donavanaldrich.com\n",
"# books.donavanaldrich.com\n",
"# browserless.donavanaldrich.com\n",
"# cachet.donavanaldrich.com\n",
"# caddy.donavanaldrich.com\n",
"# chat.donavanaldrich.com\n",
"# chronograf.donavanaldrich.com\n",
"# cloud.donavanaldrich.com\n",
"# cloudflare.donavanaldrich.com\n",
"# cms.donavanaldrich.com\n",
"# cron.donavanaldrich.com\n",
"# cronicle.donavanaldrich.com\n",
"# cyberchef.donavanaldrich.com\n",
"# dev.donavanaldrich.com\n",
"# disk.donavanaldrich.com\n",
"# docs.donavanaldrich.com\n",
"# dozzle.donavanaldrich.com\n",
"# droid.donavanaldrich.com\n",
"# elastic.donavanaldrich.com\n",
"# gatus.donavanaldrich.com\n",
"# grafana.donavanaldrich.com\n",
"# health.donavanaldrich.com\n",
"# home-config.donavanaldrich.com\n",
"# home.donavanaldrich.com\n",
"# homebridge.donavanaldrich.com\n",
"# homer.donavanaldrich.com\n",
"# huginn.donavanaldrich.com\n",
"# influx.donavanaldrich.com\n",
"# jackett.donavanaldrich.com\n",
"# jupyter.donavanaldrich.com\n",
"# kong.donavanaldrich.com\n",
"# konga.donavanaldrich.com\n",
"# lidarr.donavanaldrich.com\n",
"# media.donavanaldrich.com\n",
"# menu.donavanaldrich.com\n",
"# minio.donavanaldrich.com\n",
"# monica.donavanaldrich.com\n",
"# mysql.donavanaldrich.com\n",
"# netdata.donavanaldrich.com\n",
"# node.donavanaldrich.com\n",
"# nodered.donavanaldrich.com\n",
"# notify.donavanaldrich.com\n",
"# password.donavanaldrich.com\n",
"# pdf-render.donavanaldrich.com\n",
"# pgadmin.donavanaldrich.com\n",
"# pgweb.donavanaldrich.com\n",
"# pihole.donavanaldrich.com\n",
"# plex.donavanaldrich.com\n",
"# portainer.donavanaldrich.com\n",
"# prometheus.donavanaldrich.com\n",
"# promtail.donavanaldrich.com\n",
"# proxmox.donavanaldrich.com\n",
"# pushgateway.donavanaldrich.com\n",
"# pyload.donavanaldrich.com\n",
"# radarr.donavanaldrich.com\n",
"# redis.donavanaldrich.com\n",
"# requestrr.donavanaldrich.com\n",
"# requests.donavanaldrich.com\n",
"# router.donavanaldrich.com\n",
"# script.donavanaldrich.com\n",
"# snmp.donavanaldrich.com\n",
"# sonarr.donavanaldrich.com\n",
"# speedtest.donavanaldrich.com\n",
"# splash.donavanaldrich.com\n",
"# ssh.donavanaldrich.com\n",
"# swagger.donavanaldrich.com\n",
"# tautulli.donavanaldrich.com\n",
"# traefik.donavanaldrich.com\n",
"# traefik.donavanaldrich.com\n",
"# transmission.donavanaldrich.com\n",
"# vault.donavanaldrich.com\n",
"# workflow.donavanaldrich.com"
]
},
{
"cell_type": "markdown",
"id": "e6a27401-cc7d-4ffe-834d-2f051dce7025",
"metadata": {},
"source": [
"## Update Links in Directus"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,86 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "927fe89f-9660-4681-884b-f258fc669b88",
"metadata": {},
"outputs": [],
"source": [
"pip install gql[all] aiohttp"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4dc5471-3df9-4d52-865e-ca9202d472a3",
"metadata": {},
"outputs": [],
"source": [
"import asyncio\n",
"import os\n",
"\n",
"from gql import Client, gql\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"transport = AIOHTTPTransport(\n",
"    url=\"http://directus:8055/graphql\",\n",
"    headers={\n",
"        \"Content-type\": \"application/json\",\n",
"        \"Authorization\": \"Bearer \" + os.environ[\"DIRECTUS_TOKEN\"],\n",
" },\n",
")\n",
"\n",
"client = Client(transport=transport, fetch_schema_from_transport=False)\n",
"\n",
"query = gql(\n",
" \"\"\"\n",
" query {\n",
" pages {\n",
" id\n",
" }\n",
" }\n",
"\"\"\"\n",
")\n",
"\n",
"result = await client.execute_async(query)\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9429bdb-d291-49ce-8191-9977b3624f97",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,101 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "39df3cd3-d7d7-45f2-9c7c-f018e03cfdb1",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"\n",
"import requests\n",
"\n",
"input_set = []\n",
"\n",
"with open('pages.json', 'r') as input:\n",
"    input_set = json.load(input)\n",
"# print(input_set)\n",
"\n",
"my_headers = {'Authorization': 'Bearer ' + os.environ['DIRECTUS_TOKEN'], 'Content-Type': 'application/json'}\n",
"\n",
"response = requests.post('https://cms.donavanaldrich.com/items/pages', headers=my_headers, json=(input_set))\n",
"\n",
"# # # response = requests.post('https://httpbin.org/post', data = {'key':'value'})\n",
"print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3eb47fb-926c-4d6f-a080-64cf94e20e24",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"\n",
"import requests\n",
"\n",
"# input_set = []\n",
"\n",
"# with open('cards.json', 'r') as input:\n",
"#     input_set = json.load(input)\n",
"# print(input_set)\n",
"\n",
"my_headers = {'Authorization': 'Bearer ' + os.environ['DIRECTUS_TOKEN'], 'Content-Type': 'application/json'}\n",
"\n",
"response = requests.post('https://content.donavanaldrich.com/items/cards', headers=my_headers, json=(input_set))\n",
"\n",
"# # # response = requests.post('https://httpbin.org/post', data = {'key':'value'})\n",
"print(response.json())\n",
"\n",
"import json\n",
"import os\n",
"\n",
"import requests\n",
"\n",
"# input_set = []\n",
"\n",
"# with open('cards.json', 'r') as input:\n",
"#     input_set = json.load(input)\n",
"# print(input_set)\n",
"\n",
"my_headers = {'Authorization': 'Bearer ' + os.environ['DIRECTUS_TOKEN'], 'Content-Type': 'application/json'}\n",
"\n",
"response = requests.post('https://content.donavanaldrich.com/items/cards', headers=my_headers, json=(input_set))\n",
"\n",
"# # # response = requests.post('https://httpbin.org/post', data = {'key':'value'})\n",
"print(response.json())\n",
"# input_set = []\n",
"\n",
"# with open('cards.json', 'r') as input:\n",
"# input_set = json.load(input)\n",
"# # print(input_set)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,353 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"id": "1e4b01f8-321a-4dd7-a157-0c36a8f5142f",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"import re\n",
"\n",
"import requests\n",
"\n",
"headers = {\n",
"    \"Authorization\": os.getenv(\"directus_token\"),\n",
" \"Content-Type\": \"application/json\",\n",
"}\n",
"response = requests.get(\n",
" \"https://cms.donavanaldrich.com/items/containers\", headers=headers\n",
")\n",
"data = response.json()\n",
"# pprint.pprint(items)\n",
"items = data[\"data\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1837f6a9-f3c2-4666-b21c-96ea2dd56a06",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 68,
"id": "c0ff14db-e04c-45fa-ba43-6ba9bf56810e",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n"
]
}
],
"source": [
"for item in items:\n",
" raw = item[\"raw\"]\n",
" name = raw[\"Name\"]\n",
" image = raw[\"Config\"][\"Image\"]\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" try:\n",
" item[\"ui_port\"] = raw[\"Config\"][\"Labels\"][\n",
" \"traefik.http.services.\" + name + \".loadbalancer.server.port\"\n",
" ]\n",
" router = raw[\"Config\"][\"Labels\"][\"traefik.http.routers.\" + name + \".rule\"]\n",
"        item[\"host_url\"] = re.findall(r\"Host\\(`([^\\)]+)`\\)\", router)\n",
" item[\"docs_url\"] = raw[\"Config\"][\"Labels\"][\n",
" \"org.opencontainers.image.documentation\"\n",
" ]\n",
" item[\"image_url\"] = raw[\"Config\"][\"Labels\"][\"org.opencontainers.image.source\"]\n",
"    except Exception:\n",
"        # Container is missing one of the expected traefik/OCI labels (or a\n",
"        # lookup failed); leave this item unenriched rather than aborting.\n",
"        pass\n",
" headers = {\n",
"        \"Authorization\": os.getenv(\"directus_token\"),\n",
" \"Content-Type\": \"application/json\",\n",
" }\n",
" response = requests.patch(\n",
" \"https://cms.donavanaldrich.com/items/containers/\" + str(item[\"id\"]),\n",
" headers=headers,\n",
" json=(item),\n",
" )\n",
" print(response)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "30fd3366-53f7-45e1-acd5-e5c1dd91ec14",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [500]>\n",
"<Response [200]>\n",
"<Response [200]>\n",
"<Response [200]>\n"
]
}
],
"source": [
"item = items[0]\n",
"raw = item[\"raw\"]\n",
"name = raw[\"Name\"]\n",
"item[\"ui_port\"] = raw[\"Config\"][\"Labels\"][\n",
"    \"traefik.http.services.\" + name + \".loadbalancer.server.port\"\n",
"]\n",
"\n",
"router = raw[\"Config\"][\"Labels\"][\"traefik.http.routers.\" + name + \".rule\"]\n",
"item[\"host_url\"] = re.findall(r\"Host\\(`([^\\)]+)`\\)\", router)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3430aed0-6317-4889-bdaa-a2802edcc85a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "71e58084-525c-4ab2-ae31-431a8fbd5b49",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,324 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bec1b04-f1ab-4165-a4ea-cc890cc9e5ec",
"metadata": {},
"outputs": [],
"source": [
"pip install gql"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73420231-210c-46b0-8c5f-be26ce0f409f",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"\n",
"import requests\n",
"\n",
"headers = {\n",
"    \"Authorization\": os.getenv(\"directus_token\"),\n",
" \"Content-Type\": \"application/json\",\n",
"}\n",
"response = requests.get(\n",
" \"https://cms.donavanaldrich.com/items/containers\", headers=headers\n",
")\n",
"data = response.json()\n",
"\n",
"items = data[\"data\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "513b90bb-b873-4fa2-8e26-197af7fea6ce",
"metadata": {},
"outputs": [],
"source": [
"%%sh\n",
"rm templates/wiki-js.md\n",
"\n",
"cat << EOF >> templates/wiki-js.md\n",
"\n",
"# {{ item.id }}\n",
"\n",
"hello\n",
"EOF"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ee85b3d-19bc-47c9-9af6-2d08f7bf577b",
"metadata": {},
"outputs": [],
"source": [
"from jinja2 import Environment, FileSystemLoader, select_autoescape\n",
"env = Environment(\n",
" loader = FileSystemLoader([\"templates\"]),\n",
" # autoescape=select_autoescape()\n",
")\n",
"\n",
"t = env.get_template(\"wiki-js.md\")\n",
"pprint.pprint(t.render(item=items[0],items=items))\n",
"# \n",
"with open(\"test.ini\", \"w\") as external_file:\n",
" # add_text = \"This text will be added to the file\"\n",
" print(t.render(item=items[0],items=items), file=external_file)\n",
" external_file.close()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "dca88210-18db-45ee-9949-7010b92dbae7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/jovyan/code/directus\n"
]
}
],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"import requests\n",
"from gql import Client, gql\n",
"from gql.transport.requests import RequestsHTTPTransport\n",
"\n",
"cwd = os.getcwd()\n",
"print(cwd)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"active_workflow\n",
"automaticmode/active_workflow\n"
]
}
],
"source": [
"with open(\"/home/jovyan/code/directus/docker.json\") as f:\n",
" scrubbed_records = json.load(f)\n",
"\n",
"# print(scrubbed_records[0])\n",
"item = scrubbed_records[0]\n",
"name = item[\"Name\"]\n",
"image = item[\"Config\"][\"Image\"]\n",
"# print(name)\n",
"# print(image)\n",
"image = image.replace(\":latest\", \"\")\n",
"name = name.replace(\"/\", \"\")\n",
"print(name)\n",
"print(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0367eee-e052-4184-b016-8ecb0dd1aa32",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" data = x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" item = x\n",
" name = item[\"Name\"]\n",
" image = item[\"Config\"][\"Image\"]\n",
" image = image.replace(\":latest\", \"\")\n",
" name = name.replace(\"/\", \"\")\n",
" print(name)\n",
" print(image)\n",
" input_set = {\"id\": name, \"image\": image, \"name\": name, \"raw\": item}\n",
"\n",
"    my_headers = {\"Authorization\": os.getenv(\"directus_token\"), \"Content-Type\": \"application/json\"}\n",
" response = requests.post(\n",
" \"https://cms.donavanaldrich.com/items/containers\",\n",
" headers=my_headers,\n",
" json=(input_set),\n",
" )\n",
" print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "927fe89f-9660-4681-884b-f258fc669b88",
"metadata": {},
"outputs": [],
"source": [
"pip install gql[all] aiohttp\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48935cc6-c7c7-43f7-9fb7-c34bd457c903",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"from gql import gql, Client\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"transport = AIOHTTPTransport(\n",
" url=\"http://wiki:3000/graphql\",\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
"        \"Authorization\": os.getenv(\"directus_token\"),\n",
" },\n",
")\n",
"\n",
"# Create a GraphQL client using the defined transport\n",
"# client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"client = Client(transport=transport, fetch_schema_from_transport=False)\n",
"# client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"\n",
"query = gql(\n",
" '''\n",
" query pageList {\n",
" pages {\n",
" list(locale: \"en\") {\n",
" id\n",
" path\n",
" locale\n",
" title\n",
" description\n",
" contentType\n",
" isPublished\n",
" isPrivate\n",
" privateNS\n",
" createdAt\n",
" updatedAt\n",
" tags\n",
" }\n",
" }\n",
" }\n",
"'''\n",
")\n",
"\n",
"\n",
"\n",
"# result = await session.execute(query)\n",
"# print(result)\n",
"result = await client.execute_async(query)\n",
"print(result)\n",
"\n",
" \n",
"# asyncio.run(main())\n",
"# Execute the query on the transport\n",
"# result = await client.execute_async(query)\n",
"# print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfe2738f-7b10-45e2-bb31-38e8820b1ed6",
"metadata": {},
"outputs": [],
"source": [
"from gql import gql, Client\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"# Select your transport with a defined url endpoint\n",
"transport = AIOHTTPTransport(url=\"https://countries.trevorblades.com/\")\n",
"\n",
"# Create a GraphQL client using the defined transport\n",
"client = Client(transport=transport, fetch_schema_from_transport=True)\n",
"\n",
"# Provide a GraphQL query\n",
"query = gql(\n",
" \"\"\"\n",
" query getContinents {\n",
" continents {\n",
" code\n",
" name\n",
" }\n",
" }\n",
"\"\"\"\n",
")\n",
"\n",
"# Execute the query on the transport\n",
"result = client.execute(query)\n",
"print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "960f7042-b00b-40b0-9d56-f4e157819dc6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a83cbfd-c38b-40f5-8c24-c53dbbc6b29d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0019cd52-56b8-4f1d-82f5-55137f05456b",
"metadata": {},
"outputs": [],
"source": [
"pip install gql[all] aiohttp"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6658fd82-d8db-4a80-a103-3f015c851e69",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"from gql import Client, gql\n",
"from gql.transport.aiohttp import AIOHTTPTransport\n",
"\n",
"transport = AIOHTTPTransport(\n",
" url=\"http://wiki:3000/graphql\",\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
"        \"Authorization\": os.getenv(\"directus_token\"),\n",
" },\n",
")\n",
"\n",
"client = Client(transport=transport, fetch_schema_from_transport=False)\n",
"\n",
"query = gql(\n",
" \"\"\"\n",
" query pageList {\n",
" pages {\n",
" list(locale: \"en\") {\n",
" id\n",
" path\n",
" locale\n",
" title\n",
" description\n",
" contentType\n",
" isPublished\n",
" isPrivate\n",
" privateNS\n",
" createdAt\n",
" updatedAt\n",
" tags\n",
" }\n",
" }\n",
" }\n",
"\"\"\"\n",
")\n",
"\n",
"result = await client.execute_async(query)\n",
"print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,455 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e367555e-02e3-4606-b2c0-be534088b036",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'requests' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_835/2578056015.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"https://directus.donavanaldrich.com/items/job_posts/eEVP-aYX0tdMOGpZPxTNGA.--M7D0SXmM3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;31m# print(response.json())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"https://directus.donavanaldrich.com/items/job_posts/eEVP-aYdMOGpZPxTNGA.--M7D0SXmM3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined"
]
}
],
"source": [
"response = requests.get(\"https://directus.donavanaldrich.com/items/job_posts/eEVP-aYX0tdMOGpZPxTNGA.--M7D0SXmM3\")\n",
"# print(response.json())\n",
"print(response)\n",
"\n",
"response = requests.get(\"https://directus.donavanaldrich.com/items/job_posts/eEVP-aYdMOGpZPxTNGA.--M7D0SXmM3\")\n",
"# print(response.json())\n",
"print(response.status_code)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f004bec1-3f13-49d3-aed7-94bf744d4eff",
"metadata": {},
"outputs": [],
"source": [
"my_headers = {'Authorization': os.getenv('directus_token'),'Content-Type': 'application/json'}\n",
"\n",
"response = requests.post('https://directus.donavanaldrich.com/items/job_posts', headers=my_headers, json = [object_list])\n",
"\n",
"# response = requests.post('https://httpbin.org/post', data = {'key':'value'})\n",
"print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c405cdba-ae3a-4b53-b6be-2353e7e2b7d8",
"metadata": {},
"outputs": [],
"source": [
"profile = df.profile_report(missing_diagrams={\"bar\": False})\n",
"profile = df.profile_report(correlations={\"cramers\": {\"calculate\": False}})\n",
"\n",
"rejected = profile.get_rejected_variables()\n",
"\n",
"print(rejected)\n",
"\n",
"rej_vars = list(rejected)\n",
"\n",
"print(rej_vars)\n",
"\n",
"with open('data/zip/rejected_variables.pkl', 'wb') as pickle_file:\n",
" pickle.dump(rej_vars, pickle_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a98a4655-69d6-4a71-ac1c-abdfaec4b2ae",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "80f9b417-5c57-4c3b-a325-074611b54d9a",
"metadata": {
"tags": []
},
"source": [
"# Json Format"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a5f89c5-998d-4698-b862-73e2972d1642",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "21804087-9e66-4662-80e9-30d837a68a82",
"metadata": {},
"source": [
"## Append JSON"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6daf9c74-8a88-49c6-8941-4d99a3cf79f6",
"metadata": {},
"outputs": [],
"source": [
"# python object(dictionary) to be dumped\n",
"dict1 ={\n",
" \"emp1\": {\n",
" \"name\": \"Lisa\",\n",
" \"designation\": \"programmer\",\n",
" \"age\": \"34\",\n",
" \"salary\": \"54000\"\n",
" },\n",
" \"emp2\": {\n",
" \"name\": \"Elis\",\n",
" \"designation\": \"Trainee\",\n",
" \"age\": \"24\",\n",
" \"salary\": \"40000\"\n",
" },\n",
"}\n",
" \n",
"# the json file where the output must be stored\n",
"out_file = open(\"test.json\", \"w\")\n",
" \n",
"json.dump(dict1, out_file, indent = 2)\n",
" \n",
"out_file.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29a5bc7d-5a4c-4891-a263-5341b4156879",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "1072a86a-34f8-41aa-ae2b-5931cc82f178",
"metadata": {
"tags": []
},
"source": [
"# Json Format"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7cc7fa62-bfd4-41d9-80f6-4d40c8354cd7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eEVP-aYX0tdMOGpZPxTNGA.--M7D0SXmM3': {'article_attributes_class_0': 'job_result', 'article_attributes_class_1': 't_job_result', 'article_attributes_id': 'quiz-card-'}, 'eEVP-aYX0tdsdfsGpZPxTNGA.--M7D0SXmM3': {'article_attributes_class_0': 'job_result', 'article_attributes_class_1': 't_job_result', 'article_attributes_id': 'quiz-card-'}}\n"
]
}
],
"source": [
"record1 = {\n",
" \"eEVP-aYX0tdMOGpZPxTNGA.--M7D0SXmM3\": {\n",
" \"article_attributes_class_0\": \"job_result\",\n",
" \"article_attributes_class_1\": \"t_job_result\",\n",
" \"article_attributes_id\": \"quiz-card-\"\n",
" } \n",
"}\n",
"record2 = {\n",
" \"eEVP-aYX0tdsdfsGpZPxTNGA.--M7D0SXmM3\": {\n",
" \"article_attributes_class_0\": \"job_result\",\n",
" \"article_attributes_class_1\": \"t_job_result\",\n",
" \"article_attributes_id\": \"quiz-card-\"\n",
" }\n",
"}\n",
"\n",
"\n",
"cuml_data = record1\n",
"\n",
"cuml_data.update(record2)\n",
"\n",
"print(cuml_data)\n",
"\n",
"out_file = open(\"test2.json\", \"w\")\n",
" \n",
"json.dump(cuml_data, out_file, indent = 2)\n",
" \n",
"out_file.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c79355c-e4a7-4af0-9252-9400efe6ffed",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fff3e539-0423-4593-80a1-85ce219bb2e7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf34048d-fdf7-4c8a-bb45-6fb40e69a3f3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "23ad549c-7137-47aa-bcbc-2906b70fd078",
"metadata": {
"tags": []
},
"source": [
"# Import into Pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dce9dfe8-4938-4731-b2cf-353f4b9610c1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_json(r'data/zip/records.json')\n",
"\n",
"df = df.transpose()\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "b6464d9f-60e6-4c6e-9ff5-4a616e81f35b",
"metadata": {
"tags": []
},
"source": [
"# Initial Variable Profiling"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72bc21b4-38ac-4c11-92e6-db5ebf03903d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from pandas_profiling import ProfileReport\n",
"\n",
"profile = ProfileReport(df, title=\"Ziprecruiter\", explorative=True)\n",
"profile = df.profile_report(missing_diagrams={\"bar\": False})\n",
"profile = df.profile_report(correlations={\"cramers\": {\"calculate\": False}})\n",
"\n",
"# profile.to_notebook_iframe()\n",
"\n",
"# profile.to_file(\"data/zip/var_report.json\")\n",
"\n",
"with open('data/zip/profile.pkl', 'wb') as pickle_file:\n",
" pickle.dump(profile, pickle_file)\n",
"# profile.to_html()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "865e08ae-8300-4ff4-a3c7-17ef1353c5f7",
"metadata": {
"jupyter": {
"source_hidden": true
},
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"profile = df.profile_report(missing_diagrams={\"bar\": False})\n",
"profile = df.profile_report(correlations={\"cramers\": {\"calculate\": False}})\n",
"\n",
"rejected = profile.get_rejected_variables()\n",
"\n",
"print(rejected)\n",
"\n",
"rej_vars = list(rejected)\n",
"\n",
"print(rej_vars)\n",
"\n",
"with open('data/zip/rejected_variables.pkl', 'wb') as pickle_file:\n",
" pickle.dump(rej_vars, pickle_file)"
]
},
{
"cell_type": "markdown",
"id": "2f7d77ce-6dbc-4d9a-b86a-d57f337b7344",
"metadata": {
"tags": []
},
"source": [
"# Drop Rejected Variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d22beeed-6ed0-4d2f-8224-627a288b16bc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"with open('data/zip/rejected_variables.pkl', 'rb') as pickle_file:\n",
" rej_vars = pickle.load(pickle_file)\n",
"\n",
"new = pd.read_json(r'data/zip/records.json')\n",
"\n",
"new = new.transpose()\n",
"\n",
"new.drop(columns=rej_vars, inplace = True)\n",
"\n",
"with open('data/zip/cleaned_indexed_data.pkl', 'wb') as pickle_file:\n",
" pickle.dump(new, pickle_file)\n",
"\n",
"new"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24d9145b-ff0e-4a7a-928e-942b3373e11c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"with open('data/zip/rejected_variables.pkl', 'rb') as pickle_file:\n",
" rej_vars = pickle.load(pickle_file)\n",
"\n",
"new_nokey = pd.read_json(r'data/zip/unindexed_records.json')\n",
"\n",
"new_nokey.drop(columns=rej_vars, inplace = True)\n",
"\n",
"with open('data/zip/cleaned_unindexed_data.pkl', 'wb') as pickle_file:\n",
" pickle.dump(new_nokey, pickle_file)\n",
"\n",
"new_nokey"
]
},
{
"cell_type": "markdown",
"id": "8552a640-0669-46a0-a41a-59e818f0af6e",
"metadata": {
"tags": []
},
"source": [
"# Export Clean Data"
]
},
{
"cell_type": "markdown",
"id": "2dba4f67-bdf3-4d0f-bd4c-d8d2fa7e1988",
"metadata": {
"tags": []
},
"source": [
"## Dirty Vars Removed"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ff9569b-2f90-4e35-a65a-685e086da0bd",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# with open('data/zip/cleaned_unindexed_data.pkl', 'rb') as pickle_file:\n",
"# data = pickle.load(pickle_file)\n",
"# result = data.to_json(orient=\"index\")\n",
"# parsed = json.loads(result)"
]
},
{
"cell_type": "markdown",
"id": "07c10907-c265-4d00-9c32-3621bc33f588",
"metadata": {},
"source": [
"# Get Rejected Variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f34b472-7330-46bc-8a2c-45e3a5767a36",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "8a6a2574-3fe9-4222-9330-b96e52fddd89",
"metadata": {
"tags": []
},
"source": [
"# Json Format"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

@ -0,0 +1,106 @@
[sh_curl]
platform = curl
resource = http://sh.com
value_template = My IP: {{value.ip}}
response_type = json
[sh_rest]
platform = rest
resource = http://sh/api
value_template = {{value}}
method = post
authentication = basic
username = my_username
password = my_password
payload = {"var1": "hi", "var2": 1}
headers = {"Content-Type": "application/json"}
verify = false
[sh_ping]
platform = ping
resource = 192.168.1.1
[sh_http]
platform = http_status
resource = https://your-website.com/api
method = get
authentication = basic
username = my_username
password = my_password
headers = {"Content-Type": "application/json"}
return_codes = 2XX, 3XX
[sh_health]
platform = healthchecks
prefix = http://
host = localhost
port = 8080
api_key = {{ Healthchecks project API Key }}
project = {{ Healthchecks project name }}
verify = true
value_template = {{ value_template }}
[sh]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/{{ name }}.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only
[sh_curl]
platform = curl
resource = http://sh.com
value_template = My IP: {{value.ip}}
response_type = json
[sh_rest]
platform = rest
resource = http://sh/api
value_template = {{value}}
method = post
authentication = basic
username = my_username
password = my_password
payload = {"var1": "hi", "var2": 1}
headers = {"Content-Type": "application/json"}
verify = false
[sh_ping]
platform = ping
resource = 192.168.1.1
[sh_http]
platform = http_status
resource = https://your-website.com/api
method = get
authentication = basic
username = my_username
password = my_password
headers = {"Content-Type": "application/json"}
return_codes = 2XX, 3XX
[sh_health]
platform = healthchecks
prefix = http://
host = localhost
port = 8080
api_key = {{ Healthchecks project API Key }}
project = {{ Healthchecks project name }}
verify = true
value_template = {{ value_template }}
[sh]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/{{ name }}.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>My Webpage</title>
</head>
<body>
<ul id="navigation">
{% for item in navigation %}
<li><a href="{{ item.href }}">{{ item.caption }}</a></li>
{% endfor %}
</ul>
[{{ name }}]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/{{ name }}.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only
{# a comment #}
</body>
</html>
[{{ name }}]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/{{ name }}.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only
{# a comment #}
</body>
</html>

@ -0,0 +1,3 @@
"Jinja {{ name }}!"

@ -0,0 +1,106 @@
[sh_curl]
platform = curl
resource = http://sh.com
value_template = My IP: {{value.ip}}
response_type = json
[sh_rest]
platform = rest
resource = http://sh/api
value_template = {{value}}
method = post
authentication = basic
username = my_username
password = my_password
payload = {"var1": "hi", "var2": 1}
headers = {"Content-Type": "application/json"}
verify = false
[sh_ping]
platform = ping
resource = 192.168.1.1
[sh_http]
platform = http_status
resource = https://your-website.com/api
method = get
authentication = basic
username = my_username
password = my_password
headers = {"Content-Type": "application/json"}
return_codes = 2XX, 3XX
[sh_health]
platform = healthchecks
prefix = http://
host = localhost
port = 8080
api_key = {{ Healthchecks project API Key }}
project = {{ Healthchecks project name }}
verify = true
value_template = {{ value_template }}
[sh]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/sh.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only
[sh_curl]
platform = curl
resource = http://sh.com
value_template = My IP: {{value.ip}}
response_type = json
[sh_rest]
platform = rest
resource = http://sh/api
value_template = {{value}}
method = post
authentication = basic
username = my_username
password = my_password
payload = {"var1": "hi", "var2": 1}
headers = {"Content-Type": "application/json"}
verify = false
[sh_ping]
platform = ping
resource = 192.168.1.1
[sh_http]
platform = http_status
resource = https://your-website.com/api
method = get
authentication = basic
username = my_username
password = my_password
headers = {"Content-Type": "application/json"}
return_codes = 2XX, 3XX
[sh_health]
platform = healthchecks
prefix = http://
host = localhost
port = 8080
api_key = {{ Healthchecks project API Key }}
project = {{ Healthchecks project name }}
verify = true
value_template = {{ value_template }}
[sh]
prefix = https://
url = your-website.com
icon = static/images/apps/sh.png
sidebar_icon = static/images/apps/{{ name }}.png
description = default
open_in = iframe
data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
groups = admin_only

@ -0,0 +1,38 @@
[Settings]
theme = dark
accent = purple
background = None
roles = admin,user,public_user
home_access_groups = admin_only
settings_access_groups = admin_only
custom_app_title = DashMachine
sidebar_default = open
tags = {"name": "foo", "icon": "home", "sort_pos": "2"}
[admin]
role = admin
password = {{ user.password }}
confirm_password = {{ user.password }}
{% for item in items %}
[{{ item.service }}]
prefix = https://
url = {{ item.url }}
{% if item.icon_url %}
icon = {{ item.icon_url }}
sidebar_icon = {{ item.icon_url }}
{% else %}
icon = static/images/apps/{{ item.service }}.png
sidebar_icon = static/images/apps/{{ item.service }}.png
{% endif %}
description = default
open_in = iframe
# data_sources = sh_http,sh_health,sh_ping,sh_rest
tags = default
# groups = admin_only
{% endfor %}
{# a comment #}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bec1b04-f1ab-4165-a4ea-cc890cc9e5ec",
"metadata": {},
"outputs": [],
"source": [
"pip install gql"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dca88210-18db-45ee-9949-7010b92dbae7",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import pprint\n",
"\n",
"from gql import Client, gql\n",
"from gql.transport.requests import RequestsHTTPTransport\n",
"\n",
"cwd = os.getcwd()\n",
"print(cwd)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"with open(\"/home/jovyan/code/directus/docker.json\") as f:\n",
" scrubbed_records = json.load(f)\n",
"\n",
"# print(scrubbed_records[0])\n",
"item = scrubbed_records[0]\n",
"name = item['Name']\n",
"image = item['Config']['Image']\n",
"# print(name)\n",
"# print(image)\n",
"image = (image.replace(':latest', ''))\n",
"name = (name.replace('/', ''))\n",
"print(name)\n",
"print(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0367eee-e052-4184-b016-8ecb0dd1aa32",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for x in scrubbed_records:\n",
" # pprint.pprint(x)\n",
" item = scrubbed_records[0]\n",
" name = item['Name']\n",
" image = item['Config']['Image']\n",
" image = (image.replace(':latest', ''))\n",
" name = (name.replace('/', ''))\n",
" data = x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f03fbfe-147b-4011-b71f-b9031810aa44",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# print(jsonString_merged)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2983b27-6169-4a16-bc70-adfe9b6023b0",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"sample_transport = RequestsHTTPTransport(\n",
" url=\"http://directus:8055/graphql\",\n",
" use_json=True,\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
" \"Authorization\": \"os.env(directus_token)\",\n",
" },\n",
" verify=True,\n",
" retries=3,\n",
")\n",
"\n",
"client = Client(\n",
" transport=sample_transport,\n",
" fetch_schema_from_transport=True,\n",
")\n",
"\n",
"job_posts = gql(\n",
" \"\"\"\n",
"mutation{\n",
" update_containers_item(id: ______, data: ______){\n",
" id\n",
" status\n",
" sort\n",
" date_created\n",
" date_updated\n",
" json_object\n",
" }\n",
"}\n",
"\"\"\"\n",
")\n",
"\n",
"\n",
"y = json.dumps(scrubbed_records)\n",
"for x in y:\n",
"\n",
" # ef = x.loads()\n",
" # print(x)\n",
" # id = x[\"article_attributes_data-listing-version-key\"]\n",
" # pairs = y.items()\n",
" pprint.pprint(x)\n",
"# print(y)\n",
"# id = y[\"article_attributes_data-listing-version-key\"]\n",
"# print(id)\n",
"# data = y[scrubbed_record]\n",
"# print(scrubbed_record)\n",
"\n",
"\n",
"# Mutation ($data: JSON!) {\n",
"# update_job_blocks_item(\n",
"# id: \"ziprecruiter\"\n",
"# data: {\n",
"# title: $title\n",
"# }\n",
"# )\n",
"# }\n",
"\n",
"\n",
"\n",
"# print(title)\n",
"\n",
"# params = { \"title\": article_attributes_data-posted-on }\n",
"\n",
"# result = client.execute(mutate, variable_values=params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28c500a2-becf-4d86-a88d-c1f30b3a5e99",
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"\n",
"# print(result)\n",
"\n",
"# query = gql('''\n",
"# query {\n",
"# json_server_by_id(id: \"ziprecruiter\") {\n",
"# data\n",
"# }\n",
"# }\n",
"# ''')\n",
"\n",
"\n",
"mutate = gql(\n",
" \"\"\"\n",
" mutation ($data: JSON!) {\n",
" update_containers_item(\n",
" id: \"test\"\n",
" data: {\n",
" data: $data\n",
" }\n",
" ) {\n",
" data\n",
" }\n",
" }\n",
"\"\"\"\n",
")\n",
"\n",
"# add = gql(\n",
"# \"\"\"\n",
"# mutation ($create_test_data: [create_test_input!]) {\n",
"# create_test_items( data: $create_test_data ) {\n",
"# id\n",
"# json_object\n",
"# }\n",
"# }\n",
"# \"\"\"\n",
"# )\n",
"# init = gql(\n",
"# \"\"\"\n",
"# mutation ($collection: String!, $data: !) {\n",
"# create_fields_item(collection: \"job_posts\", data: { field: \"title\", type: \"string\" ) {\n",
"# collection\n",
"# field\n",
"# }\n",
"# }\n",
"# \"\"\"\n",
"# )\n",
"\n",
"test3 = { \"field\": \"title\", type: \"string\", \"meta\": { \"icon\": \"title\" }, \"schema\": { \"default_value\": \"null\" } }\n",
"\n",
"params = {\"collection\": \"job_posts\", \"data\": test3}\n",
"\n",
"result = client.execute(init, variable_values=params)\n",
"# print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a52af9c-4127-4dbf-bac4-20086433fc80",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import requests\n",
"\n",
"input_set = {\n",
" \"id\": \"tee\",\n",
" \"name\": \"ffff\"\n",
"}\n",
"\n",
"# import jsonify\n",
"my_headers = {\"Authorization\": \"os.env(directus_token)\", \"Content-Type\": \"application/json\"}\n",
"response = requests.post(\n",
" \"https://cms.donavanaldrich.com/fields/containers\", headers=my_headers, json=(input_set)\n",
")\n",
"print(response.json())\n",
"# r = response.json()\n",
"# print(r)\n",
"# p = json.dumps(r)\n",
"# print(p)\n",
"# with open('blog_posts_fields.json', 'w') as output:\n",
"# json.dumps(input)\n",
"# # print(input_set)\n",
"# with open('blog_posts_fields.json', 'w') as f:\n",
"# json.dump(p, f, indent=8)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,579 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Intro to Scrapy\n",
"\n",
"Scrapy is a Python framework for data scraping, which, to say in short, is the combination of almost everything we learnt until now: requests, css selectors (BeautifulSoup), xpath (lxml), regex (re) and even checking robots.txt or putting the scraper to sleep.\n",
"\n",
"Generally, as Scrapy is a framework, one does not code inside Jupyter Notebook. To mimic Scrapy behavior inside the Notebook, we will have to make some additional imports which would not be required otherwise.\n",
"\n",
"Key points:\n",
"- response - the object that contains page source as a Scrapy element to be scraped,\n",
"- response.css() - css approach to scraping (BeautifulSoup),\n",
"- response.xpath() - xpath approach to scraping (Lxml),\n",
"- extract() - extract all elements satisfying some condition (provides list),\n",
"- extract_first() - extract first element satisfying some condition (provides element).\n",
"- response.css(\"a::text\").extract_first() - will provide the text of the first link matched (CSS),\n",
"- response.xpath(\"//a/text()\").extract_first() - will provide the text of the first link matched (Xpath),\n",
"- response.css('a::attr(href)').extract_first() - will provide the href attribute (URL) of the first link matched (CSS),\n",
"- response.xpath(\"//a/@href\").extract_first() - will provide the href attribute (URL) of the first link matched (Xpath)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from scrapy.http import TextResponse"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = \"http://quotes.toscrape.com/\"\n",
"r = requests.get(url)\n",
"response = TextResponse(r.url,body=r.text,encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<200 http://quotes.toscrape.com/>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#get heading-css\n",
"response.css(\"a\").extract_first()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#get heading-xpath\n",
"response.xpath(\"//a\").extract_first()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Albert Einstein',\n",
" 'J.K. Rowling',\n",
" 'Albert Einstein',\n",
" 'Jane Austen',\n",
" 'Marilyn Monroe',\n",
" 'Albert Einstein',\n",
" 'André Gide',\n",
" 'Thomas A. Edison',\n",
" 'Eleanor Roosevelt',\n",
" 'Steve Martin']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#get authors-css\n",
"response.css(\"small::text\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Albert Einstein',\n",
" 'J.K. Rowling',\n",
" 'Albert Einstein',\n",
" 'Jane Austen',\n",
" 'Marilyn Monroe',\n",
" 'Albert Einstein',\n",
" 'André Gide',\n",
" 'Thomas A. Edison',\n",
" 'Eleanor Roosevelt',\n",
" 'Steve Martin']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#authors-xpath\n",
"response.xpath(\"//small/text()\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#heading-css\n",
"response.css('a[style=\"text-decoration: none\"]').extract()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Quotes to Scrape']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#heading-css text only\n",
"response.css('a[style=\"text-decoration: none\"]::text').extract()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['/']"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#heading-css href only\n",
"response.css('a[style=\"text-decoration: none\"]::attr(href)').extract()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"['change',\n",
" 'deep-thoughts',\n",
" 'thinking',\n",
" 'world',\n",
" 'abilities',\n",
" 'choices',\n",
" 'inspirational',\n",
" 'life',\n",
" 'live',\n",
" 'miracle',\n",
" 'miracles',\n",
" 'aliteracy',\n",
" 'books',\n",
" 'classic',\n",
" 'humor',\n",
" 'be-yourself',\n",
" 'inspirational',\n",
" 'adulthood',\n",
" 'success',\n",
" 'value',\n",
" 'life',\n",
" 'love',\n",
" 'edison',\n",
" 'failure',\n",
" 'inspirational',\n",
" 'paraphrased',\n",
" 'misattributed-eleanor-roosevelt',\n",
" 'humor',\n",
" 'obvious',\n",
" 'simile',\n",
" 'love',\n",
" 'inspirational',\n",
" 'life',\n",
" 'humor',\n",
" 'books',\n",
" 'reading',\n",
" 'friendship',\n",
" 'friends',\n",
" 'truth',\n",
" 'simile']"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#tag text css\n",
"response.css(\"a[class='tag']::text\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"['/tag/change/page/1/',\n",
" '/tag/deep-thoughts/page/1/',\n",
" '/tag/thinking/page/1/',\n",
" '/tag/world/page/1/',\n",
" '/tag/abilities/page/1/',\n",
" '/tag/choices/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/life/page/1/',\n",
" '/tag/live/page/1/',\n",
" '/tag/miracle/page/1/',\n",
" '/tag/miracles/page/1/',\n",
" '/tag/aliteracy/page/1/',\n",
" '/tag/books/page/1/',\n",
" '/tag/classic/page/1/',\n",
" '/tag/humor/page/1/',\n",
" '/tag/be-yourself/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/adulthood/page/1/',\n",
" '/tag/success/page/1/',\n",
" '/tag/value/page/1/',\n",
" '/tag/life/page/1/',\n",
" '/tag/love/page/1/',\n",
" '/tag/edison/page/1/',\n",
" '/tag/failure/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/paraphrased/page/1/',\n",
" '/tag/misattributed-eleanor-roosevelt/page/1/',\n",
" '/tag/humor/page/1/',\n",
" '/tag/obvious/page/1/',\n",
" '/tag/simile/page/1/',\n",
" '/tag/love/',\n",
" '/tag/inspirational/',\n",
" '/tag/life/',\n",
" '/tag/humor/',\n",
" '/tag/books/',\n",
" '/tag/reading/',\n",
" '/tag/friendship/',\n",
" '/tag/friends/',\n",
" '/tag/truth/',\n",
" '/tag/simile/']"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#tag url css\n",
"response.css(\"a[class='tag']::attr(href)\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"['change',\n",
" 'deep-thoughts',\n",
" 'thinking',\n",
" 'world',\n",
" 'abilities',\n",
" 'choices',\n",
" 'inspirational',\n",
" 'life',\n",
" 'live',\n",
" 'miracle',\n",
" 'miracles',\n",
" 'aliteracy',\n",
" 'books',\n",
" 'classic',\n",
" 'humor',\n",
" 'be-yourself',\n",
" 'inspirational',\n",
" 'adulthood',\n",
" 'success',\n",
" 'value',\n",
" 'life',\n",
" 'love',\n",
" 'edison',\n",
" 'failure',\n",
" 'inspirational',\n",
" 'paraphrased',\n",
" 'misattributed-eleanor-roosevelt',\n",
" 'humor',\n",
" 'obvious',\n",
" 'simile',\n",
" 'love',\n",
" 'inspirational',\n",
" 'life',\n",
" 'humor',\n",
" 'books',\n",
" 'reading',\n",
" 'friendship',\n",
" 'friends',\n",
" 'truth',\n",
" 'simile']"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#tag text xpath\n",
"response.xpath(\"//a[@class='tag']/text()\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['/tag/change/page/1/',\n",
" '/tag/deep-thoughts/page/1/',\n",
" '/tag/thinking/page/1/',\n",
" '/tag/world/page/1/',\n",
" '/tag/abilities/page/1/',\n",
" '/tag/choices/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/life/page/1/',\n",
" '/tag/live/page/1/',\n",
" '/tag/miracle/page/1/',\n",
" '/tag/miracles/page/1/',\n",
" '/tag/aliteracy/page/1/',\n",
" '/tag/books/page/1/',\n",
" '/tag/classic/page/1/',\n",
" '/tag/humor/page/1/',\n",
" '/tag/be-yourself/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/adulthood/page/1/',\n",
" '/tag/success/page/1/',\n",
" '/tag/value/page/1/',\n",
" '/tag/life/page/1/',\n",
" '/tag/love/page/1/',\n",
" '/tag/edison/page/1/',\n",
" '/tag/failure/page/1/',\n",
" '/tag/inspirational/page/1/',\n",
" '/tag/paraphrased/page/1/',\n",
" '/tag/misattributed-eleanor-roosevelt/page/1/',\n",
" '/tag/humor/page/1/',\n",
" '/tag/obvious/page/1/',\n",
" '/tag/simile/page/1/',\n",
" '/tag/love/',\n",
" '/tag/inspirational/',\n",
" '/tag/life/',\n",
" '/tag/humor/',\n",
" '/tag/books/',\n",
" '/tag/reading/',\n",
" '/tag/friendship/',\n",
" '/tag/friends/',\n",
" '/tag/truth/',\n",
" '/tag/simile/']"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#tag url xpath\n",
"response.xpath(\"//a[@class='tag']/@href\").extract()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<title>Quotes to Scrape</title>'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css(\"title\").extract_first()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['title', 'title']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css(\"title\").re(\"title\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Quotes to Scrape']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#regex to get text between tags\n",
"response.css(\"title\").re('.+>(.+)<.+')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"nteract": {
"version": "0.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,673 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrapy: part 1\n",
"\n",
"**Scrapy** is a powerful web scraping framework for Python. A framework is still a library (\"an API of functions\") yet with more powerful built-in features. It can be described as the combination of all we learnt till now including requests, BeautifulSoup, lxml and RegEx. To install **Scrapy**, open the command prompt and run the following command:\n",
"```\n",
"pip install scrapy\n",
"```\n",
"Once scrapy is installed one can start experiencing it by just running the following command inside the command prompt (e.g. let's assume you want to scrape the http://quotes.toscrape.com/page/1/ page):\n",
"```\n",
"scrapy shell http://quotes.toscrape.com/page/1/\n",
"```\n",
"Now, you must be able to apply powerful scrapy functions to get the data you want. However, all of this are available inside the command prompt. If you want to experience the same inside a Jupyter notebook, you must try to *mimic* the command prompt behaviour by adding **5** additional lines as shown below (instead of running the abovementioned command). As this material is provided in a Jupyter notebook, we will also *mimic* the command prompt behavior, yet you are encouraged to experience it yourself."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"import requests\n",
"from scrapy.http import TextResponse\n",
"\n",
"url = \"http://quotes.toscrape.com/page/1/\"\n",
"\n",
"r = requests.get(url)\n",
"response = TextResponse(r.url, body=r.text, encoding='utf-8')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fine, now we are ready to apply the **scrapy** functions on our **response** object. All the code following this line is the same for both Jupyter notebook users and those who chose to experience the command prompt approach.\n",
"\n",
"As we covered before, there are two main ways to navigate over an HTML file: using CSS selectors and the XPath approach. While **BeautifulSoup** supported only the former, **Scrapy** has functions for both: **css()** for using css selectors and **xpath()** for the xpath approach.\n",
"\n",
"### CSS selectors\n",
"\n",
"Let's use CSS selectors to find the title of the page."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[<Selector xpath=u'descendant-or-self::title' data=u'<title>Quotes to Scrape</title>'>]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('title')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see it provides *more information than needed*. That's why there is an **extract()** function, that will extract only the component we are interested in without the additional information. It can be said that **css()** and **extract()** function mimic the **findAll()** behaviour from BeautifulSoup."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<title>Quotes to Scrape</title>']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('title').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Excellent! We now have the correct tag we were looking for with the text inside. If we want to choose only the text content there is no need for using additional function: one just needs to add the following component to the CSS selector **::text** as shown below."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Quotes to Scrape']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('title::text').extract()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"list"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(response.css('title::text').extract())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As mentioned before, the **extract()** function applied on the css selector mimics the **findAll()** behavior. This is true also about the output we receive: it has the type of list. If one needs to receive a single element as an output, the **extract_first()** function must be used, which will return the very first matched element (similarly to **find()** from BeautifulSoup)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"u'Quotes to Scrape'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('title::text').extract_first()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"unicode"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(response.css('title::text').extract_first())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's now try to find the heading of the page (which is Quotes to Scrape). Heading is provided inside a `<h1>` tag as usually."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<h1>\\n <a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>\\n </h1>']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('h1').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Again, we can get the heading text by using the **::text** guy."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'\\n ', u'\\n ']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('h1::text').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The latter did not really help because the heading text was inside an `<a>` tag, which in its turn was inside the above found `<h1>` tag. "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('h1 a').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nice! We found it. As you can see it has the style attribute that differentiates this `<a>` tag from others (kind of an identifier). We could use it to find this `<a>` tag even without mentioning that it is inside a `<h1>` guy. To do this in **Scrapy**, square brackets should be used."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('a[style=\"text-decoration: none\"]').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Great! Let's now extract the text first and then go for the link inside this tag (i.e. the value of the **href** attribute)."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Quotes to Scrape']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('a[style=\"text-decoration: none\"]::text').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To get the value of **href** attribute (and same for any other attribute) the following approach can be used in **Scrapy**, which can be considered the alternative to **get()** function in BeautifulSoup or lxml."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'/']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('a[style=\"text-decoration: none\"]::attr(href)').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Scrapy** also supports regular expressions that can directly be applied on matched response. For example, let's select only the \"to Scrape\" part from the heading using regular expressions. We just need to substitute the **extract()** function with a **re()** function that will take the expression as an argument."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'to Scrape']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# expression explanation: find Quotes, a whitespace, anything else\n",
"# return only anything else component\n",
"response.css('h1 a::text').re('Quotes\\s(.*)')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Similarly, we could use RegEx to find, match and return each word of the heading separately as a list element:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Quotes', u'to', u'Scrape']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.css('h1 a::text').re('(\\S+)\\s(\\S+)\\s(\\S+)')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Perfect, we are done now with **css()** function, let's now implement the same in **xpath()**.\n",
"\n",
"### XPath approach"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<title>Quotes to Scrape</title>']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.xpath('//title').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To get the text only, the following should be added to the Xpath argument: **/text()**"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Quotes to Scrape']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.xpath('//title/text()').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Similarly, we can find the `<a>` tag inside the `<h1>` and extract first text then the link."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'<a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>']"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.xpath('//h1/a').extract()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Quotes to Scrape']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.xpath('//h1/a/text()').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**xpath()** function operates in the same way as in the **lxml** package, which means **/@href** should be added to the path to select the value of the **href** attribute (i.e. the link)."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"[u'/']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.xpath('//h1/a/@href').extract()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is all for Part 1. We just used Scrapy as a library and experienced part of its power: **Scrapy** is kind of the combination of whatever we learnt till now. Yet, this is not the only reason **Scrapy** is powerful and demanded. The rest will be covered in following parts.\n",
"\n",
"P.S. If you were using command prompt to run this code, then run the **exit()** command to exit Scrapy. If you want to save your commands before exiting into a Python file, then the following command will be of use:\n",
"``` \n",
"%save my_commands 1-56\n",
"```\n",
"where my_commands is the name of the file to be created (change it based on your taste) and 1-56 tells Python to save the code starting from line 1 (very beginning) and ending with line 56 (put the line that you want here, the last one if you want to save the whole code)."
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,194 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrapy 2: using the Spider\n",
"\n",
"Scrapy is a powerful Python web scraping framework. We will experience its power today by scraping [quotes.toscrape.com](quotes.toscrape.com).\n",
"To understand how Scrapy works, first of all we need to create a Scrapy project. For that purpose go to the command prompt, change it to your usual directory (e.g. Data_scraping folder) and run the following command:\n",
"\n",
"```\n",
"scrapy startproject Quotes\n",
"```\n",
"This command will generate a new folder titled **Quotes** with several files and folders. What you should be interested in now is the folder called spiders inside another folder again titled **Quotes**. This folder includes the scrapers that you use (none for now). As the scrapers are usually getting data from the web, they are called spiders. To generate our first spider, change the directory from command prompt to the newly created project folder using the following command:\n",
"```\n",
"cd Quotes\n",
"```\n",
"Afterwards, run the following command to generate a spider based on the default simple sample.\n",
"```\n",
"scrapy genspider QuoteScraper quotes.toscrape.com\n",
"```\n",
"The third argument is the name of the scraper class, while the last argument provides the overall domain where you may scrape pages from. Once it is done, a QuoteScraper.py file will appear in the abovementioned spiders folder. Open the file and start editing. The initial file will include the general structure, however, the allowed domain and start url variables will be built on the above-provided information (4th variable). Yet, there is nothing inside the defined parse() function. Let's fill it in."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/']\n",
"\n",
" def parse(self, response):\n",
" pass\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What we want is to get the data (response.body) saved in an HTML file, thus, we add 2 lines of code in the end."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/']\n",
"\n",
" def parse(self, response):\n",
" with open('scraped.html','w') as f:\n",
" f.write(response.body)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What if you want to scrape two pages and save the body as an HTML file (with a proper filename)? That's again easy, one just needs to do 2 things:\n",
"1. Add both URLs to the start_urls list as done below,\n",
"2. create a filename variable which will take the second-to-last character of the page name (1 or 2 in our case) and append it to the filename."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/page/1/',\n",
" 'http://quotes.toscrape.com/page/2/']\n",
"\n",
" def parse(self, response):\n",
" filename = \"quotes\"+response.url[-2:-1]+\".html\"\n",
" with open(filename,'w') as f:\n",
" f.write(response.body)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"That's cool, but not that much of a scraping yet. We get the page, but not the data of interest. Let's assume one is interested in getting the following data: quote, its author and the keyword tags. The following spider would help (all same but parse function):"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/page/1/',\n",
" 'http://quotes.toscrape.com/page/2/']\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The **yield** keyword above is like the **return**, yet instead of returning a value, it's just generating it and forgetting about that (computationally efficient). This is helpful when you want to write some values into a file and forget about them. That's what Scrapy is doing. Once you have this function ready, you can write scraped values into a JSON file by just using the following command inside the command prompt:\n",
"```\n",
"scrapy crawl quote -o quotes.json\n",
"```\n",
"The output will be a JSON file with scraped data. If you are interested in getting a JSON lines document, then just change the file format from **.json** to **.jl**.\n",
"\n",
"It is important to note that all those changes happened to the QuoteScraper.py file, while there are some other files also generated by Scrapy. One of those is titled **`settings.py`**, which includes information on settings that one can change. The most important components probably are:\n",
"\n",
"- BOT_NAME = 'quotes' - that's the bot name, used to be recognized by websites being scraped,\n",
"- ROBOTSTXT_OBEY = True - tells the spider to obey robots.txt, i.e. not scrape if it is not allowed,\n",
"- DOWNLOAD_DELAY = 3 - provides the number of seconds for sleeping between requests."
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,246 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrapy 3: crawling all pages\n",
"\n",
"The last notebook (scrapy 2) provided Scrapy code for scraping a page from quotes.toscrape.com. Yet, there are several other pages on this website that one may need to scrape. Which means, we have to actually create a Spider that does the same scraping tasks for all the URLs, not just one. That can be implemented in several ways, but first of all, let's start a new project and generate a new spider.\n",
"\n",
"To start a new project, open the command prompt (move to the Data_Scraping folder, if you always do so) and run the following command:\n",
"\n",
"`\n",
"scrapy startproject quote_pages\n",
"`\n",
"\n",
"So now move to the newly created folder and generate a new spider (called quote_all) for getting data from quotes.toscrape.com as follows:\n",
"\n",
"`\n",
"cd quote_pages\n",
"scrapy genspider quote_all quotes.toscrape.com\n",
"`\n",
"\n",
"The spider we will create is basically the same we had before (that scraped the same page and yielded a JSON file) just with some small changes. So let's copy the code from our spider and paste it inside the newly generated quote_all.py file."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/page/1/',\n",
" 'http://quotes.toscrape.com/page/2/']\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see the very first (and brutal) approach can be adding the URLs one-by-one to the start_urls list. The good news is that all URLs are quite similar: the only difference is the page number. This means we can construct URLs from three components as follows:\n",
"`\n",
"URL = 'http://quotes.toscrape.com/page/' + '1' + '/'\n",
"`\n",
"where the 2nd component (in this case 1) is the only variable component. If you check manually, you will see that there are 10 pages overall that include quote data. Which means, we can create each separate link using **range()** function and append them to the start_urls empty list as follows:\n",
"\n",
"`\n",
"start_urls = []\n",
"for i in range(1,11):\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
"`\n",
"\n",
"Thus, the overall function after the abovementioned change will look like this (P.S. also, change the **name** variable value as we do not want to have 2 scrapers with the same name):"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = []\n",
" for i in range(1,11):\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same, of course, could be achieved using a while loop as follows:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = []\n",
" i=1\n",
" while i<11:\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
" i+=1\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This approach is easy and user friendly, yet it requires you to know the overall number of pages (10, in our case). A smarter solution would be the one that will not require you to have this information. If you take an attentive look you will notice that there is a **Next** button on each single page and there is only one page which is missing the **Next** button: the last page. The button includes a hyperlink to each next page. As there is no next page for the last one, there is no next button on it. Which means we can navigate over pages by finding the hyperlink under the next button. It can be found with the following code, which is using CSS selectors to find a list item (li) with a class next, then find an `<a>` tag inside the list item and get the value of its **href** attribute:\n",
"\n",
"`\n",
"next_page = response.css('li.next a::attr(href)').extract_first()\n",
"`\n",
"\n",
"If we are on the very first page, the value of the **next_page** guy will be **/page/2/**. Then this will be the absolute link of the 2nd page:\n",
"\n",
"`\n",
"new_link = 'http://quotes.toscrape.com' + next_page\n",
"`\n",
"\n",
"To finalize the code what we need to do is to first check whether there is any next button (any next_page url) and if so, then yield a new request to the new url as follows:\n",
"\n",
"`\n",
"if next_page is not None:\n",
" yield scrapy.Request(new_link)\n",
"`\n",
"\n",
"The code above must be added inside the defined **parse()** function (but outside the for loop). Thus, the full code will look like this."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = [\"http://quotes.toscrape.com/\"]\n",
" \n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }\n",
" next_page = response.css('li.next a::attr(href)').extract_first()\n",
" new_link = \"http://quotes.toscrape.com\" + next_page\n",
"\n",
" if next_page is not None:\n",
" yield scrapy.Request(new_link)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Excellent! The code above must work, and to check it we can run a command that will generate a JSON file from the scraped data as follows:\n",
"\n",
"`\n",
"scrapy crawl quote_new -o all_page_data.json\n",
"`"
]
}
],
"metadata": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,246 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrapy 3: crawling all pages\n",
"\n",
"The last notebook (scrapy 2) provided Scrapy code for scraping a page from quotes.toscrape.com. Yet, there are several other pages on this website that one may need to scrape. Which means, we have to actually create a Spider that does the same scraping tasks for all the URLs, not just one. That can be implemented in several ways, but first of all, let's start a new project and generate a new spider.\n",
"\n",
"To start a new project, open the command prompt (move to the Data_Scraping folder, if you always do so) and run the following command:\n",
"\n",
"`\n",
"scrapy startproject quote_pages\n",
"`\n",
"\n",
"So now move to the newly created folder and generate a new spider (called quote_all) for getting data from quotes.toscrape.com as follows:\n",
"\n",
"`\n",
"cd quote_pages\n",
"scrapy genspider quote_all quotes.toscrape.com\n",
"`\n",
"\n",
"The spider we will create is basically the same we had before (that scraped the same page and yielded a JSON file) just with some small changes. So let's copy the code from our spider and paste it inside the newly generated quote_all.py file."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = ['http://quotes.toscrape.com/page/1/',\n",
" 'http://quotes.toscrape.com/page/2/']\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see the very first (and brutal) approach can be adding the URLs one-by-one to the start_urls list. The good news is that all URLs are quite similar: the only difference is the page number. This means we can construct URLs from three components as follows:\n",
"`\n",
"URL = 'http://quotes.toscrape.com/page/' + '1' + '/'\n",
"`\n",
"where the 2nd component (in this case 1) is the only variable component. If you check manually, you will see that there are 10 pages overall that include quote data. Which means, we can create each separate link using **range()** function and append them to the start_urls empty list as follows:\n",
"\n",
"`\n",
"start_urls = []\n",
"for i in range(1,11):\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
"`\n",
"\n",
"Thus, the overall function after the abovementioned change will look like this (P.S. also, change the **name** variable value as we do not want to have 2 scrapers with the same name):"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = []\n",
" for i in range(1,11):\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same, of course, could be achieved using a while loop as follows:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = []\n",
" i=1\n",
" while i<11:\n",
" URL = 'http://quotes.toscrape.com/page/' + str(i) + '/'\n",
" start_urls.append(URL)\n",
" i+=1\n",
"\n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This approach is easy and user friendly, yet it requires you to know the overall number of pages (10, in our case). A smarter solution would be the one that will not require you to have this information. If you take an attentive look you will notice that there is a **Next** button on each single page and there is only one page which is missing the **Next** button: the last page. The button includes a hyperlink to each next page. As there is no next page for the last one, there is no next button on it. Which means we can navigate over pages by finding the hyperlink under the next button. It can be found with the following code, which is using CSS selectors to find a list item (li) with a class next, then find an `<a>` tag inside the list item and get the value of its **href** attribute:\n",
"\n",
"`\n",
"next_page = response.css('li.next a::attr(href)').extract_first()\n",
"`\n",
"\n",
"If we are on the very first page, the value of the **next_page** guy will be **/page/2/**. Then this will be the absolute link of the 2nd page:\n",
"\n",
"`\n",
"new_link = 'http://quotes.toscrape.com' + next_page\n",
"`\n",
"\n",
"To finalize the code what we need to do is to first check whether there is any next button (any next_page url) and if so, then yield a new request to the new url as follows:\n",
"\n",
"`\n",
"if next_page is not None:\n",
" yield scrapy.Request(new_link)\n",
"`\n",
"\n",
"The code above must be added inside the defined **parse()** function (but outside the for loop). Thus, the full code will look like this."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import scrapy\n",
"\n",
"\n",
"class QuoteSpider(scrapy.Spider):\n",
" name = \"quote_new\"\n",
" allowed_domains = [\"quotes.toscrape.com\"]\n",
" start_urls = [\"http://quotes.toscrape.com/\"]\n",
" \n",
" def parse(self, response):\n",
" for quote in response.css('div.quote'):\n",
" yield {\n",
" 'text': quote.css('span.text::text').extract_first(),\n",
" 'author': quote.css('span small.author::text').extract_first(),\n",
" 'tags': quote.css('div.tags a.tag::text').extract(),\n",
" }\n",
" next_page = response.css('li.next a::attr(href)').extract_first()\n",
" new_link = \"http://quotes.toscrape.com\" + next_page\n",
"\n",
" if next_page is not None:\n",
" yield scrapy.Request(new_link)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Excellent! The code above must work, and to check it we can run a command that will generate a JSON file from the scraped data as follows:\n",
"\n",
"`\n",
"scrapy crawl quote_new -o all_page_data.json\n",
"`"
]
}
],
"metadata": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,405 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Selenium 1\n",
"\n",
"The libraries covered beofre (BeautifulSoup, lxml, Scrapy) provide user friendly interface for getting data from the web using the HTML source of a page. Yet, sometimes the HTML source is not directly available: it can be created as an output of a function (usually generated by JavaScript) which is toggled by a user input. For example, the data on a page can be generated by ckicling a button on a page or filling in a form or choosing a vlue from a filter. A simple request to a URL will not provide the data, as no user interaction has taken place. In this case, one should write a Python code that will act as a webbrowser. There are many libraries that provide this functionality, but we will concetrate on one of the most popular among them called [**Selenium**](http://selenium-python.readthedocs.io/index.html). First of all, you need to ahve it installed by running the following command in the command prompt:\n",
"\n",
"`\n",
"pip install selenium\n",
"`\n",
"\n",
"Once **selenium** is installed you need to download the webdriver of your browser to your local directory. For example, if your notebook is inside the **Data_Scraping** folder, and your are using the Chrome/Firefox webbrowser, then you may download the drivers from here:\n",
"\n",
"- [Chrome driver](https://sites.google.com/a/chromium.org/chromedriver/downloads)\n",
"- [Firefox driver](https://github.com/mozilla/geckodriver/releases)\n",
"\n",
"Alright, you are now ready to move to the code. Let's write an algorithm that will open the Chrome browser, go to the [www.inventwithpython.com](http://inventwithpython.com/), find a hypterlink titled \"**Read It Online**\" (find it using the text directly) and click on it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"import sys\n",
"!{sys.executable} -m pip install selenium\n",
"\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.options import Options\n",
"options = Options()\n",
"options.add_argument(\"--no-sandbox\")\n",
"options.add_argument(\"--disable-dev-shm-usage\")\n",
"options.binary_location = \"/usr/bin/google-chrome-stable\"\n",
"# change Chrome() below with Firefox(), if the latter is the driver you decided to use\n",
"browser = webdriver.Chrome(options = options, executable_path=\"/home/jovyan/chromedriver\")\n",
"url = 'http://inventwithpython.com'\n",
"browser.get(url)\n",
"our_element = browser.find_element_by_link_text('Read It Online')\n",
"type(our_element)\n",
"our_element.click() # follows the \"Read It Online\" link"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.options import Options\n",
"options = Options()\n",
"options.add_argument(\"--no-sandbox\")\n",
"options.add_argument(\"--disable-dev-shm-usage\")\n",
"options.binary_location = \"/usr/bin/google-chrome-stable\"\n",
"# driver = webdriver.Chrome(chrome_options = options, executable_path=r'C:\\path\\to\\chromedriver.exe')\n",
"options.add_argument(\"--remote-debugging-port=9222\")\n",
"options.headless = True\n",
"driver = webdriver.Chrome(options = options, executable_path=\"/home/jovyan/chromedriver\")\n",
"driver.get(\"https://www.hotstar.com\")\n",
"driver.find_element_by_id(\"searchField\").send_keys(\"Movies\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"browser.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's do a similar task for yahoo. These are the steps to take:\n",
"- open the Chrome browser,\n",
"- go to the yahoo mail login page,\n",
"- find the username form and then fill it in with your e-mail address,\n",
"- click on the next button,\n",
"- find the password form and then fill it in with your password,\n",
"- click submit."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"browser = webdriver.Chrome()\n",
"browser.get('https://mail.yahoo.com')\n",
"email_element = browser.find_element_by_id('login-username')\n",
"email_element.send_keys('hrantdavtyan@yahoo.com')\n",
"next_button_element = browser.find_element_by_id('login-signin')\n",
"next_button_element.click()\n",
"password_element = browser.find_element_by_id('login-passwd')\n",
"password_element.send_keys('my_password')\n",
"password_element.submit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"browser.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## CheatSheet - *sourced from the \"Automate boring staff with Python\" book, chapter 11*\n",
"\n",
"\n",
"### Table 1: Selenium’s WebDriver Methods for Finding Elements\n",
"\n",
"<table summary=\"Selenium’s WebDriver Methods for Finding Elements\" class=\"calibre9\">\n",
"<colgroup class=\"calibre10\">\n",
"<col class=\"calibre11\">\n",
"<col class=\"calibre11\">\n",
"</colgroup>\n",
"<thead class=\"calibre12\">\n",
"<tr class=\"calibre13\">\n",
"<th valign=\"top\" class=\"calibre14\">\n",
"<p class=\"calibre4\"><a id=\"calibre_link-305\" class=\"calibre1\"></a><a id=\"calibre_link-323\" class=\"calibre1\"></a><a id=\"calibre_link-385\" class=\"calibre1\"></a><a id=\"calibre_link-735\" class=\"calibre1\"></a><a id=\"calibre_link-902\" class=\"calibre1\"></a><a id=\"calibre_link-903\" class=\"calibre1\"></a><a id=\"calibre_link-906\" class=\"calibre1\"></a><a id=\"calibre_link-1016\" class=\"calibre1\"></a><a id=\"calibre_link-1551\" class=\"calibre1\"></a><a id=\"calibre_link-1693\" class=\"calibre1\"></a>Method name</p>\n",
"</th>\n",
"<th valign=\"top\" class=\"calibre15\">\n",
"<p class=\"calibre4\">WebElement object/list returned</p>\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody class=\"calibre16\">\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2988\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_class_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)\n",
"browser.find_elements_by_class_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">Elements that use the CSS class <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">name</code></em></span></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2989\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_css_selector(<span class=\"calibre1\"><em class=\"literal3\">selector</em></span>)\n",
"browser.find_elements_by_css_selector(<span class=\"calibre1\"><em class=\"literal3\">selector</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">Elements that match the CSS <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">selector</code></em></span></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2990\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_id(<span class=\"calibre1\"><em class=\"literal3\">id</em></span>)\n",
"browser.find_elements_by_id(<span class=\"calibre1\"><em class=\"literal3\">id</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">Elements with a matching <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">id</code></em></span> attribute value</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2991\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_link_text(<span class=\"calibre1\"><em class=\"literal3\">text</em></span>)\n",
"browser.find_elements_by_link_text(<span class=\"calibre1\"><em class=\"literal3\">text</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">&lt;a&gt;</code> elements that completely match the <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">text</code></em></span> provided</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2992\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_partial_link_text(<span class=\"calibre1\"><em class=\"literal3\">text</em></span>)\n",
"browser.find_elements_by_partial_link_text(<span class=\"calibre1\"><em class=\"literal3\">text</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">&lt;a&gt;</code> elements that contain the <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">text</code></em></span> provided</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\"><a id=\"calibre_link-2993\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)\n",
"browser.find_elements_by_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">Elements with a matching <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">name</code></em></span> attribute value</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre20\"><a id=\"calibre_link-2994\" class=\"calibre1\"></a>\n",
"<pre class=\"programlisting2\">browser.find_element_by_tag_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)\n",
"browser.find_elements_by_tag_name(<span class=\"calibre1\"><em class=\"literal3\">name</em></span>)</pre>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre21\">\n",
"<p class=\"calibre4\">Elements with a matching tag <span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">name</code></em></span> (case insensitive; an <code class=\"literal2\">&lt;a&gt;</code> element is matched by <code class=\"literal2\">'a'</code> and <code class=\"literal2\">'A'</code>)</p>\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"\n",
"### Table 2: WebElement Attributes and Methods\n",
"\n",
"<table summary=\"WebElement Attributes and Methods\" class=\"calibre9\">\n",
"<colgroup class=\"calibre10\">\n",
"<col class=\"calibre11\">\n",
"<col class=\"calibre11\">\n",
"</colgroup>\n",
"<thead class=\"calibre12\">\n",
"<tr class=\"calibre13\">\n",
"<th valign=\"top\" class=\"calibre14\">\n",
"<p class=\"calibre4\">Attribute or method</p>\n",
"</th>\n",
"<th valign=\"top\" class=\"calibre15\">\n",
"<p class=\"calibre4\">Description</p>\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody class=\"calibre16\">\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">tag_name</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The tag name, such as <code class=\"literal2\">'a'</code> for an <code class=\"literal2\">&lt;a&gt;</code> element</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">get_attribute(</code><span class=\"calibre1\"><em class=\"calibre5\"><code class=\"literal4\">name</code></em></span><code class=\"literal2\">)</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The value for the element’s <code class=\"literal2\">name</code> attribute</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">text</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The text within the element, such as <code class=\"literal2\">'hello'</code> in <code class=\"literal2\">&lt;span&gt;hello&lt;/span&gt;</code></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">clear()</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">For text field or text area elements, clears the text typed into it</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">is_displayed()</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">Returns <code class=\"literal2\">True</code> if the element is visible; otherwise returns <code class=\"literal2\">False</code></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">is_enabled()</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">For input elements, returns <code class=\"literal2\">True</code> if the element is enabled; otherwise returns <code class=\"literal2\">False</code></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">is_selected()</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">For checkbox or radio button elements, returns <code class=\"literal2\">True</code> if the element is selected; otherwise returns <code class=\"literal2\">False</code></p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre20\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">location</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre21\">\n",
"<p class=\"calibre4\">A dictionary with keys <code class=\"literal2\">'x'</code> and <code class=\"literal2\">'y'</code> for the position of the element in the page</p>\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"\n",
"### Table 3: Commonly Used Variables in the selenium.webdriver.common.keys Module\n",
"\n",
"<table summary=\"Commonly Used Variables in the selenium.webdriver.common.keys Module\" class=\"calibre9\">\n",
"<colgroup class=\"calibre10\">\n",
"<col class=\"calibre11\">\n",
"<col class=\"calibre11\">\n",
"</colgroup>\n",
"<thead class=\"calibre12\">\n",
"<tr class=\"calibre13\">\n",
"<th valign=\"top\" class=\"calibre14\">\n",
"<p class=\"calibre4\">Attributes</p>\n",
"</th>\n",
"<th valign=\"top\" class=\"calibre15\">\n",
"<p class=\"calibre4\">Meanings</p>\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody class=\"calibre16\">\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.DOWN</code>, <code class=\"literal2\">Keys.UP</code>, <code class=\"literal2\">Keys.LEFT</code>, <code class=\"literal2\">Keys.RIGHT</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The keyboard arrow keys</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.ENTER</code>, <code class=\"literal2\">Keys.RETURN</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The <span class=\"smaller\">ENTER</span> and <span class=\"smaller\">RETURN</span> keys</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.HOME</code>, <code class=\"literal2\">Keys.END</code>, <code class=\"literal2\">Keys.PAGE_DOWN</code>, <code class=\"literal2\">Keys.PAGE_UP</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The <code class=\"literal2\">home</code>, <code class=\"literal2\">end</code>, <code class=\"literal2\">pagedown</code>, and <code class=\"literal2\">pageup</code> keys</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.ESCAPE</code>, <code class=\"literal2\">Keys.BACK_SPACE</code>, <code class=\"literal2\">Keys.DELETE</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The <span class=\"smaller\">ESC</span>, <span class=\"smaller\">BACKSPACE</span>, and <span class=\"smaller\">DELETE</span> keys</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre13\">\n",
"<td valign=\"top\" class=\"calibre17\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.F1</code>, <code class=\"literal2\">Keys.F2</code>,..., <code class=\"literal2\">Keys.F12</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre18\">\n",
"<p class=\"calibre4\">The F1 to F12 keys at the top of the keyboard</p>\n",
"</td>\n",
"</tr>\n",
"<tr class=\"calibre19\">\n",
"<td valign=\"top\" class=\"calibre20\">\n",
"<p class=\"calibre4\"><code class=\"literal2\">Keys.TAB</code></p>\n",
"</td>\n",
"<td valign=\"top\" class=\"calibre21\">\n",
"<p class=\"calibre4\">The <span class=\"smaller\">TAB</span> key</p>\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"\n",
"\n",
"### Table 4: Methods for Clicking Browser Buttons\n",
"\n",
"|Method name|Description|\n",
"|-|-|\n",
"|browser.back()| Clicks the Back button.\n",
"|browser.forward()| Clicks the Forward button.\n",
"|browser.refresh()| Clicks the Refresh/Reload button.\n",
"|browser.quit()| Clicks the Close Window button."
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,221 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Selenium 2 - Submitting pin to roadpolice.am"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"browser = webdriver.Chrome()\n",
"url = \"https://offense.roadpolice.am/violation\"\n",
"browser.get(url)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"pin_code = \"173679JNYEJM\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"input_form = browser.find_element_by_xpath('//*[@id=\"pin\"]')\n",
"input_form.send_keys(pin_code)\n",
"submit_button = browser.find_element_by_tag_name(\"button\")\n",
"submit_button.click()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"js = '''html = document.getElementsByTagName('html')[0];\n",
" return html.outerHTML;'''\n",
"html = browser.execute_script(js).encode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"with open(\"page_source.html\",\"w\") as f:\n",
" f.write(html)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"with open(\"page_source.html\",\"w\") as f:\n",
" f.write(browser.page_source.encode('utf-8'))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"11 հնս. 2017թ. Ժամը` 10:31\n"
]
}
],
"source": [
"date = browser.find_element_by_xpath('//*[@id=\"main_data\"]/tbody/tr[6]/td[3]')\n",
"print(date.text)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5000 (հինգ հազար) դրամ (Վճարել առցանց):\n"
]
}
],
"source": [
"amount = browser.find_element_by_css_selector(\"li b\").text\n",
"print(amount)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"import re"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5000\n"
]
}
],
"source": [
"drams = re.findall(\"[0-9]+\",amount)\n",
"print(drams[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"browser.close()"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,64 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "47d75092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <iframe\n",
" width=\"800\"\n",
" height=\"450\"\n",
" src=\"https://arxiv.org/pdf/1406.2661.pdf\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
" \n",
" ></iframe>\n",
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7ffac85f9d90>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import IFrame\n",
"IFrame('https://arxiv.org/pdf/1406.2661.pdf', width=800, height=450)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

Binary file not shown.

File diff suppressed because one or more lines are too long

@ -0,0 +1,265 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 138,
"id": "dca88210-18db-45ee-9949-7010b92dbae7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/jovyan\n"
]
}
],
"source": [
"import json\n",
"\n",
"# from json import JSONEncoder\n",
"import os\n",
"import pprint\n",
"\n",
"from gql import Client, gql\n",
"from gql.transport.requests import RequestsHTTPTransport\n",
"\n",
"cwd = os.getcwd()\n",
"print(cwd)"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "7374f89e-23f8-407d-967b-c76e7d6d93a2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"with open(\"scrubbed_records.json\") as f:\n",
" scrubbed_records = json.load(f)\n",
"# with open(\"keys.json\") as key1:\n",
"# keys = json.load(key1);\n",
"\n",
"# print(scrubbed_records[0])\n",
"\n",
"# d8 = keys | scrubbed_records\n",
"# print(d8)\n",
"# merged = {key: value for (key, value) in (keys.items() + scrubbed_records.items())}\n",
"# jsonString_merged ="
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0367eee-e052-4184-b016-8ecb0dd1aa32",
"metadata": {
"tags": []
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 140,
"id": "4f03fbfe-147b-4011-b71f-b9031810aa44",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# print(jsonString_merged)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2983b27-6169-4a16-bc70-adfe9b6023b0",
"metadata": {},
"outputs": [],
"source": [
"sample_transport = RequestsHTTPTransport(\n",
" url=\"http://directus:8055/graphql\",\n",
" use_json=True,\n",
" headers={\n",
" \"Content-type\": \"application/json\",\n",
" \"Authorization\": \"os.env(directus_token)\",\n",
" },\n",
" verify=True,\n",
" retries=3,\n",
")\n",
"\n",
"client = Client(\n",
" transport=sample_transport,\n",
" fetch_schema_from_transport=True,\n",
")\n",
"\n",
"job_posts = gql(\n",
" \"\"\"\n",
"mutation{\n",
" update_job_posts_item(id: ______, data: ______){\n",
" id\n",
" status\n",
" sort\n",
" date_created\n",
" date_updated\n",
" json_object\n",
" }\n",
"}\n",
"\"\"\"\n",
")\n",
"\n",
"\n",
"y = json.dumps(scrubbed_records)\n",
"for x in y:\n",
"\n",
" # ef = x.loads()\n",
" # print(x)\n",
" # id = x[\"article_attributes_data-listing-version-key\"]\n",
" # pairs = y.items()\n",
" pprint.pprint(x)\n",
"# print(y)\n",
"# id = y[\"article_attributes_data-listing-version-key\"]\n",
"# print(id)\n",
"# data = y[scrubbed_record]\n",
"# print(scrubbed_record)\n",
"\n",
"\n",
"# Mutation ($data: JSON!) {\n",
"# update_job_blocks_item(\n",
"# id: \"ziprecruiter\"\n",
"# data: {\n",
"# title: $title\n",
"# }\n",
"# )\n",
"# }\n",
"\n",
"# title = 'article_attributes_data-posted-on'\n",
"# \"article_attributes_data-posted-on\": \"Capital One\",\n",
"# \"article_attributes_data-listing-version-key\": \"DwJbw6IxmCHCl9sUWenVkQ.--M7HviTGj7\",\n",
"# \"article_attributes_data-job-id\": \"tmp_dc_capitalone_tier2_perengo_cpc9e35f5e59e35f5e5-lzqj0cb\",\n",
"# \"article_attributes_data-location\": \"Royse City, TX\",\n",
"# \"article_attributes_data-lat\": 32.9751205444,\n",
"# \"article_attributes_data-lng\": -96.3324813843,\n",
"\n",
"# print(title)\n",
"\n",
"# params = { \"title\": article_attributes_data-posted-on }\n",
"\n",
"# result = client.execute(mutate, variable_values=params)"
]
},
{
"cell_type": "code",
"execution_count": 136,
"id": "28c500a2-becf-4d86-a88d-c1f30b3a5e99",
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"\n",
"# print(result)\n",
"\n",
"# query = gql('''\n",
"# query {\n",
"# json_server_by_id(id: \"ziprecruiter\") {\n",
"# data\n",
"# }\n",
"# }\n",
"# ''')\n",
"\n",
"\n",
"# mutate = gql(\n",
"# \"\"\"\n",
"# mutation ($data: JSON!) {\n",
"# update_json_server_item(\n",
"# id: \"ziprecruiter\"\n",
"# data: {\n",
"# data: $data\n",
"# }\n",
"# ) {\n",
"# data\n",
"# }\n",
"# }\n",
"# \"\"\"\n",
"# )\n",
"\n",
"# add = gql(\n",
"# \"\"\"\n",
"# mutation ($create_test_data: [create_test_input!]) {\n",
"# create_test_items( data: $create_test_data ) {\n",
"# id\n",
"# json_object\n",
"# }\n",
"# }\n",
"# \"\"\"\n",
"# )\n",
"# init = gql(\n",
"# \"\"\"\n",
"# mutation ($collection: String!, $data: !) {\n",
"# create_fields_item(collection: \"job_posts\", data: { field: \"title\", type: \"string\" ) {\n",
"# collection\n",
"# field\n",
"# }\n",
"# }\n",
"# \"\"\"\n",
"# )\n",
"\n",
"# test3 = { \"field\": \"title\", type: \"string\", \"meta\": { \"icon\": \"title\" }, \"schema\": { \"default_value\": \"null\" } }\n",
"\n",
"# params = {\"collection\": \"job_posts\", \"data\": test3}\n",
"\n",
"# result = client.execute(init, variable_values=params)\n",
"# # print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a52af9c-4127-4dbf-bac4-20086433fc80",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b300a9-f7c0-4863-9696-29e92e6cedc8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showtags": false,
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

@ -0,0 +1,325 @@
<div>
<div class="
jobs-details__main-content jobs-details__main-content--single-pane full-width
">
<!---->
<!---->
<div>
<div class="jobs-unified-top-card t-14 ">
<div class="jobs-unified-top-card__buttons-container">
<div class="display-flex flex-column">
<div class="display-flex justify-flex-end">
<div id="ember189" class="artdeco-dropdown artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view">
<button aria-expanded="false" id="ember190" class="social-share__dropdown-trigger artdeco-button artdeco-button--3 artdeco-button--tertiary artdeco-button--circle artdeco-button--muted artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="share-linkedin-icon" class="artdeco-button__icon" size="medium"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M23 12l-4.61 7H16l4-6H8a3.92 3.92 0 00-4 3.84V17a4 4 0 00.19 1.24L5.12 21H3l-.73-2.22A6.4 6.4 0 012 16.94 6 6 0 018 11h12l-4-6h2.39z"></path>
</svg></li-icon>
<span class="artdeco-button__text">Share</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember191" class="social-share__content text-align-left artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view" role="group"><!----></div>
</div>
<div>
<!---->
</div>
<div id="ember192" class="artdeco-dropdown jobs-options artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view">
<button aria-expanded="false" id="ember193" class="artdeco-button artdeco-button--3 artdeco-button--tertiary artdeco-button--muted artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="ellipsis-horizontal-icon" class="artdeco-button__icon"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M14 12a2 2 0 11-2-2 2 2 0 012 2zM4 10a2 2 0 102 2 2 2 0 00-2-2zm16 0a2 2 0 102 2 2 2 0 00-2-2z"></path>
</svg></li-icon>
<span class="artdeco-button__text">
Show more options
</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember194" class="artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view"><!----></div>
</div>
</div>
<!----> </div>
</div>
<div class="jobs-unified-top-card__content--two-pane">
<!---->
<a href="/jobs/view/2752821440/?alternateChannel=search&amp;refId=UFjk24M%2BLKGhBAY9aiOczQ%3D%3D&amp;trackingId=W%2B8C570N6B%2FHsgwqCw%2FOxg%3D%3D" id="ember195" class="ember-view">
<h2 class="t-24 t-bold">Senior/Lead DevOps Engineer</h2>
</a>
<div class="mt2">
<span class="jobs-unified-top-card__subtitle-primary-grouping mr2 t-black">
<span>
<a href="/company/glocomms/life/" id="ember196" class="ember-view t-black t-normal">
Glocomms
</a>
</span>
<span class="jobs-unified-top-card__bullet">
Mountain View, CA
</span>
<span class="jobs-unified-top-card__workplace-type">Remote</span>
</span>
<span class="jobs-unified-top-card__subtitle-secondary-grouping t-black--light">
<span class="jobs-unified-top-card__posted-date
jobs-unified-top-card__posted-date--new t-bold">
6 hours ago
</span>
<span class="jobs-unified-top-card__bullet">
<span class="jobs-unified-top-card__applicant-count
">
10 applicants
</span>
</span>
</span>
</div>
<div class="mt5 mb2">
<div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<li-icon aria-hidden="true" type="briefcase-icon" size="large"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M17 6V5a3 3 0 00-3-3h-4a3 3 0 00-3 3v1H2v4a3 3 0 003 3h14a3 3 0 003-3V6zM9 5a1 1 0 011-1h4a1 1 0 011 1v1H9zm10 9a4 4 0 003-1.38V17a3 3 0 01-3 3H5a3 3 0 01-3-3v-4.38A4 4 0 005 14z"></path>
</svg></li-icon>
</div>
</div>
</div>
<span>
<a class="app-aware-link" target="_self" href="#SALARY"><!---->$160,000/yr - $220,000/yr<!----></a><span class="white-space-pre"> </span>· Full-time · Mid-Senior level<!---->
</span>
</div>
<div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<li-icon aria-hidden="true" type="company-icon" size="large"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M4 2v20h16V2zm14 18h-4v-2h-4v2H6V4h12zm-7-8H8v-2h3zm0 4H8v-2h3zm5-4h-3v-2h3zm-5-4H8V6h3zm5 0h-3V6h3zm0 8h-3v-2h3z"></path>
</svg></li-icon>
</div>
</div>
</div>
<span>
<!---->51-200 employees · Staffing &amp; Recruiting<!---->
</span>
</div>
<!----> <div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<!----> <img width="32" src="https://media-exp1.licdn.com/dms/image/C4E35AQEIVkoUWgLFvw/profile-framedphoto-shrink_100_100/0/1597096649541?e=1636084800&amp;v=beta&amp;t=O7FLpFaXORoQWfqHkbn1obAGhL7oUQahjXF4lruD53o" loading="lazy" height="32" alt="" id="ember416" class="ivm-view-attr__img--centered EntityPhoto-circle-1 lazy-image ember-view">
</div>
</div>
</div>
<span>
<!---->Your profile matches this job<!---->
</span>
</div>
</div>
<div class="mt5">
<div class="display-flex">
<div class="jobs-s-apply jobs-s-apply--fadein inline-flex mr2">
<div class="jobs-apply-button--top-card">
<button aria-label="Apply to Senior/Lead DevOps Engineer at Glocomms" id="ember420" class="jobs-apply-button artdeco-button artdeco-button--3 artdeco-button--primary ember-view" data-control-name="jobdetails_topcard_inapply" data-job-id="2752821440"><!---->
<span class="artdeco-button__text">
Apply now
</span></button>
</div>
</div>
<button class="jobs-save-button artdeco-button artdeco-button--3 artdeco-button--secondary" type="button">
<!----> <span aria-hidden="true">
Save
</span>
<span class="a11y-text">
Save Senior/Lead DevOps Engineer at Glocomms
</span>
</button>
<!----> </div>
<!---->
<!----><!----> </div>
<!---->
<!---->
<!---->
<!----> </div>
<!---->
<!---->
<div class="jobs-unified-top-card__sticky-header
">
<div class="jobs-unified-top-card__title-container">
<a data-control-id="W+8C570N6B/HsgwqCw/Oxg==" href="/jobs/view/2752821440/?alternateChannel=search&amp;refId=UFjk24M%2BLKGhBAY9aiOczQ%3D%3D&amp;trackingId=W%2B8C570N6B%2FHsgwqCw%2FOxg%3D%3D" id="ember197" class="ember-view">
<h2 class="t-16 t-black t-bold truncate">
Senior/Lead DevOps Engineer
</h2>
</a>
<div class="t-14 truncate">
<span>Glocomms</span>
<span class="jobs-unified-top-card__bullet">Mountain View, CA</span>
<span class="jobs-unified-top-card__workplace-type">Remote</span>
</div>
</div>
<div class="jobs-unified-top-card__sticky-buttons-container">
<div class="jobs-s-apply jobs-s-apply--fadein inline-flex mr2">
<div class="jobs-apply-button--top-card">
<button aria-label="Apply to Senior/Lead DevOps Engineer at Glocomms" id="ember421" class="jobs-apply-button artdeco-button artdeco-button--2 artdeco-button--primary ember-view" data-control-name="jobdetails_topcard_inapply" data-job-id="2752821440"><!---->
<span class="artdeco-button__text">
Apply now
</span></button>
</div>
</div>
<button class="jobs-save-button mr2 artdeco-button artdeco-button--2 artdeco-button--secondary" aria-expanded="false" type="button">
<!----> <span aria-hidden="true">
Save
</span>
<span class="a11y-text">
Save Senior/Lead DevOps Engineer at Glocomms
</span>
</button>
<div id="ember198" class="artdeco-dropdown jobs-options artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view inline-flex">
<button aria-expanded="false" id="ember199" class="artdeco-button artdeco-button--2 artdeco-button--tertiary artdeco-button--muted artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="ellipsis-horizontal-icon" class="artdeco-button__icon"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M14 12a2 2 0 11-2-2 2 2 0 012 2zM4 10a2 2 0 102 2 2 2 0 00-2-2zm16 0a2 2 0 102 2 2 2 0 00-2-2z"></path>
</svg></li-icon>
<span class="artdeco-button__text">
Show more options
</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember200" class="artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view"><!----></div>
</div>
</div>
</div>
</div>
<!----> </div>
<div class="jobs-box--fadein jobs-box--full-width jobs-box--with-cta-large jobs-description
jobs-description--has-poster
jobs-description--reformatted
">
<!---->
<article class="jobs-description__container
m4">
<div class="jobs-description__content jobs-description-content
">
<div class="jobs-box__html-content jobs-description-content__text t-14 t-normal" id="job-details" tabindex="-1">
<!---->
<!---->
<div class="jobs-poster
jobs-poster--condensed
jobs-poster--reformatted ">
<h3 class="jobs-poster__title
t-14 t-black--light t-bold
mb1">
Posted by
</h3>
<!---->
<div class="jobs-poster__wrapper display-flex flex-row">
<a data-control-name="jobdetails_profile_poster" href="/in/dylan-finn-4a045b172/" id="ember413" class="ember-view">
<img src="https://media-exp1.licdn.com/dms/image/C4E03AQHd5lMChGqOCA/profile-displayphoto-shrink_100_100/0/1635432789428?e=1641427200&amp;v=beta&amp;t=s6hwUmVjT1h2tDHFaYQPvdHId4D0sSTd3wBz0No3PZM" loading="lazy" alt="Job poster profile" id="ember414" class="EntityPhoto-circle-5-ghost-person jobs-poster__photo lazy-image ember-view">
</a>
<div class="jobs-poster__info-wrapper ml2">
<div class="jobs-poster__info-container flex-column">
<a data-control-name="jobdetails_profile_poster" href="/in/dylan-finn-4a045b172/" id="ember415" class="jobs-poster__name-link ember-view">
<p class="jobs-poster__name t-14 t-black t-bold mb0">
Dylan Finn
<!----> </p>
</a>
<p class="jobs-poster__headline t-12 t-black--light t-normal mb0">
Recruitment Consultant | Software Development
</p>
<!----> </div>
<div class="jobs-poster__action-container jobs-box--fadein">
<div class="jobs-poster__premium-section mb1">
<li-icon type="premium-badge" size="8dp" role="img" aria-label="Premium"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 156 16" data-supported-dps="78x8" class="mercado-match" width="78" height="8" focusable="false">
<path d="M15.12 15.12A3 3 0 0016 13V3a3 3 0 00-3-3H3A3 3 0 00.88.88z" fill="#f8c77e"></path>
<path d="M.88.88A3 3 0 000 3v10a3 3 0 003 3h10a3 3 0 002.12-.88z" fill="#e7a33e"></path>
<path d="M58.41 4.84c0-3.45-2.49-4.64-5.77-4.64h-5.26v15.6h2.78V9.68h2.39l3.37 6.12h3l-3.59-6.52a4.27 4.27 0 003.08-4.44zm-6.06 2.67h-2.19v-5h2.19c2.18 0 3.27.59 3.27 2.37s-1.19 2.63-3.27 2.63zM33.26.2H28v15.6h2.78V10h2.48c3.38 0 6-1.58 6-5s-2.52-4.8-6-4.8zm-.19 7.6h-2.29V2.37h2.29c2.28 0 3.47.6 3.47 2.57S35.45 7.8 33.07 7.8zM156 .2v15.6h-2.58V8.69c0-1.48.3-3.65.39-5h-.09l-1.3 3.64-2.68 7h-1.29l-2.68-7-1.19-3.64h-.1c.1 1.38.29 3.55.29 5v7.11h-2.48V.2h3l2.78 7.7 1 3h.1l1-3 2.75-7.7zm-26 0h2.69v8.69c0 5.13-2.19 7.11-5.86 7.11s-6-2-6-7.11V.2h2.78v8.89c0 3.36 1.29 4.44 3.18 4.44S130 12.45 130 9.09zm-21.3 0h2.78v15.6h-2.78zM96.07.2h3.08v15.6h-2.58V8.69c0-1.48.3-3.65.4-5h-.1l-1.29 3.52-2.69 7.11H91.6l-2.68-7.11-1.19-3.55h-.1c.1 1.38.3 3.55.3 5v7.14h-2.49V.2h3l2.79 7.6 1 3.06h.1l1-3.06zM69.94 13.43h6.85v2.37h-9.63V.2h9.33v2.27h-6.55v4h5.56v2.42h-5.56z" style="isolation:isolate" fill-opacity=".75"></path>
</svg></li-icon>
</div>
<div>
<button class="message-anywhere-button link-without-visited-state t-14 t-bold" aria-label="Send InMail to Dylan Finn" type="button">
Send InMail
</button>
</div>
</div>
</div>
</div>
</div>
<span>
<p>an AI platform company that revolutionizes how companies support their employees - $315M in funding They are seeking a 100% Remote Senior, Staff, and Principal Devops Engineers to join their team!</p>
<!----> </span>
</div>
<div class="jobs-description__details">
<div class="jobs-ppc-quality relative">
<!----></div>
<!---->
</div>
</div>
</article>
<!----></div>
<!----><!---->
<div id="SALARY" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!----> <div id="ember418" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!---->
<!---->
<!---->
<div id="ember419" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!----> </div>
<!----> </div>
<div id="ember202" class="ember-view"><!----></div>
<div id="ember203" class="ember-view"><div id="ember204" class="ember-view"><!----></div></div>
<div>
<div id="ember205" class="ember-view"><!----></div>
</div>
<div id="ember206" class="ember-view"><!----></div>
<!---->
<div class="jobs-message-modal">
<div id="ember207" class="ember-view"><!----></div>
</div>
<!---->
<!---->
<!---->

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

@ -0,0 +1,102 @@
[
{
"job": "ember281",
"info": "Sr DevOps Engineer\nJobot\nNew York, NY\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember293",
"info": "Principal DevOps Engineer\nSophos\nMassachusetts, United States\nRemote\nMedical benefit\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember306",
"info": "DevOps Engineer\nMCS Group | Your Specialist Recruitment Consultancy\nTexas, United States\nRemote\n$70K/yr - $90K/yr\n401(k) benefit\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember319",
"info": "Sr. Full Stack DevOps Engineer\nFocus GTS\nUnited States\nRemote\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember332",
"info": "DevOps Engineer\nAkerna Corp.\nUnited States\nRemote\nActively recruiting\n7 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember344",
"info": "Devops Engineer\nLGND Group\nAustin, TX\nRemote\n$90K/yr - $147K/yr (LinkedIn est.)\n5 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember357",
"info": "DevOps Engineer\nApplicantz\nUnited States\nRemote\n$61.74/hr\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember370",
"info": "Senior DevOps Engineer - REMOTE option\nData Recognition Corporation\nMinnesota, United States\nRemote\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember371",
"info": "DevOps Engineer\nTykhe Inc\nUnited States\nRemote\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember372",
"info": "DevOps Engineer\nAIM Consulting Group\nTexas, United States\nRemote\n$160K/yr - $180K/yr\nMedical, Vision, Dental, 401(k)\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember373",
"info": "Sr DevOps Engineer\nUnited States\nRemote\n$150K/yr - $190K/yr\nMedical, Vision, Dental, 401(k)\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember374",
"info": "Cloud Infrastructure Engineer\nTechnologyHub\nUnited States\nRemote\n6 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember375",
"info": "DevOps Manager\neyrus\nUnited States\nRemote\n$110K/yr - $130K/yr\nMedical, Vision, Dental, 401(k)\nYour profile matches this job\n7 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember376",
"info": "DevOps Engineer\nSkills Alliance\nPhiladelphia, PA\nRemote\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember377",
"info": "DevOps Engineer\nHCTec\nUnited States\nRemote\n$55/hr - $60/hr\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember378",
"info": "Senior Devops Engineer\nVic.ai\nNew York City Metropolitan Area\nRemote\nMedical, Vision, 401(k), +2 benefits\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember379",
"info": "Senior DevOps Engineer -(Azure, PowerShell, Terraform) - 100% Remote\nOptomi\nUnited States\nRemote\n1 alum works here\nPromoted\nApply easily"
},
{
"job": "ember380",
"info": "DevOps Engineer\nHireTalent - Diversity Staffing & Recruiting Firm\nUnited States\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember381",
"info": "Cloud Engineer/Specialist (Uptake Oil & Gas)\nConfidential\nUnited States\nRemote\n2 connections work here\n3 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember382",
"info": "DevOps Engineer\nBitTorrent, Inc.\nSan Francisco Bay Area\nRemote\nActively recruiting\n3 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember383",
"info": "DevOps Engineer - REMOTE\nKamis\nUnited States\nRemote\n$95K/yr\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember384",
"info": "Senior DevOps Engineer\nTalener\nUnited States\nRemote\n401(k), +1 benefit\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember385",
"info": "DevOps Engineer\nNet2Source Inc.\nIrving, TX\nRemote\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember386",
"info": "Security DevOps Engineer\nCaterpillar Inc.\nChicago, IL\nRemote\n5 alumni work here\nPromoted\nApply easily"
},
{
"job": "ember387",
"info": "Principal DevOps Engineer\nOtterBase\nUnited States\nRemote\n$140K/yr - $176K/yr\nActively recruiting\nPromoted\nApply easily"
}
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 168 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 240 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

@ -0,0 +1,83 @@
<li class="jobs-search-results__list-item occludable-update p0 relative ember-view" data-occludable-entity-urn="urn:li:fs_normalized_jobPosting:2780736909" id="ember314">
<div>
<div class="job-card-container relative job-card-list job-card-container--clickable job-card-list--underline-title-on-hover jobs-search-two-pane__job-card-container--viewport-tracking-24" data-job-id="2780736909">
<div class="job-card-list__entity-lockup artdeco-entity-lockup artdeco-entity-lockup--size-4 ember-view" id="ember558">
<div class="mr1 artdeco-entity-lockup__image artdeco-entity-lockup__image--type-square ember-view" id="ember559" type="square">
<a class="disabled ember-view job-card-container__link" data-control-id="F2UUrVxSUWJ+MlnFpPWfew==" href="/jobs/view/2780736909/?eBP=CwEAAAF88-E5iMvWKedI1HBMmPYJvJE-txCZYmolc52-ESQM_TuTUEcSpoVgJDa8yet0OoCPfASbQPkmf9W5sugfkG16aWgqFfxGDu8kqoi7waccOXBQ4nqnoWwOnI9dSIFBMomXdwimT0phPbj-amWvVyWzmsk9CxECsIswObrFqC3doZECCeUo3c2NubeIc4N6aS4-suie3_J2t-TP-2dIJmMlGhaT8QxpRkCBR0XUmI-nhrIf4SpfqQ2KYB7c_Q8iSbKVEkHAVzp37Eg-LxfhxzEdMMWvqGwaBAItpBynQBU3igjppYVBfB3p_x-Z12B9AzBuNPaYphnTJEF6Zmsz0w4cBf5kzm-SAmUHbYe0MQci-bhBG5SfC6ARjwo&amp;recommendedFlavor=ACTIVELY_HIRING_COMPANY&amp;refId=6F8LpXMenBJ5i7WYaBp2oA%3D%3D&amp;trackingId=F2UUrVxSUWJ%2BMlnFpPWfew%3D%3D&amp;trk=flagship3_search_srp_jobs" id="ember560" tabindex="-1">
<img alt="Prodigy Resources logo" class="ember-view" id="ember561" src="https://media-exp1.licdn.com/dms/image/C4E0BAQG-WH30DdN2OA/company-logo_100_100/0/1617111258677?e=1644451200&amp;v=beta&amp;t=pJ7mKF5kDEN3g8v14BRdCwafzQYaP0R7uONLECUC-3o" title="Prodigy Resources"/>
</a>
</div>
<div class="flex-grow-1 artdeco-entity-lockup__content ember-view" id="ember562">
<div class="full-width artdeco-entity-lockup__title ember-view" id="ember563">
<a class="disabled ember-view job-card-container__link job-card-list__title" data-control-id="F2UUrVxSUWJ+MlnFpPWfew==" href="/jobs/view/2780736909/?eBP=CwEAAAF88-E5iMvWKedI1HBMmPYJvJE-txCZYmolc52-ESQM_TuTUEcSpoVgJDa8yet0OoCPfASbQPkmf9W5sugfkG16aWgqFfxGDu8kqoi7waccOXBQ4nqnoWwOnI9dSIFBMomXdwimT0phPbj-amWvVyWzmsk9CxECsIswObrFqC3doZECCeUo3c2NubeIc4N6aS4-suie3_J2t-TP-2dIJmMlGhaT8QxpRkCBR0XUmI-nhrIf4SpfqQ2KYB7c_Q8iSbKVEkHAVzp37Eg-LxfhxzEdMMWvqGwaBAItpBynQBU3igjppYVBfB3p_x-Z12B9AzBuNPaYphnTJEF6Zmsz0w4cBf5kzm-SAmUHbYe0MQci-bhBG5SfC6ARjwo&amp;recommendedFlavor=ACTIVELY_HIRING_COMPANY&amp;refId=6F8LpXMenBJ5i7WYaBp2oA%3D%3D&amp;trackingId=F2UUrVxSUWJ%2BMlnFpPWfew%3D%3D&amp;trk=flagship3_search_srp_jobs" id="ember564" tabindex="0">
Remote Azure DevOps Engineer
</a>
</div>
<div class="artdeco-entity-lockup__subtitle ember-view" id="ember565">
<a class="job-card-container__link job-card-container__company-name ember-view" data-control-id="F2UUrVxSUWJ+MlnFpPWfew==" data-control-name="job_card_company_link" href="/company/657475/" id="ember566">
Prodigy Resources
</a>
</div>
<div class="artdeco-entity-lockup__caption ember-view" id="ember567">
<ul class="job-card-container__metadata-wrapper">
<li class="job-card-container__metadata-item">
McLean, VA
</li>
<li class="job-card-container__metadata-item job-card-container__metadata-item--workplace-type">
Remote
</li>
</ul>
</div>
<!-- -->
</div>
<div class="job-card-container__action-container flex-shrink-zero display-flex align-items-flex-end flex-column">
<div class="job-card-container__action--visible-on-hover">
<button aria-label="Mark Remote Azure DevOps Engineer with Hide job action" class="job-card-container__action artdeco-button artdeco-button--circle artdeco-button--muted artdeco-button--2 artdeco-button--tertiary ember-view" id="ember568" type="button">
<li-icon aria-hidden="true" class="artdeco-button__icon" type="eyeball-slash-icon">
<svg class="mercado-match" data-supported-dps="24x24" fill="currentColor" focusable="false" height="24" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M2 2.71l3.88 3.87A12 12 0 001 12a11.74 11.74 0 0011 7 12.4 12.4 0 005.18-1.12L21.29 22l.71-.71L2.71 2zm6.15 6.14l1.42 1.43A2.93 2.93 0 009 12a3 3 0 003 3 2.93 2.93 0 001.72-.57l1.43 1.42A4.93 4.93 0 0112 17a5 5 0 01-5-5 4.93 4.93 0 011.15-3.15zM23 12a12.1 12.1 0 01-4 4.87l-2.58-2.57A5 5 0 009.7 7.58L7.82 5.7A12.64 12.64 0 0112 5a11.76 11.76 0 0111 7zm-8 0a3.25 3.25 0 01-.11.77l-3.66-3.66A3.25 3.25 0 0112 9a3 3 0 013 3z">
</path>
</svg>
</li-icon>
<span class="artdeco-button__text">
Hide job
</span>
</button>
</div>
<!-- -->
</div>
</div>
<!-- -->
<div class="job-card-list__insight">
<div>
<div class="job-flavors__flavor">
<li-icon aria-label="Actively recruiting" class="job-flavors__icon job-flavors__icon--green7" role="img" size="medium" type="radar-screen-icon">
<svg class="mercado-match" data-supported-dps="24x24" fill="currentColor" focusable="false" height="24" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M12 20a8 8 0 010-16 7.91 7.91 0 014.9 1.69l-1.43 1.42a6 6 0 101.42 1.42l3.82-3.82a1 1 0 000-1.41A1 1 0 0020 3a1 1 0 00-.7.29l-1 1A10 10 0 1022 12h-2a8 8 0 01-8 8zm5-8a5 5 0 11-5-5 4.93 4.93 0 012.76.82l-2.24 2.24A2.24 2.24 0 0012 10a2 2 0 102 2 2.24 2.24 0 00-.07-.51l2.24-2.24A5 5 0 0117 12z">
</path>
</svg>
</li-icon>
<span class="job-flavors__label t-12 t-black--light">
Actively recruiting
</span>
</div>
</div>
</div>
<ul class="job-card-list__footer-wrapper job-card-container__footer-wrapper flex-shrink-zero display-flex t-sans t-12 t-black--light t-normal t-roman">
<li class="t-12 t-normal t-black--light job-card-container__footer-item">
Promoted
</li>
<!-- -->
<li class="job-card-container__apply-method job-card-container__footer-item inline-flex align-items-center">
<li-icon aria-hidden="true" class="mr1" size="small" type="linkedin-inbug-color-icon">
<svg class="mercado-match" data-supported-dps="16x16" focusable="false" height="16" viewbox="0 0 16 16" width="16" xmlns="http://www.w3.org/2000/svg">
<path d="M15 2v12a1 1 0 01-1 1H2a1 1 0 01-1-1V2a1 1 0 011-1h12a1 1 0 011 1zM5 6H3v7h2zm.25-2A1.25 1.25 0 104 5.25 1.25 1.25 0 005.25 4zM13 9.29c0-2.2-.73-3.49-2.86-3.49A2.71 2.71 0 007.89 7V6H6v7h2V9.73a1.73 1.73 0 011.52-1.92h.14C10.82 7.8 11 8.94 11 9.73V13h2z" fill="#0a66c2">
</path>
</svg>
</li-icon>
Apply easily
</li>
</ul>
</div>
</div>
</li>

Binary file not shown.

After

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

@ -0,0 +1,311 @@
<div>
<div class="
jobs-details__main-content jobs-details__main-content--single-pane full-width
">
<!---->
<!---->
<div>
<div class="jobs-unified-top-card t-14 ">
<div class="jobs-unified-top-card__buttons-container">
<div class="display-flex flex-column">
<div class="display-flex justify-flex-end">
<div id="ember190" class="artdeco-dropdown artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view">
<button aria-expanded="false" id="ember191" class="social-share__dropdown-trigger artdeco-button artdeco-button--3 artdeco-button--tertiary artdeco-button--circle artdeco-button--muted artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="share-linkedin-icon" class="artdeco-button__icon" size="medium"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M23 12l-4.61 7H16l4-6H8a3.92 3.92 0 00-4 3.84V17a4 4 0 00.19 1.24L5.12 21H3l-.73-2.22A6.4 6.4 0 012 16.94 6 6 0 018 11h12l-4-6h2.39z"></path>
</svg></li-icon>
<span class="artdeco-button__text">Share</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember192" class="social-share__content text-align-left artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view" role="group"><!----></div>
</div>
<div>
<!---->
</div>
<div id="ember193" class="artdeco-dropdown jobs-options artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view">
<button aria-expanded="false" id="ember194" class="artdeco-button artdeco-button--3 artdeco-button--tertiary artdeco-button--muted artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="ellipsis-horizontal-icon" class="artdeco-button__icon"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M14 12a2 2 0 11-2-2 2 2 0 012 2zM4 10a2 2 0 102 2 2 2 0 00-2-2zm16 0a2 2 0 102 2 2 2 0 00-2-2z"></path>
</svg></li-icon>
<span class="artdeco-button__text">
Show more options
</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember195" class="artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view"><!----></div>
</div>
</div>
<!----> </div>
</div>
<div class="jobs-unified-top-card__content--two-pane">
<!---->
<a href="/jobs/view/2778661047/?alternateChannel=search&amp;refId=b2dPzIhQJwG6IzntzT4umw%3D%3D&amp;trackingId=jHqNC2fjfgmsvFwIwFuawA%3D%3D" id="ember196" class="ember-view">
<h2 class="t-24 t-bold">DevOps Engineer</h2>
</a>
<div class="mt2">
<span class="jobs-unified-top-card__subtitle-primary-grouping mr2 t-black">
<span>
<a href="/company/jobot/life/" id="ember197" class="ember-view t-black t-normal">
Jobot
</a>
</span>
<span class="jobs-unified-top-card__bullet">
Austin, TX
</span>
<span class="jobs-unified-top-card__workplace-type">Remote</span>
</span>
<span class="jobs-unified-top-card__subtitle-secondary-grouping t-black--light">
<span class="jobs-unified-top-card__posted-date
jobs-unified-top-card__posted-date--new t-bold">
1 hour ago
</span>
<!----> </span>
</div>
<div class="mt5 mb2">
<div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<li-icon aria-hidden="true" type="briefcase-icon" size="large"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M17 6V5a3 3 0 00-3-3h-4a3 3 0 00-3 3v1H2v4a3 3 0 003 3h14a3 3 0 003-3V6zM9 5a1 1 0 011-1h4a1 1 0 011 1v1H9zm10 9a4 4 0 003-1.38V17a3 3 0 01-3 3H5a3 3 0 01-3-3v-4.38A4 4 0 005 14z"></path>
</svg></li-icon>
</div>
</div>
</div>
<span>
<!---->Full-time · Mid-Senior level<!---->
</span>
</div>
<div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<li-icon aria-hidden="true" type="company-icon" size="large"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M4 2v20h16V2zm14 18h-4v-2h-4v2H6V4h12zm-7-8H8v-2h3zm0 4H8v-2h3zm5-4h-3v-2h3zm-5-4H8V6h3zm5 0h-3V6h3zm0 8h-3v-2h3z"></path>
</svg></li-icon>
</div>
</div>
</div>
<span>
<!---->201-500 employees · Staffing &amp; Recruiting<!---->
</span>
</div>
<!----> <div class="jobs-unified-top-card__job-insight">
<div class="flex-shrink-zero mr2 t-black--light">
<div class="ivm-image-view-model ">
<div class="ivm-view-attr__img-wrapper ivm-view-attr__img-wrapper--use-img-tag display-flex
">
<li-icon type="radar-screen-icon" class="ivm-view-attr__icon--signal-positive " size="large" role="img" aria-label="Actively recruiting"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M12 20a8 8 0 010-16 7.91 7.91 0 014.9 1.69l-1.43 1.42a6 6 0 101.42 1.42l3.82-3.82a1 1 0 000-1.41A1 1 0 0020 3a1 1 0 00-.7.29l-1 1A10 10 0 1022 12h-2a8 8 0 01-8 8zm5-8a5 5 0 11-5-5 4.93 4.93 0 012.76.82l-2.24 2.24A2.24 2.24 0 0012 10a2 2 0 102 2 2.24 2.24 0 00-.07-.51l2.24-2.24A5 5 0 0117 12z"></path>
</svg></li-icon>
</div>
</div>
</div>
<span>
<!---->Actively recruiting<!---->
</span>
</div>
</div>
<div class="mt5">
<div class="display-flex">
<div class="jobs-s-apply jobs-s-apply--fadein inline-flex mr2">
<div class="jobs-apply-button--top-card">
<button aria-label="Apply to DevOps Engineer at Jobot" id="ember417" class="jobs-apply-button artdeco-button artdeco-button--3 artdeco-button--primary ember-view" data-control-name="jobdetails_topcard_inapply" data-job-id="2778661047"><!---->
<span class="artdeco-button__text">
Apply now
</span></button>
</div>
</div>
<button class="jobs-save-button artdeco-button artdeco-button--3 artdeco-button--secondary" type="button">
<!----> <span aria-hidden="true">
Save
</span>
<span class="a11y-text">
Save DevOps Engineer at Jobot
</span>
</button>
<!----> </div>
<!---->
<!----><!----> </div>
<!---->
<!---->
<!---->
<!----> </div>
<!---->
<!---->
<div class="jobs-unified-top-card__sticky-header
">
<div class="jobs-unified-top-card__title-container">
<a data-control-id="jHqNC2fjfgmsvFwIwFuawA==" href="/jobs/view/2778661047/?alternateChannel=search&amp;refId=b2dPzIhQJwG6IzntzT4umw%3D%3D&amp;trackingId=jHqNC2fjfgmsvFwIwFuawA%3D%3D" id="ember198" class="ember-view">
<h2 class="t-16 t-black t-bold truncate">
DevOps Engineer
</h2>
</a>
<div class="t-14 truncate">
<span>Jobot</span>
<span class="jobs-unified-top-card__bullet">Austin, TX</span>
<span class="jobs-unified-top-card__workplace-type">Remote</span>
</div>
</div>
<div class="jobs-unified-top-card__sticky-buttons-container">
<div class="jobs-s-apply jobs-s-apply--fadein inline-flex mr2">
<div class="jobs-apply-button--top-card">
<button aria-label="Apply to DevOps Engineer at Jobot" id="ember418" class="jobs-apply-button artdeco-button artdeco-button--2 artdeco-button--primary ember-view" data-control-name="jobdetails_topcard_inapply" data-job-id="2778661047"><!---->
<span class="artdeco-button__text">
Apply now
</span></button>
</div>
</div>
<button class="jobs-save-button mr2 artdeco-button artdeco-button--2 artdeco-button--secondary" aria-expanded="false" type="button">
<!----> <span aria-hidden="true">
Save
</span>
<span class="a11y-text">
Save DevOps Engineer at Jobot
</span>
</button>
<div id="ember199" class="artdeco-dropdown jobs-options artdeco-dropdown--placement-bottom artdeco-dropdown--justification-right ember-view inline-flex">
<button aria-expanded="false" id="ember200" class="artdeco-button artdeco-button--2 artdeco-button--tertiary artdeco-button--muted artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view" type="button" tabindex="0">
<li-icon aria-hidden="true" type="ellipsis-horizontal-icon" class="artdeco-button__icon"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M14 12a2 2 0 11-2-2 2 2 0 012 2zM4 10a2 2 0 102 2 2 2 0 00-2-2zm16 0a2 2 0 102 2 2 2 0 00-2-2z"></path>
</svg></li-icon>
<span class="artdeco-button__text">
Show more options
</span>
<!----></button>
<div tabindex="-1" aria-hidden="true" id="ember201" class="artdeco-dropdown__content artdeco-dropdown--is-dropdown-element artdeco-dropdown__content--has-arrow artdeco-dropdown__content--arrow-right artdeco-dropdown__content--justification-right artdeco-dropdown__content--placement-bottom ember-view"><!----></div>
</div>
</div>
</div>
</div>
<!----> </div>
<div class="jobs-box--fadein jobs-box--full-width jobs-box--with-cta-large jobs-description
jobs-description--reformatted
">
<!---->
<article class="jobs-description__container
m4">
<div class="jobs-description__content jobs-description-content
">
<div class="jobs-box__html-content jobs-description-content__text t-14 t-normal" id="job-details" tabindex="-1">
<!---->
<!---->
<!----> <span>
<strong>We are an industry leader in the health space with over 100M monthly users!<br>
<br>
</strong>This Jobot Job is hosted by Jordan Goulding<br>
<br>
Are you a fit? Easy Apply now by clicking the "Apply" button and sending us your resume.<br>
<br>
Salary $170,000 per year<br>
<br>
<strong>A Bit About Us<br>
<br>
</strong>We are an industry leader in the health space with over 100M monthly users, come join the team!<br>
<br>
<strong><br>
<br>
<strong>Why join us?<br>
<br>
<br>
</strong></strong>Great pay and health benefits<br>
<br>
401K<br>
<br>
Work life balance<br>
<br>
Flexible vacation policy<br>
<br>
<strong>Job Details<br>
<br>
</strong>Must be senior level with the following<br>
<br>
AWS (EC2, S3, IAM, RDS, etc.)<br>
<br>
Cloud Networking<br>
<br>
Docker or Kubernetes<br>
<br>
Go or Python<br>
<br>
Interested in hearing more? Easy Apply now by clicking the "Apply" button.<br>
<br>
<!----> </span>
</div>
<div class="jobs-description__details">
<!---->
</div>
</div>
</article>
<!----></div>
<!----><!---->
<div id="SALARY" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!----> <div id="ember415" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!---->
<!---->
<!---->
<div id="ember416" class="jobs-box--generic-occludable-area-large ember-view"><!----></div>
<!----> </div>
<!----> </div>
<div id="ember203" class="ember-view"><!----></div>
<div id="ember204" class="ember-view"><div id="ember205" class="ember-view"><!----></div></div>
<div>
<div id="ember206" class="ember-view"><!----></div>
</div>
<div id="ember207" class="ember-view"><!----></div>
<!---->
<div class="jobs-message-modal">
<div id="ember208" class="ember-view"><!----></div>
</div>
<!---->
<!---->
<!---->

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 240 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 149 KiB

@ -0,0 +1,102 @@
[
{
"job": "ember279",
"info": "Lead DevOps Engineer\nJobot\nPhiladelphia, PA\nRemote\n401(k) benefit\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember292",
"info": "Senior/Lead DevOps Engineer\nGlocomms\nMountain View, CA\nRemote\n$160K/yr - $220K/yr\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember306",
"info": "Sr DevOps Engineer\nUnited States\nRemote\nMedical, Vision, Dental, 401(k)\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember318",
"info": "Senior DevOps Engineer\nCoSourcing Partners - Robotic Process Automation and IT Services Company\nGreater Chicago Area\nRemote\n$120K/yr - $125K/yr\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember331",
"info": "DevOps Engineer\nNextiva\nUnited States\nRemote\nActively recruiting\n2 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember343",
"info": "DevOps Engineer\nWorkday\nSan Francisco Bay Area\nRemote\n$60/hr - $70/hr\nYour profile matches this job\n9 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember357",
"info": "Senior DevOps Engineer\nFundrise\nUnited States\nRemote\nMedical, Vision, Dental, +1 benefit\nYour profile matches this job\nPromoted\nApply easily"
},
{
"job": "ember371",
"info": ""
},
{
"job": "ember372",
"info": "Senior DevOps Engineer\nNextiva\nUnited States\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember373",
"info": "Jr-Mid DevOps Engineer (AWS)\nHays\nFlorida, United States\nRemote\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember374",
"info": "DevOps Engineer (with strong Jenkins Experience)\nNet2Source Inc.\nHartford, CT\nRemote\nTop applicant\nPromoted\nApply easily"
},
{
"job": "ember375",
"info": "Cloud Engineer\nBenjamin Douglas\nDallas-Fort Worth Metroplex\nRemote\n$165K/yr\nActively recruiting\n16 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember376",
"info": "DevOps Engineer\nTrust In SODA\nUnited States\nRemote\nMedical benefit\nActively recruiting\n18 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember377",
"info": "AWS DevOps Engineer\nSemanticBits\nUnited States\nRemote\nMedical, Vision, Dental, 401(k), +1 benefit\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember378",
"info": "Lead DevOps Engineer\nStorm2\nUnited States\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember379",
"info": "DevOps Engineer\nClient Resources, Inc.\nOmaha, NE\nRemote\nMedical, Vision, Dental\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember380",
"info": "DevOps Engineer\nParker and Lynch\nKansas City Metropolitan Area\nRemote\n1 alum works here\nPromoted\nApply easily"
},
{
"job": "ember381",
"info": "DevOps Engineer\nKunz, Leigh & Associates\nMichigan, United States\nRemote\nMedical, Vision, Dental, 401(k), +1 benefit\nPromoted\nApply easily"
},
{
"job": "ember382",
"info": "DevOps Engineer\nZenex Partners\nUnited States\nRemote\nActively recruiting\n22 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember383",
"info": "DevOps Engineer\nNetsynk Inc\nUnited States\nRemote\n13 hours ago\nWithin the past 24 hours\nApply easily"
},
{
"job": "ember384",
"info": "Sr. DevOps Engineer\nProven Recruiting\nSan Diego, CA\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember385",
"info": "Senior DevOps Engineer\nLPX Group\nUnited States\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember386",
"info": "Senior DevOps Engineer (Remote available)\nTrioTech Recruitment\nNew York, United States\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember387",
"info": "Senior DevOps Engineer\nContino\nMinneapolis, MN\nRemote\nActively recruiting\nPromoted\nApply easily"
},
{
"job": "ember388",
"info": "DevOps Engineer\nMercatalyst\nUnited States\nRemote\nTop applicant\nPromoted\nApply easily"
}
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save