diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 6ba7c728e..76ba930cd 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -234,8 +234,8 @@ jobs: # - "Grokking_Demo" # - "Head_Detector_Demo" # - "Interactive_Neuroscope" - # - "LLaMA" - # - "LLaMA2_GPU_Quantized" # Requires quantization libs + too slow for CI timeout + - "LLaMA" + - "LLaMA2_GPU_Quantized" # Requires quantization libs (installed via "uv sync --group quantization") - "Main_Demo" # - "No_Position_Experiment" - "Othello_GPT" @@ -267,15 +267,22 @@ jobs: - name: Install dependencies run: | uv lock --check - uv sync + uv sync --group quantization - name: Install pandoc uses: awalsh128/cache-apt-pkgs-action@latest with: packages: pandoc version: 1.0 + - name: Authenticate HuggingFace + if: env.HF_TOKEN != '' + run: uv run python -c "import os; from huggingface_hub import login; login(token=os.environ['HF_TOKEN'])" + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} - name: Check Notebook Output Consistency # Note: currently only checks notebooks we have specifically setup for this run: pytest --nbval-sanitize-with demos/doc_sanitize.cfg demos/${{ matrix.notebook }}.ipynb + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} build-docs: diff --git a/demos/Attribution_Patching_Demo.ipynb b/demos/Attribution_Patching_Demo.ipynb index e7adb828b..1559b02b1 100644 --- a/demos/Attribution_Patching_Demo.ipynb +++ b/demos/Attribution_Patching_Demo.ipynb @@ -252,13 +252,6 @@ } }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { @@ -3807,7 +3800,7 @@ "Top 0th token. Logit: 20.73 Prob: 95.80% Token: | Paris|\n", "Top 1th token. Logit: 16.49 Prob: 1.39% Token: | E|\n", "Top 2th token. Logit: 14.69 Prob: 0.23% Token: | the|\n", - "Top 3th token. 
Logit: 14.58 Prob: 0.21% Token: | \u00c9|\n", + "Top 3th token. Logit: 14.58 Prob: 0.21% Token: | É|\n", "Top 4th token. Logit: 14.44 Prob: 0.18% Token: | France|\n", "Top 5th token. Logit: 14.36 Prob: 0.16% Token: | Mont|\n", "Top 6th token. Logit: 13.77 Prob: 0.09% Token: | Le|\n", @@ -4237,11 +4230,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d230304b88114f2a9b85f5a48f441ce6", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_ee535e2cfa694be1a7857b1867b8b608", "tabbable": null, "tooltip": null, - "value": "\u2007456k/?\u2007[00:00<00:00,\u200712.7MB/s]" + "value": " 456k/? [00:00<00:00, 12.7MB/s]" } }, "020cf001eb7d496295a325cbc0ee8718": { @@ -4278,7 +4271,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_af67816d50074ae498ef9b600b4175ed", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7ac7b11ef3e34bf1a12926c745e08707", "tabbable": null, "tooltip": null, @@ -4301,11 +4294,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_7a32c6104cdb4eee8dadc248c129040c", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_8c0e7d4b46c14e2bb2167752820a9274", "tabbable": null, "tooltip": null, - "value": "\u20071.36M/?\u2007[00:00<00:00,\u200717.1MB/s]" + "value": " 1.36M/? [00:00<00:00, 17.1MB/s]" } }, "043e0e7fe43744589b7bad2527c2eac0": { @@ -4324,11 +4317,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d3dab66a1c254f07afa02e73e6fd121d", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_a386fa811d524ae08ac67cce5ebf3a15", "tabbable": null, "tooltip": null, - "value": "\u20071.36M/?\u2007[00:00<00:00,\u200720.0MB/s]" + "value": " 1.36M/? 
[00:00<00:00, 20.0MB/s]" } }, "04d1b3296c75497bb314206d6c7d5341": { @@ -4347,11 +4340,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_8e6cf78296b14bc381f13658ebf99912", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_0ebc4d3f1e94415086e749f4cd41b783", "tabbable": null, "tooltip": null, - "value": "\u20071.04M/?\u2007[00:00<00:00,\u200710.6MB/s]" + "value": " 1.04M/? [00:00<00:00, 10.6MB/s]" } }, "0582e71e725a4851a1905aceaa3c36ae": { @@ -4670,11 +4663,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_2569c461e9144c4c82e856e4533449ba", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_2f332b079a3044fa8ab87f028a7b80b0", "tabbable": null, "tooltip": null, - "value": "\u200748/48\u2007[01:10<00:00,\u2007\u20071.46s/it]" + "value": " 48/48 [01:10<00:00,  1.46s/it]" } }, "0fd7652c5e624ef7b2a36a0b0397f51d": { @@ -4806,11 +4799,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_5cc6434927224f72aadd34dc0e0c2894", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_432ff9b9f3574d69b47e8272dd762923", "tabbable": null, "tooltip": null, - "value": "merges.txt:\u2007" + "value": "merges.txt: " } }, "12f960167a1c417aacdd77ce3a997e35": { @@ -4829,7 +4822,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_8dd28e04200641a9a2a4e5ed241db518", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_912fc8506a6f45e38f1573d27eff6457", "tabbable": null, "tooltip": null, @@ -4876,11 +4869,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_ae5d013bec884f4b97d1852b1fb52432", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_87c101f3cc0f4553870ca9de688b9e83", "tabbable": null, "tooltip": null, - "value": "vocab.json:\u2007" + "value": "vocab.json: " } }, "1775bd14b2104a078aa63991cc11ba85": { @@ -4968,11 +4961,11 @@ "description": "", "description_allow_html": false, 
"layout": "IPY_MODEL_6dd11b3c888a461aaa85372b044ccd53", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_dc90adc8272e4e3e929844e2ceef149b", "tabbable": null, "tooltip": null, - "value": "\u2007124/124\u2007[00:00<00:00,\u200754.7kB/s]" + "value": " 124/124 [00:00<00:00, 54.7kB/s]" } }, "180d2ba6e10e4e808eba69a8517d5080": { @@ -4991,7 +4984,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_de109b45a18f42b5aa83f63ef683379f", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_4e44c9999b664ea9bad1b4e360ee76c7", "tabbable": null, "tooltip": null, @@ -5687,7 +5680,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_59190b0bd8e74ee1bab6aea2f931856d", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_c0aa3c04c0a74717b8fc6700213bf579", "tabbable": null, "tooltip": null, @@ -5797,11 +5790,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_700b88d4443848bab341b9d7b00cab54", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_b2e784a339524df682698e606959668e", "tabbable": null, "tooltip": null, - "value": "tokenizer.json:\u2007" + "value": "tokenizer.json: " } }, "31a28b69348b40bfbd14a54380bfb766": { @@ -5820,7 +5813,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_eddca0196bdf4e1eb5605e557bfe597b", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_c762da254b434ffea5b7c35e73009302", "tabbable": null, "tooltip": null, @@ -6315,7 +6308,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_5ed82326612a4505a34bc16d6b0b5fa8", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_69d19b1cf82443ff8994ffd7b156921c", "tabbable": null, "tooltip": null, @@ -6338,11 +6331,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_c9571f91e4894ac0ab6f9433d6dd7258", - "placeholder": "\u200b", + "placeholder": "​", "style": 
"IPY_MODEL_c08b955fa9494958bee9f565c568fc31", "tabbable": null, "tooltip": null, - "value": "tokenizer.json:\u2007" + "value": "tokenizer.json: " } }, "3c377339930f49a5891caeb0639a8360": { @@ -6562,11 +6555,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_68f6906692b447f1acec3cef5772fe5a", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_67cda37b9ae74d3484442b7d3bb19a26", "tabbable": null, "tooltip": null, - "value": "merges.txt:\u2007" + "value": "merges.txt: " } }, "4000e5115c6d48d687ab9b9695a0d826": { @@ -6601,7 +6594,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d94d7a4f5ab34fc9a4a4ee0a07764461", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_0fd7652c5e624ef7b2a36a0b0397f51d", "tabbable": null, "tooltip": null, @@ -6822,11 +6815,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_212c5660764844db8cdc6e3a16099521", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7de33c6a558d40a69a85b3db9e203ae8", "tabbable": null, "tooltip": null, - "value": "model.safetensors:\u2007100%" + "value": "model.safetensors: 100%" } }, "46c907a0ac31481f9147bf22e2ac5864": { @@ -6898,11 +6891,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_91db9856db97451196e16d433896af48", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_4e68d17b4b7a45369d6917887ddd7a28", "tabbable": null, "tooltip": null, - "value": "vocab.json:\u2007" + "value": "vocab.json: " } }, "49be2b480d5847a3af7835c317236280": { @@ -7254,11 +7247,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d7e105c660824d349c4ee17006f04437", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_2848d61b3098431baa1d82fb85f469fe", "tabbable": null, "tooltip": null, - "value": "\u2007548M/548M\u2007[00:18<00:00,\u200760.2MB/s]" + "value": " 548M/548M [00:18<00:00, 60.2MB/s]" } }, 
"54af6102260d458db54e634c9814aa6f": { @@ -7295,11 +7288,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_6d3de9443ae74b75930d8397e9c7ed9a", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_63c8f7fdcf6346d9b3ca140d0f63f8e4", "tabbable": null, "tooltip": null, - "value": "\u2007144/144\u2007[00:07<00:00,\u200718.78it/s]" + "value": " 144/144 [00:07<00:00, 18.78it/s]" } }, "54daaf323029464b9b67f8a4f53b3002": { @@ -7334,7 +7327,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_39f8ccf31f6c49c7a95c59989236a3cf", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_54af6102260d458db54e634c9814aa6f", "tabbable": null, "tooltip": null, @@ -7654,11 +7647,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_7ffa006c49564aa8ad58f08f48b98955", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_357dcb56edba4564b6ec3051f3e977a5", "tabbable": null, "tooltip": null, - "value": "\u2007144/144\u2007[00:08<00:00,\u200715.78it/s]" + "value": " 144/144 [00:08<00:00, 15.78it/s]" } }, "5f730b9ef10e4b8bb97c4fef1bd7cbb2": { @@ -7790,7 +7783,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_4b378e2fe92a4bb5a0cc2adee8a9372d", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_8e4b9fcabfbc4a37a54f08a99e28b220", "tabbable": null, "tooltip": null, @@ -7839,11 +7832,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d7699d95a0ab4240bfa2754ac81a4dea", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_b92108f127ec4341af59d110b5f991c4", "tabbable": null, "tooltip": null, - "value": "Loading\u2007weights:\u2007100%" + "value": "Loading weights: 100%" } }, "61ffe9ae7ab44e94b5729cae80e49437": { @@ -7999,11 +7992,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_38e4ab89d9ed4f16a2a481054a18977f", - "placeholder": "\u200b", + "placeholder": "​", 
"style": "IPY_MODEL_3673315e0fe741048d2ba304360be671", "tabbable": null, "tooltip": null, - "value": "Loading\u2007weights:\u2007100%" + "value": "Loading weights: 100%" } }, "67cda37b9ae74d3484442b7d3bb19a26": { @@ -8204,11 +8197,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_e3abfde7cfd24e938684e179059edd9d", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_293230277eac481e84ab200e7cd5bdc1", "tabbable": null, "tooltip": null, - "value": "\u200726.0/26.0\u2007[00:00<00:00,\u20073.64kB/s]" + "value": " 26.0/26.0 [00:00<00:00, 3.64kB/s]" } }, "6ca82062740348f2bee12629de7f8e2f": { @@ -8351,11 +8344,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_39409ff188e6463bab5bf783828cdbd6", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7bd58f2c5b444ed9b5f21c6b364c9dce", "tabbable": null, "tooltip": null, - "value": "\u2007456k/?\u2007[00:00<00:00,\u20079.34MB/s]" + "value": " 456k/? [00:00<00:00, 9.34MB/s]" } }, "6dd11b3c888a461aaa85372b044ccd53": { @@ -8551,11 +8544,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_c08dfc48a3574ac1ba2e416960d1d3ea", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_3d29acbe122346388e66e0741971a810", "tabbable": null, "tooltip": null, - "value": "generation_config.json:\u2007100%" + "value": "generation_config.json: 100%" } }, "737d22cc16184d6a92cf045c476c7a01": { @@ -8574,11 +8567,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_8f5f5df1b0314449a113f7bb959fa273", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7671d9fee96d4e9f921046a1fb092672", "tabbable": null, "tooltip": null, - "value": "\u2007689/689\u2007[00:00<00:00,\u2007131kB/s]" + "value": " 689/689 [00:00<00:00, 131kB/s]" } }, "73cd80c764784b4197af01198ba6b886": { @@ -8597,7 +8590,7 @@ "description": "", "description_allow_html": false, "layout": 
"IPY_MODEL_f796802e863c48138fdcec92f546a372", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_22e77d7546334e038b64cc2c856a6a13", "tabbable": null, "tooltip": null, @@ -8620,11 +8613,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_cb86f659e5ad4c5296c32b97d99d357c", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_8dacdd0b8b5e4433ae4511433eb7df1d", "tabbable": null, "tooltip": null, - "value": "generation_config.json:\u2007100%" + "value": "generation_config.json: 100%" } }, "741e4c3c5c56426c91955f6b0622f629": { @@ -8643,11 +8636,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_5e617862d88745c792f610f6651662f6", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_60e5aae936bb41faad191cf2155f78d3", "tabbable": null, "tooltip": null, - "value": "tokenizer_config.json:\u2007100%" + "value": "tokenizer_config.json: 100%" } }, "75b76bb0ff2f491a8e5febeb8166cbd2": { @@ -8814,11 +8807,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_07149da010c5489696b03653df25cdd2", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_8ccfc9585b794ba293cbe376311c42ba", "tabbable": null, "tooltip": null, - "value": "\u2007144/144\u2007[00:07<00:00,\u200717.62it/s]" + "value": " 144/144 [00:07<00:00, 17.62it/s]" } }, "775073e43a7a4b2f970c810d2a05c73e": { @@ -9415,11 +9408,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_cfee0a94009c461dbdca5b73961f7fbe", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_f732f9391dd14bcaace6fd5c27a8335a", "tabbable": null, "tooltip": null, - "value": "\u2007144/144\u2007[00:07<00:00,\u200718.66it/s]" + "value": " 144/144 [00:07<00:00, 18.66it/s]" } }, "84867a129d0043b4910ac244e8a984df": { @@ -10090,11 +10083,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_a996f052d0ed4d938f0c71fc72e8c1b6", - "placeholder": "\u200b", + 
"placeholder": "​", "style": "IPY_MODEL_2a6fd373a6524eb3ae033ea78d3cb61e", "tabbable": null, "tooltip": null, - "value": "config.json:\u2007100%" + "value": "config.json: 100%" } }, "9157c241f7064a8596e1ffaeb850e59c": { @@ -10206,7 +10199,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_c51496d64234439ebbfec98b59f44803", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_29bf4e8f0e6042b498c6ac50d8fedf68", "tabbable": null, "tooltip": null, @@ -10509,11 +10502,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_d95b7b9cf6914acb9d1152502d2ba41b", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_d7c69daa5fa44a6487f5dc66380ec31a", "tabbable": null, "tooltip": null, - "value": "\u2007180/180\u2007[00:10<00:00,\u200718.55it/s]" + "value": " 180/180 [00:10<00:00, 18.55it/s]" } }, "9b50599c4a084d9eb5b8755040c9bd32": { @@ -10548,11 +10541,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_f5bbf314d840422b9e486386de3f5bb6", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_dc68cf2bf3e94df881377a106754a350", "tabbable": null, "tooltip": null, - "value": "\u20071.04M/?\u2007[00:00<00:00,\u200710.5MB/s]" + "value": " 1.04M/? 
[00:00<00:00, 10.5MB/s]" } }, "9ba87779605c4969bf48b4071a94c630": { @@ -10672,11 +10665,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_363c2c2e96624875af87c420c7e2cf95", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_b732ea0e03674d4384ac0d2dbf2a5f69", "tabbable": null, "tooltip": null, - "value": "config.json:\u2007100%" + "value": "config.json: 100%" } }, "a0df7ae7fcc1441a8c9cca5a80b539b0": { @@ -10695,11 +10688,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_bcaf8ebca41240bd86dde0c617b68f01", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_dd6bf89931a64c63bbcd2cf526835c2d", "tabbable": null, "tooltip": null, - "value": "\u20072160/2160\u2007[02:00<00:00,\u200718.64it/s]" + "value": " 2160/2160 [02:00<00:00, 18.64it/s]" } }, "a1f007e8fb68491daa3ba444ca49f505": { @@ -10810,11 +10803,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_7655d70259dd4dbc8bd4d288f8850b7c", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_eb49abf5e8ea41e69c1307f78fda4a90", "tabbable": null, "tooltip": null, - "value": "\u2007180/180\u2007[00:09<00:00,\u200718.56it/s]" + "value": " 180/180 [00:09<00:00, 18.56it/s]" } }, "a711143026bc46a5b1b7bc3dccca1850": { @@ -10859,11 +10852,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_2970477ecd6545b2bb698748d4019dac", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7c2906dcc3d34ae6846332eb5375cc58", "tabbable": null, "tooltip": null, - "value": "\u2007124/124\u2007[00:00<00:00,\u200773.2kB/s]" + "value": " 124/124 [00:00<00:00, 73.2kB/s]" } }, "a92811787eb84dd19d9ec2fb2eab7eee": { @@ -11382,11 +11375,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_b75438a235a243d49404467d54b63373", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_8f8ffe07f8314800ad637194c8c7d10f", "tabbable": null, "tooltip": 
null, - "value": "\u2007665/665\u2007[00:00<00:00,\u2007122kB/s]" + "value": " 665/665 [00:00<00:00, 122kB/s]" } }, "b59d1c8089e04592a1b87a7c198d1f6c": { @@ -11482,7 +11475,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_97f6ad1918334a3ab503d4a5da11c9ef", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_0e453235a18e4f5c9008040b1420f718", "tabbable": null, "tooltip": null, @@ -11610,11 +11603,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_45e840f42f8c46d491678fad7820e835", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_e29dd892780a4208b61593998e588e1f", "tabbable": null, "tooltip": null, - "value": "model.safetensors:\u2007100%" + "value": "model.safetensors: 100%" } }, "bb91ef19e455404aac3d283f868f9687": { @@ -12051,11 +12044,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_2452dd39a79742b29964500360f4a478", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_6ca82062740348f2bee12629de7f8e2f", "tabbable": null, "tooltip": null, - "value": "\u2007580/580\u2007[00:00<00:00,\u20075679.21it/s,\u2007Materializing\u2007param=transformer.wte.weight]" + "value": " 580/580 [00:00<00:00, 5679.21it/s, Materializing param=transformer.wte.weight]" } }, "c455478a557645b29777950e364a5006": { @@ -12259,11 +12252,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_cb791fd8c7ae49079f9162125e98ff79", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_52cd22d9c796437692165d3f3ed48e82", "tabbable": null, "tooltip": null, - "value": "\u20072160/2160\u2007[02:00<00:00,\u200715.47it/s]" + "value": " 2160/2160 [02:00<00:00, 15.47it/s]" } }, "c5bca44eefb940d39fba70d4fff71571": { @@ -12359,11 +12352,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_f44f01a296fb4d198718574f1f802ba2", - "placeholder": "\u200b", + "placeholder": "​", "style": 
"IPY_MODEL_c6b1b39c22564a59bc5b950d7a46708f", "tabbable": null, "tooltip": null, - "value": "\u200726.0/26.0\u2007[00:00<00:00,\u20074.24kB/s]" + "value": " 26.0/26.0 [00:00<00:00, 4.24kB/s]" } }, "c6b1b39c22564a59bc5b950d7a46708f": { @@ -12418,11 +12411,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_7d0ba24e89554742a562ce50135447f0", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_502e8cbf7b7c47b480b8a85405fc24cf", "tabbable": null, "tooltip": null, - "value": "\u20072160/2160\u2007[02:00<00:00,\u200717.95it/s]" + "value": " 2160/2160 [02:00<00:00, 17.95it/s]" } }, "c836782daaa847248009a626db347182": { @@ -12563,11 +12556,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_3d83725b10254139a51cb688a495459d", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_020cf001eb7d496295a325cbc0ee8718", "tabbable": null, "tooltip": null, - "value": "\u20072160/2160\u2007[02:00<00:00,\u200718.75it/s]" + "value": " 2160/2160 [02:00<00:00, 18.75it/s]" } }, "cac816368dee481a9fee6b196a2b16d6": { @@ -12761,7 +12754,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_72035846c38c4a439583cc5e974c0a52", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_7aebe1ba464949849da0e182d90d0669", "tabbable": null, "tooltip": null, @@ -12987,11 +12980,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_8ab4ec47b46e401883c185417c452f17", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_fdb4d541f8414fe4bee9265554cd7522", "tabbable": null, "tooltip": null, - "value": "tokenizer_config.json:\u2007100%" + "value": "tokenizer_config.json: 100%" } }, "d3dab66a1c254f07afa02e73e6fd121d": { @@ -13693,11 +13686,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_b41e54c58689400b86fc0dbf18e4bbaa", - "placeholder": "\u200b", + "placeholder": "​", "style": 
"IPY_MODEL_98bce027ddf046c29bbbb03c6e9b1de3", "tabbable": null, "tooltip": null, - "value": "\u2007144/144\u2007[00:08<00:00,\u200718.70it/s]" + "value": " 144/144 [00:08<00:00, 18.70it/s]" } }, "df2f48a5055b4cb7a9db8115c407fd8c": { @@ -13893,11 +13886,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_29376b858cd4489a8fcefc2b096df1e5", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_6f7575088f10441c87ded2f68ac37e9f", "tabbable": null, "tooltip": null, - "value": "\u20072160/2160\u2007[01:59<00:00,\u200718.72it/s]" + "value": " 2160/2160 [01:59<00:00, 18.72it/s]" } }, "e3abfde7cfd24e938684e179059edd9d": { @@ -14220,11 +14213,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_db91637067cb428c94674237eabda8f7", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_dd98180b8cba40aa82fc6221dd4676d0", "tabbable": null, "tooltip": null, - "value": "\u2007180/180\u2007[00:09<00:00,\u200717.32it/s]" + "value": " 180/180 [00:09<00:00, 17.32it/s]" } }, "eb49abf5e8ea41e69c1307f78fda4a90": { @@ -14765,7 +14758,7 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_e2c844c07e434e718186afaf72312371", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_3704e0be72444fd9a07038fbe1b19156", "tabbable": null, "tooltip": null, @@ -14841,11 +14834,11 @@ "description": "", "description_allow_html": false, "layout": "IPY_MODEL_61ffe9ae7ab44e94b5729cae80e49437", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_a314648aea3d40f89ac931c47f81c9e6", "tabbable": null, "tooltip": null, - "value": "\u2007148/148\u2007[00:00<00:00,\u20075498.19it/s,\u2007Materializing\u2007param=transformer.wte.weight]" + "value": " 148/148 [00:00<00:00, 5498.19it/s, Materializing param=transformer.wte.weight]" } }, "fd150a5176074e959dfa52a35770b5f0": { @@ -14864,11 +14857,11 @@ "description": "", "description_allow_html": false, "layout": 
"IPY_MODEL_a05625d67e634f2a80c65cdcfcbe8f8c", - "placeholder": "\u200b", + "placeholder": "​", "style": "IPY_MODEL_d1e752f79bbc40ddbae7a02895c9b74e", "tabbable": null, "tooltip": null, - "value": "\u20076.43G/6.43G\u2007[04:52<00:00,\u2007111MB/s]" + "value": " 6.43G/6.43G [04:52<00:00, 111MB/s]" } }, "fdb4d541f8414fe4bee9265554cd7522": { @@ -14950,4 +14943,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/demos/LLaMA.ipynb b/demos/LLaMA.ipynb index 24e0efd48..faf1fed52 100644 --- a/demos/LLaMA.ipynb +++ b/demos/LLaMA.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -35,6 +35,8 @@ "output_type": "stream", "text": [ "Running as a Jupyter notebook - intended for development only!\n", + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n", "Using renderer: colab\n" ] } @@ -81,10 +83,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "\n", "# Import stuff\n", "import torch\n", "import tqdm.auto as tqdm\n", @@ -101,7 +105,6 @@ ") # Hooking utilities\n", "from transformer_lens.model_bridge import TransformerBridge\n", "\n", - "# NBVAL_IGNORE_OUTPUT\n", "_ = torch.set_grad_enabled(False)\n", "\n", "def imshow(tensor, renderer=None, xaxis=\"\", yaxis=\"\", **kwargs):\n", @@ -154,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -179,8 +182,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Loading LLaMA-2\n", - "LLaMA-2 is hosted on HuggingFace, but gated by login.\n", + "## Loading LLaMA-3.2\n", + "LLaMA-3.2 is hosted on HuggingFace, but gated by login.\n", "\n", "Before running the notebook, either set your `HF_TOKEN` environment variable or log in via the CLI:\n", "```bash\n", @@ -191,18 +194,18 @@ }, { 
"cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b60d7b0399ed41ebbfbc6bc4244b05ee", + "model_id": "6613d10ad8974b08bb07b4a1294c43d4", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Loading weights: 0%| | 0/291 [00:00\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -355,23 +358,23 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Shape of the value tensor: torch.Size([1, 34, 32, 128])\n", - "Original Loss: 2.930\n", - "Ablated Loss: 2.879\n" + "Shape of the value tensor: torch.Size([1, 32, 8, 64])\n", + "Original Loss: 2.615\n", + "Ablated Loss: 2.598\n" ] } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "layer_to_ablate = 0\n", - "head_index_to_ablate = 31\n", + "head_index_to_ablate = 4\n", "\n", "# We define a head ablation hook\n", "# The type annotations are NOT necessary, they're just a useful guide to the reader\n", diff --git a/demos/LLaMA2_GPU_Quantized.ipynb b/demos/LLaMA2_GPU_Quantized.ipynb index 56aa07925..3129e6489 100644 --- a/demos/LLaMA2_GPU_Quantized.ipynb +++ b/demos/LLaMA2_GPU_Quantized.ipynb @@ -6,7 +6,7 @@ "id": "EyASOtpeCUsO" }, "source": [ - "# LLaMA and Llama-2 in TransformerLens" + "# LLaMA in TransformerLens (Quantized)" ] }, { @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -105,12 +105,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "id": "P8zS3MPkCUsR" }, "outputs": [], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "\n", "# Import stuff\n", "import torch\n", "import tqdm.auto as tqdm\n", @@ -127,7 +129,6 @@ ") # Hooking utilities\n", "from transformer_lens.model_bridge import TransformerBridge\n", "\n", - "# NBVAL_IGNORE_OUTPUT\n", "_ = torch.set_grad_enabled(False)\n", "\n", "def imshow(tensor, renderer=None, xaxis=\"\", yaxis=\"\", **kwargs):\n", @@ -185,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 10, + 
"execution_count": 21, "metadata": { "id": "RdJ0AuW_CUsS" }, @@ -214,8 +215,8 @@ "id": "UmOqXE9wCUsS" }, "source": [ - "## Loading LLaMA-2\n", - "LLaMA-2 is hosted on HuggingFace, but gated by login.\n", + "## Loading LLaMA-3.2\n", + "LLaMA-3.2 is hosted on HuggingFace, but gated by login.\n", "\n", "Before running the notebook, either set your `HF_TOKEN` environment variable or log in via the CLI:\n", "```bash\n", @@ -252,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -388,12 +389,96 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bb6704de83c640d3ac1b5d4046b23414", + "model_id": "02974f818bc54305b535861303ca208e", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Loading weights: 0%| | 0/291 [00:00\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -682,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -695,16 +760,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Shape of the value tensor: torch.Size([1, 34, 32, 128])\n", - "Original Loss: 2.957\n", - "Ablated Loss: 2.918\n" + "Shape of the value tensor: torch.Size([1, 32, 8, 64])\n", + "Original Loss: 2.951\n", + "Ablated Loss: 2.934\n" ] } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "layer_to_ablate = 0\n", - "head_index_to_ablate = 31\n", + "head_index_to_ablate = 4\n", "\n", "# We define a head ablation hook\n", "# The type annotations are NOT necessary, they're just a useful guide to the reader\n", diff --git a/demos/stable_lm.ipynb b/demos/stable_lm.ipynb index 419b2cbfc..077952edb 100644 --- a/demos/stable_lm.ipynb +++ b/demos/stable_lm.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": { "id": "D_OSNfeCSjS2" }, @@ -38,9 +38,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Running as a Jupyter notebook - intended for development only!\n", - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" + "Running as a Jupyter notebook - intended for development only!\n" ] } ], @@ -65,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 2, "metadata": { "id": "PXB6xkimoH2h" }, @@ -80,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -205,12 +203,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "07d32635e7f444b8b8591b25fc39f141", + "model_id": "db184d0e866c489b8753520f96b2d771", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Loading weights: 0%| | 0/196 [00:00