From cde22afa417ca5b6b95237fc958a357d4e7baf4b Mon Sep 17 00:00:00 2001
From: Stephen Gruppetta
Date: Tue, 5 May 2026 21:32:27 +0000
Subject: [PATCH] Sample code for: How to Make a Scatter Plot in Python With plt.scatter()

---
Review notes (this area between the three-dash line and the diff is not part
of the commit message):
* step_07 / step_08: the y-axis label now says "per day", matching the
  sales_per_day_* data that is plotted.
* step_08: both scatter calls share vmin/vmax, so one colorbar is valid for
  both products.
* step_12: added a comment explaining the y[bus_times] lookup.
* Hunk sizes and the diffstat reflect these edits. The "index" blob hashes of
  README.md, step_07, step_08 and step_12 were not recomputed; regenerate the
  patch with git format-patch after applying if exact hashes matter.

 visualizing-python-plt-scatter/README.md      |  3 ++
 .../requirements.txt                          |  2 +
 .../step_01_basic_scatter.py                  | 10 ++++
 .../step_02_timing_comparison.py              | 24 ++++++++++
 .../step_03_size_encoding.py                  | 11 +++++
 .../step_04_size_and_color.py                 | 22 +++++++++
 .../step_05_two_products.py                   | 32 +++++++++++++
 .../step_06_two_products_markers.py           | 33 +++++++++++++
 .../step_07_two_products_labels_alpha.py      | 46 +++++++++++++++++++
 .../step_08_colormap.py                       | 55 ++++++++++++++++++++++
 .../step_09_bus_distribution.py               | 18 ++++++++
 .../step_10_bus_random_scatter.py             | 15 ++++++
 .../step_11_bus_combined.py                   | 25 ++++++++++
 .../step_12_bus_in_out_region.py              | 41 +++++++++++++++++
 14 files changed, 337 insertions(+)
 create mode 100644 visualizing-python-plt-scatter/README.md
 create mode 100644 visualizing-python-plt-scatter/requirements.txt
 create mode 100644 visualizing-python-plt-scatter/step_01_basic_scatter.py
 create mode 100644 visualizing-python-plt-scatter/step_02_timing_comparison.py
 create mode 100644 visualizing-python-plt-scatter/step_03_size_encoding.py
 create mode 100644 visualizing-python-plt-scatter/step_04_size_and_color.py
 create mode 100644 visualizing-python-plt-scatter/step_05_two_products.py
 create mode 100644 visualizing-python-plt-scatter/step_06_two_products_markers.py
 create mode 100644 visualizing-python-plt-scatter/step_07_two_products_labels_alpha.py
 create mode 100644 visualizing-python-plt-scatter/step_08_colormap.py
 create mode 100644 visualizing-python-plt-scatter/step_09_bus_distribution.py
 create mode 100644 visualizing-python-plt-scatter/step_10_bus_random_scatter.py
 create mode 100644 visualizing-python-plt-scatter/step_11_bus_combined.py
 create mode 100644 visualizing-python-plt-scatter/step_12_bus_in_out_region.py

diff --git a/visualizing-python-plt-scatter/README.md b/visualizing-python-plt-scatter/README.md
new file mode 100644
index 0000000000..e474f1d600
--- /dev/null
+++ b/visualizing-python-plt-scatter/README.md
@@ -0,0 +1,3 @@
+# How to Make a Scatter Plot in Python With plt.scatter()
+
+This folder provides the code examples for the Real Python tutorial [How to Make a Scatter Plot in Python With plt.scatter()](https://realpython.com/visualizing-python-plt-scatter/).
diff --git a/visualizing-python-plt-scatter/requirements.txt b/visualizing-python-plt-scatter/requirements.txt
new file mode 100644
index 0000000000..9799938239
--- /dev/null
+++ b/visualizing-python-plt-scatter/requirements.txt
@@ -0,0 +1,2 @@
+matplotlib==3.10.1
+numpy==2.2.4
diff --git a/visualizing-python-plt-scatter/step_01_basic_scatter.py b/visualizing-python-plt-scatter/step_01_basic_scatter.py
new file mode 100644
index 0000000000..1e777b0430
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_01_basic_scatter.py
@@ -0,0 +1,10 @@
+# Sections: Getting Started With plt.scatter() / Comparing plt.scatter() and plt.plot()
+
+import matplotlib.pyplot as plt
+
+price = [2.50, 1.23, 4.02, 3.25, 5.00, 4.40]
+sales_per_day = [34, 62, 49, 22, 13, 19]
+
+plt.scatter(price, sales_per_day)
+# plt.plot(price, sales_per_day, "o")  # equivalent using plt.plot()
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_02_timing_comparison.py b/visualizing-python-plt-scatter/step_02_timing_comparison.py
new file mode 100644
index 0000000000..688ac7d6c0
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_02_timing_comparison.py
@@ -0,0 +1,24 @@
+# Section: Comparing plt.scatter() and plt.plot()
+
+import timeit
+import matplotlib.pyplot as plt  # noqa: F401
+
+price = [2.50, 1.23, 4.02, 3.25, 5.00, 4.40]
+sales_per_day = [34, 62, 49, 22, 13, 19]
+
+print(
+    "plt.scatter()",
+    timeit.timeit(
+        "plt.scatter(price, sales_per_day)",
+        number=1000,
+        globals=globals(),
+    ),
+)
+print(
+    "plt.plot()",
+    timeit.timeit(
+        "plt.plot(price, sales_per_day, 'o')",
+        number=1000,
+        globals=globals(),
+    ),
+)
diff --git a/visualizing-python-plt-scatter/step_03_size_encoding.py b/visualizing-python-plt-scatter/step_03_size_encoding.py
new file mode 100644
index 0000000000..1566f3479f
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_03_size_encoding.py
@@ -0,0 +1,11 @@
+# Section: Changing the Size
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+price = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin = np.asarray([20, 35, 40, 20, 27.5, 15])
+
+plt.scatter(x=price, y=sales_per_day, s=profit_margin * 10)
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_04_size_and_color.py b/visualizing-python-plt-scatter/step_04_size_and_color.py
new file mode 100644
index 0000000000..669c06d48b
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_04_size_and_color.py
@@ -0,0 +1,22 @@
+# Section: Changing the Color
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+price = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin = np.asarray([20, 35, 40, 20, 27.5, 15])
+
+low = (0, 1, 0)
+medium = (1, 1, 0)
+high = (1, 0, 0)
+
+sugar_content = [low, high, medium, medium, high, low]
+
+plt.scatter(
+    x=price,
+    y=sales_per_day,
+    s=profit_margin * 10,
+    c=sugar_content,
+)
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_05_two_products.py b/visualizing-python-plt-scatter/step_05_two_products.py
new file mode 100644
index 0000000000..1b746a2372
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_05_two_products.py
@@ -0,0 +1,32 @@
+# Section: Changing the Shape
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+low = (0, 1, 0)
+medium = (1, 1, 0)
+high = (1, 0, 0)
+
+price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
+sugar_content_orange = [low, high, medium, medium, high, low]
+
+price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
+sales_per_day_cereal = np.asarray([67, 34, 36, 12])
+profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
+sugar_content_cereal = [low, high, medium, low]
+
+plt.scatter(
+    x=price_orange,
+    y=sales_per_day_orange,
+    s=profit_margin_orange * 10,
+    c=sugar_content_orange,
+)
+plt.scatter(
+    x=price_cereal,
+    y=sales_per_day_cereal,
+    s=profit_margin_cereal * 10,
+    c=sugar_content_cereal,
+)
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_06_two_products_markers.py b/visualizing-python-plt-scatter/step_06_two_products_markers.py
new file mode 100644
index 0000000000..ff61d82db5
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_06_two_products_markers.py
@@ -0,0 +1,33 @@
+# Section: Changing the Shape
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+low = (0, 1, 0)
+medium = (1, 1, 0)
+high = (1, 0, 0)
+
+price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
+sugar_content_orange = [low, high, medium, medium, high, low]
+
+price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
+sales_per_day_cereal = np.asarray([67, 34, 36, 12])
+profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
+sugar_content_cereal = [low, high, medium, low]
+
+plt.scatter(
+    x=price_orange,
+    y=sales_per_day_orange,
+    s=profit_margin_orange * 10,
+    c=sugar_content_orange,
+)
+plt.scatter(
+    x=price_cereal,
+    y=sales_per_day_cereal,
+    s=profit_margin_cereal * 10,
+    c=sugar_content_cereal,
+    marker="d",
+)
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_07_two_products_labels_alpha.py b/visualizing-python-plt-scatter/step_07_two_products_labels_alpha.py
new file mode 100644
index 0000000000..606da4b3c7
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_07_two_products_labels_alpha.py
@@ -0,0 +1,46 @@
+# Section: Changing the Transparency
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+low = (0, 1, 0)
+medium = (1, 1, 0)
+high = (1, 0, 0)
+
+price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
+sugar_content_orange = [low, high, medium, medium, high, low]
+
+price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
+sales_per_day_cereal = np.asarray([67, 34, 36, 12])
+profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
+sugar_content_cereal = [low, high, medium, low]
+
+plt.scatter(
+    x=price_orange,
+    y=sales_per_day_orange,
+    s=profit_margin_orange * 10,
+    c=sugar_content_orange,
+    alpha=0.5,
+)
+plt.scatter(
+    x=price_cereal,
+    y=sales_per_day_cereal,
+    s=profit_margin_cereal * 10,
+    c=sugar_content_cereal,
+    marker="d",
+    alpha=0.5,
+)
+
+plt.title("Sales vs Prices for Orange Drinks and Cereal Bars")
+plt.legend(["Orange Drinks", "Cereal Bars"])
+plt.xlabel("Price (Currency Unit)")
+plt.ylabel("Average sales per day")
+plt.text(
+    3.2,
+    55,
+    "Size of marker = profit margin\nColor of marker = sugar content",
+)
+
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_08_colormap.py b/visualizing-python-plt-scatter/step_08_colormap.py
new file mode 100644
index 0000000000..95968fedf3
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_08_colormap.py
@@ -0,0 +1,55 @@
+# Section: Customizing the Colormap and Style of Your Scatter Plot
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
+sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
+profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
+sugar_content_orange = [15, 35, 22, 27, 38, 14]
+
+price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
+sales_per_day_cereal = np.asarray([67, 34, 36, 12])
+profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
+sugar_content_cereal = [21, 49, 29, 24]
+
+# Share one color scale between both scatter calls. Without explicit limits,
+# each call normalizes its own data, so equal sugar values get different
+# colors and the colorbar only describes the last collection drawn.
+sugar_content_all = sugar_content_orange + sugar_content_cereal
+vmin, vmax = min(sugar_content_all), max(sugar_content_all)
+
+plt.scatter(
+    x=price_orange,
+    y=sales_per_day_orange,
+    s=profit_margin_orange * 10,
+    c=sugar_content_orange,
+    cmap="jet",
+    vmin=vmin,
+    vmax=vmax,
+    alpha=0.5,
+)
+plt.scatter(
+    x=price_cereal,
+    y=sales_per_day_cereal,
+    s=profit_margin_cereal * 10,
+    c=sugar_content_cereal,
+    cmap="jet",
+    vmin=vmin,
+    vmax=vmax,
+    marker="d",
+    alpha=0.5,
+)
+
+plt.title("Sales vs Prices for Orange Drinks and Cereal Bars")
+plt.legend(["Orange Drinks", "Cereal Bars"])
+plt.xlabel("Price (Currency Unit)")
+plt.ylabel("Average sales per day")
+plt.text(
+    2.7,
+    55,
+    "Size of marker = profit margin\nColor of marker = sugar content",
+)
+plt.colorbar()
+
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_09_bus_distribution.py b/visualizing-python-plt-scatter/step_09_bus_distribution.py
new file mode 100644
index 0000000000..0663c0ed16
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_09_bus_distribution.py
@@ -0,0 +1,18 @@
+# Section: Further Scatter Plot Techniques With plt.scatter()
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+mean = 15, 45
+sd = 5, 7
+
+x = np.linspace(0, 59, 60)  # Represents each minute within the hour
+first_distribution = np.exp(-0.5 * ((x - mean[0]) / sd[0]) ** 2)
+second_distribution = 0.9 * np.exp(-0.5 * ((x - mean[1]) / sd[1]) ** 2)
+y = first_distribution + second_distribution
+y = y / max(y)
+
+plt.plot(x, y)
+plt.ylabel("Relative probability of bus arrivals")
+plt.xlabel("Minutes past the hour")
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_10_bus_random_scatter.py b/visualizing-python-plt-scatter/step_10_bus_random_scatter.py
new file mode 100644
index 0000000000..0f32babdd0
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_10_bus_random_scatter.py
@@ -0,0 +1,15 @@
+# Section: Further Scatter Plot Techniques With plt.scatter()
+
+import random
+import matplotlib.pyplot as plt
+import numpy as np
+
+n_buses = 40
+bus_times = np.asarray([random.randint(0, 59) for _ in range(n_buses)])
+bus_likelihood = np.asarray([random.random() for _ in range(n_buses)])
+
+plt.scatter(x=bus_times, y=bus_likelihood)
+plt.title("Randomly chosen bus arrival times and relative probabilities")
+plt.ylabel("Relative probability of bus arrivals")
+plt.xlabel("Minutes past the hour")
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_11_bus_combined.py b/visualizing-python-plt-scatter/step_11_bus_combined.py
new file mode 100644
index 0000000000..15cc22298e
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_11_bus_combined.py
@@ -0,0 +1,25 @@
+# Section: Further Scatter Plot Techniques With plt.scatter()
+
+import random
+import matplotlib.pyplot as plt
+import numpy as np
+
+mean = 15, 45
+sd = 5, 7
+
+x = np.linspace(0, 59, 60)
+first_distribution = np.exp(-0.5 * ((x - mean[0]) / sd[0]) ** 2)
+second_distribution = 0.9 * np.exp(-0.5 * ((x - mean[1]) / sd[1]) ** 2)
+y = first_distribution + second_distribution
+y = y / max(y)
+
+n_buses = 40
+bus_times = np.asarray([random.randint(0, 59) for _ in range(n_buses)])
+bus_likelihood = np.asarray([random.random() for _ in range(n_buses)])
+
+plt.scatter(x=bus_times, y=bus_likelihood)
+plt.plot(x, y)
+plt.title("Randomly chosen bus arrival times and relative probabilities")
+plt.ylabel("Relative probability of bus arrivals")
+plt.xlabel("Minutes past the hour")
+plt.show()
diff --git a/visualizing-python-plt-scatter/step_12_bus_in_out_region.py b/visualizing-python-plt-scatter/step_12_bus_in_out_region.py
new file mode 100644
index 0000000000..1fc8b8732e
--- /dev/null
+++ b/visualizing-python-plt-scatter/step_12_bus_in_out_region.py
@@ -0,0 +1,41 @@
+# Section: Further Scatter Plot Techniques With plt.scatter()
+
+import random
+import matplotlib.pyplot as plt
+import numpy as np
+
+mean = 15, 45
+sd = 5, 7
+
+x = np.linspace(0, 59, 60)
+first_distribution = np.exp(-0.5 * ((x - mean[0]) / sd[0]) ** 2)
+second_distribution = 0.9 * np.exp(-0.5 * ((x - mean[1]) / sd[1]) ** 2)
+y = first_distribution + second_distribution
+y = y / max(y)
+
+n_buses = 40
+bus_times = np.asarray([random.randint(0, 59) for _ in range(n_buses)])
+bus_likelihood = np.asarray([random.random() for _ in range(n_buses)])
+
+# bus_times holds whole minutes 0-59, so each value is also the index of the
+# matching sample in y (x has one sample per minute).
+in_region = bus_likelihood < y[bus_times]
+out_region = bus_likelihood >= y[bus_times]
+
+plt.scatter(
+    x=bus_times[in_region],
+    y=bus_likelihood[in_region],
+    color="green",
+)
+plt.scatter(
+    x=bus_times[out_region],
+    y=bus_likelihood[out_region],
+    color="red",
+    marker="x",
+)
+
+plt.plot(x, y)
+plt.title("Randomly chosen bus arrival times and relative probabilities")
+plt.ylabel("Relative probability of bus arrivals")
+plt.xlabel("Minutes past the hour")
+plt.show()