Skip to content

Commit fd61b9e

Browse files
author
bitoollearner
committed
PySpark LeetCode Questions
PySpark LeetCode Questions
1 parent c8f6d93 commit fd61b9e

8 files changed

+607
-67
lines changed

Solved/579. Find Cumulative Salary of an Employee (Hard)-(Solved).ipynb

Lines changed: 151 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "db7ebec7-889d-4791-9b66-269554c0d581",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "70352d4b-6bb2-42f3-8465-ba88ab41d730",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "154313e3-f403-4508-a1d1-a93ae1e178d9",
4655
"showTitle": false,
@@ -144,7 +153,10 @@
144153
"execution_count": 0,
145154
"metadata": {
146155
"application/vnd.databricks.v1+cell": {
147-
"cellMetadata": {},
156+
"cellMetadata": {
157+
"byteLimit": 2048000,
158+
"rowLimit": 10000
159+
},
148160
"inputWidgets": {},
149161
"nuid": "787ef081-e08b-4a47-9b9a-a1a07b8b2b93",
150162
"showTitle": false,
@@ -155,33 +167,161 @@
155167
"outputs": [],
156168
"source": [
157169
"employee_data_579 = [\n",
158-
" (1, 1, 20), (2, 1, 20), (1, 2, 30), (2, 2, 30), (3, 2, 40),\n",
159-
" (1, 3, 40), (3, 3, 60), (1, 4, 60), (3, 4, 70), (1, 7, 90), (1, 8, 90)\n",
170+
" (1, 1, 20),\n",
171+
" (2, 1, 20),\n",
172+
" (1, 2, 30),\n",
173+
" (2, 2, 30),\n",
174+
" (3, 2, 40),\n",
175+
" (1, 3, 40),\n",
176+
" (3, 3, 60),\n",
177+
" (1, 4, 60),\n",
178+
" (3, 4, 70),\n",
179+
" (1, 7, 90),\n",
180+
" (1, 8, 90)\n",
160181
"]\n",
161-
"\n",
162182
"employee_columns_579 = [\"id\", \"month\", \"salary\"]\n",
163183
"employee_df_579 = spark.createDataFrame(employee_data_579, employee_columns_579)\n",
164184
"employee_df_579.show()"
165185
]
186+
},
187+
{
188+
"cell_type": "code",
189+
"execution_count": 0,
190+
"metadata": {
191+
"application/vnd.databricks.v1+cell": {
192+
"cellMetadata": {
193+
"byteLimit": 2048000,
194+
"rowLimit": 10000
195+
},
196+
"inputWidgets": {},
197+
"nuid": "aa834c97-caf6-4e5e-a320-48d0d053858b",
198+
"showTitle": false,
199+
"tableResultSettingsMap": {},
200+
"title": ""
201+
}
202+
},
203+
"outputs": [],
204+
"source": [
205+
"windowSpec = Window.partitionBy(\"id\").orderBy(col(\"month\"))"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": 0,
211+
"metadata": {
212+
"application/vnd.databricks.v1+cell": {
213+
"cellMetadata": {
214+
"byteLimit": 2048000,
215+
"rowLimit": 10000
216+
},
217+
"inputWidgets": {},
218+
"nuid": "2144b64d-d36f-4ecd-ac19-6632eb5fc2fe",
219+
"showTitle": false,
220+
"tableResultSettingsMap": {},
221+
"title": ""
222+
}
223+
},
224+
"outputs": [],
225+
"source": [
226+
"df_lagged_579 = employee_df_579 \\\n",
227+
" .withColumn(\"salary_lag1\", lag(\"salary\", 1).over(windowSpec)) \\\n",
228+
" .withColumn(\"salary_lag2\", lag(\"salary\", 2).over(windowSpec))"
229+
]
230+
},
231+
{
232+
"cell_type": "code",
233+
"execution_count": 0,
234+
"metadata": {
235+
"application/vnd.databricks.v1+cell": {
236+
"cellMetadata": {
237+
"byteLimit": 2048000,
238+
"rowLimit": 10000
239+
},
240+
"inputWidgets": {},
241+
"nuid": "0cfb664b-e3d0-4773-aad4-0e8a7e17622e",
242+
"showTitle": false,
243+
"tableResultSettingsMap": {},
244+
"title": ""
245+
}
246+
},
247+
"outputs": [],
248+
"source": [
249+
"from pyspark.sql import functions as F  # explicit alias so F.sum can never resolve to Python's builtin sum\n",
250+
"df_sum_579 = df_lagged_579.withColumn(\"cum_salary\", F.sum(\"salary\").over(Window.partitionBy(\"id\").orderBy(\"month\").rangeBetween(-2, 0)))  # RANGE frame on the month value: sums months [m-2, m], so gaps such as 4 -> 7 are not bridged the way row-based lag() would"
251+
]
252+
},
253+
{
254+
"cell_type": "code",
255+
"execution_count": 0,
256+
"metadata": {
257+
"application/vnd.databricks.v1+cell": {
258+
"cellMetadata": {
259+
"byteLimit": 2048000,
260+
"rowLimit": 10000
261+
},
262+
"inputWidgets": {},
263+
"nuid": "174ee40e-ac0b-4315-994c-4dd7347f5e83",
264+
"showTitle": false,
265+
"tableResultSettingsMap": {},
266+
"title": ""
267+
}
268+
},
269+
"outputs": [],
270+
"source": [
271+
"most_recent_df_579 = employee_df_579.groupBy(\"id\").agg(max(\"month\").alias(\"max_month\"))\n"
272+
]
273+
},
274+
{
275+
"cell_type": "code",
276+
"execution_count": 0,
277+
"metadata": {
278+
"application/vnd.databricks.v1+cell": {
279+
"cellMetadata": {
280+
"byteLimit": 2048000,
281+
"rowLimit": 10000
282+
},
283+
"inputWidgets": {},
284+
"nuid": "a5afd4ad-26ae-48a3-989c-6edf27b1355e",
285+
"showTitle": false,
286+
"tableResultSettingsMap": {},
287+
"title": ""
288+
}
289+
},
290+
"outputs": [],
291+
"source": [
292+
"df_sum_579\\\n",
293+
" .join(most_recent_df_579,on='id',how='inner')\\\n",
294+
" .filter(col('month') != col('max_month'))\\\n",
295+
"    .select('id','month','cum_salary').orderBy(col('id').asc(), col('month').desc()).show()  # id ascending, then month descending, so the row order within each id is deterministic"
296+
]
166297
}
167298
],
168299
"metadata": {
169300
"application/vnd.databricks.v1+notebook": {
170-
"computePreferences": null,
301+
"computePreferences": {
302+
"hardware": {
303+
"accelerator": null,
304+
"gpuPoolId": null,
305+
"memory": null
306+
}
307+
},
171308
"dashboards": [],
172309
"environmentMetadata": {
173310
"base_environment": "",
174-
"environment_version": "1"
311+
"environment_version": "2"
175312
},
176313
"inputWidgetPreferences": null,
177314
"language": "python",
178315
"notebookMetadata": {
179316
"pythonIndentUnit": 4
180317
},
181-
"notebookName": "579. Find Cumulative Salary of an Employee (Hard)",
318+
"notebookName": "579. Find Cumulative Salary of an Employee (Hard)-(Solved)",
182319
"widgets": {}
320+
},
321+
"language_info": {
322+
"name": "python"
183323
}
184324
},
185325
"nbformat": 4,
186326
"nbformat_minor": 0
187-
}
327+
}

Solved/580. Count Student Number in Departments (Medium)-(Solved).ipynb

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "db7ebec7-889d-4791-9b66-269554c0d581",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "70352d4b-6bb2-42f3-8465-ba88ab41d730",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "154313e3-f403-4508-a1d1-a93ae1e178d9",
4655
"showTitle": false,
@@ -111,7 +120,10 @@
111120
"execution_count": 0,
112121
"metadata": {
113122
"application/vnd.databricks.v1+cell": {
114-
"cellMetadata": {},
123+
"cellMetadata": {
124+
"byteLimit": 2048000,
125+
"rowLimit": 10000
126+
},
115127
"inputWidgets": {},
116128
"nuid": "787ef081-e08b-4a47-9b9a-a1a07b8b2b93",
117129
"showTitle": false,
@@ -141,25 +153,58 @@
141153
"department_df_580 = spark.createDataFrame(department_data_580, department_columns_580)\n",
142154
"department_df_580.show()"
143155
]
156+
},
157+
{
158+
"cell_type": "code",
159+
"execution_count": 0,
160+
"metadata": {
161+
"application/vnd.databricks.v1+cell": {
162+
"cellMetadata": {
163+
"byteLimit": 2048000,
164+
"rowLimit": 10000
165+
},
166+
"inputWidgets": {},
167+
"nuid": "58c3bf6a-d64a-4aa8-be4c-df11846586cd",
168+
"showTitle": false,
169+
"tableResultSettingsMap": {},
170+
"title": ""
171+
}
172+
},
173+
"outputs": [],
174+
"source": [
175+
"department_df_580\\\n",
176+
" .join(student_df_580, on='dept_id', how='left')\\\n",
177+
" .groupBy('dept_name').agg(count(col('student_name')).alias('student_count'))\\\n",
178+
" .orderBy(col('student_count').desc(), col('dept_name').asc()).show()"
179+
]
144180
}
145181
],
146182
"metadata": {
147183
"application/vnd.databricks.v1+notebook": {
148-
"computePreferences": null,
184+
"computePreferences": {
185+
"hardware": {
186+
"accelerator": null,
187+
"gpuPoolId": null,
188+
"memory": null
189+
}
190+
},
149191
"dashboards": [],
150192
"environmentMetadata": {
151193
"base_environment": "",
152-
"environment_version": "1"
194+
"environment_version": "2"
153195
},
154196
"inputWidgetPreferences": null,
155197
"language": "python",
156198
"notebookMetadata": {
157199
"pythonIndentUnit": 4
158200
},
159-
"notebookName": "580. Count Student Number in Departments (Medium)",
201+
"notebookName": "580. Count Student Number in Departments (Medium)-(Solved)",
160202
"widgets": {}
203+
},
204+
"language_info": {
205+
"name": "python"
161206
}
162207
},
163208
"nbformat": 4,
164209
"nbformat_minor": 0
165-
}
210+
}

0 commit comments

Comments
 (0)