diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8e7bc14 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM ipython/notebook + +RUN mkdir /docker_assignment +WORKDIR /docker_assignment +RUN mkdir ./dockerfolder ./data + +COPY dockerfolder/*.ipynb ./dockerfolder/ +COPY data/* ./data/ + +WORKDIR /docker_assignment/dockerfolder/ diff --git a/README.md b/README.md deleted file mode 100644 index 567c74e..0000000 --- a/README.md +++ /dev/null @@ -1,9 +0,0 @@ -#BUILD DOCKER IMAGE WITH THIS COMMAND - -docker build -t . - - -#RUN DOCKER IMAGE WITH THIS COMMAND - -docker run -p 8888:8888 --rm sh -c "ipython notebook --ip=*" - diff --git a/data/hospitaldata.csv b/data/hospitaldata.csv old mode 100644 new mode 100755 diff --git a/dockerfile b/dockerfile deleted file mode 100644 index ba8d249..0000000 --- a/dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM ipython/notebook - -RUN pip install pandas -RUN pip install datetime -RUN mkdir /Assignment_Docker -WORKDIR /Assignment_Docker -RUN mkdir ./files ./data - -COPY files/*.ipynb ./files/ -COPY data/* ./data/ - -WORKDIR /Assignment_Docker/files/ diff --git a/dockerfolder/Saif_khi_Python_Assignment2.ipynb b/dockerfolder/Saif_khi_Python_Assignment2.ipynb new file mode 100644 index 0000000..0e87ef0 --- /dev/null +++ b/dockerfolder/Saif_khi_Python_Assignment2.ipynb @@ -0,0 +1,655 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.style.use('ggplot')\n", + "\n", + "Hospital_data= pd.read_csv(\"C://Users//muhammad.saif//Documents//hospitaldata.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Date id Time Age Sex Consulting_Doctor \\\n", + "0 Sunday, January 01, 2017 101 11:00 40 F Dr Kinza Alam \n", + "1 Monday, January 02, 2017 150 10:45AM 26 M Nursing Staff \n", + "2 Monday, January 02, 2017 58 12:38PM 30 F Dr Riffat Naheed \n", + "3 Monday, January 02, 2017 75 1:00PM 40 M Dr Riffat Naheed \n", + "4 Monday, January 02, 2017 97 2:45PM 27 M Dr Riffat Naheed \n", + "\n", + " Specialty Procedure Total_Charges Amount_Received. \\\n", + "0 Gynae C Section 30000 30000.0 \n", + "1 NaN Dressing 1500 1500.0 \n", + "2 Psychotherapist Consultation 1000 1000.0 \n", + "3 Psychotherapist Consultation 1500 1500.0 \n", + "4 Psychotherapist Consultation 2000 2000.0 \n", + "\n", + " Amount_Balance Amount.Received.By Amount.in.Hospital Receptionist_Name \\\n", + "0 - Mrs Shamsa NaN Hamza \n", + "1 - Dr Saniya NaN Haris \n", + "2 - Mrs Shamsa 300.0 Fiza \n", + "3 - Mrs Shamsa 450.0 Zaheer \n", + "4 - Mrs Shamsa 600.0 Haris \n", + "\n", + " Next.Apt \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n" + ] + } + ], + "source": [ + "for i in Hospital_data.columns:\n", + " if \"..\" in i:\n", + " Hospital_data.rename(columns={i: i.replace('..','_')}, inplace=True)\n", + " \n", + "print(Hospital_data.head())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Monday 51\n", + "Wednesday 43\n", + "Tuesday 42\n", + "Thursday 33\n", + "Friday 26\n", + "Saturday 20\n", + "Sunday 7\n", + "Name: Date, dtype: int64\n" + ] + } + ], + "source": [ + "hospitaldate=Hospital_data['Date']\n", + "days=hospitaldate.map(lambda x: str(x).split(',')[0])\n", + "counts=days.value_counts()\n", + "print(counts)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "32.734375" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hospital_data['Age']=pd.to_numeric(Hospital_data['Age'],errors='coerce' )\n", + "Hospital_data['Age'].mean()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "23" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "no_of_children=Hospital_data[Hospital_data.Age<=12]\n", + "len(no_of_children.index)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Male: Consultation\n", + "Female: Consultation\n" + ] + } + ], + "source": [ + "str=Hospital_data.Sex\n", + "str.replace(\"f\", \"F\")\n", + "Male=Hospital_data[Hospital_data.Sex=='M']\n", + "Female=Hospital_data[Hospital_data.Sex=='F']\n", + "print(\"Male:\" ,Male['Procedure'].value_counts().index.tolist()[0])\n", + "print(\"Female:\" ,Female['Procedure'].value_counts().index.tolist()[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Total_Charges\n", + "Consulting_Doctor \n", + "Brig Asif 2800.0\n", + "Brig Farrukh 3750.0\n", + "Col Ulfat Ellahi 1000.0\n", + "Dr Alaf Khan 513050.0\n", + "Dr Ali 26100.0\n", + "Dr Ammad 1400.0\n", + "Dr Ammara 1500.0\n", + "Dr Fakiha 22600.0\n", + "Dr Irfan 11000.0\n", + "Dr Kinza Alam 76700.0\n", + "Dr Mehwish 1000.0\n", + "Dr Mumtaz 1000.0\n", + "Dr Paul 1500.0\n", + "Dr Qurat ul Ain 20900.0\n", + "Dr Riffat Naheed 18800.0\n", + "Dr Saad 52000.0\n", + "Dr Saad Riaz 5700.0\n", + "Dr Saima Shams 1500.0\n", + "Dr Saniya 4000.0\n", + "Dr Shireen 3200.0\n", + "Dr Waqar Azeem 6000.0\n", + "Dr Zubair 1700.0\n", + "Nursing Staff 9150.0\n" + ] + } + ], + "source": [ + "Hospital_data['Total_Charges']=pd.to_numeric(Hospital_data['Total_Charges'],errors='coerce')\n", + "highest_earner = Hospital_data[['Consulting_Doctor', 'Total_Charges']].groupby(['Consulting_Doctor']).sum()\n", + "print(highest_earner)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Total_Charges\n", + "Procedure \n", + "22 Unit Bridge 69500.0\n", + "4 Unit Bridge 11000.0\n", + "8 Unit Bridge+2 R.C.T 30000.0\n", + "BSR 50.0\n", + "C Section 65000.0\n", + "Cancelled NaN\n", + "Consultation 83950.0\n", + "Consultation + X Ray 3000.0\n", + "Consultation + Dressing 5000.0\n", + "Consultation + Nebulize 1150.0\n", + "Consultation + USG 1500.0\n", + "Consultation+Denture 1500.0\n", + "Consultation+ECG 1300.0\n", + "Consultation+ER Retain 2800.0\n", + "Consultation+Retain 3500.0\n", + "Consultation+USG 1500.0\n", + "Consultation+X Ray 4750.0\n", + "Consultation+x Ray 650.0\n", + "Coupety 300.0\n", + "Crown 20000.0\n", + "Denture+Scalling+Filling 5500.0\n", + "Dressing 2900.0\n", + "Er Retain 300.0\n", + "Extraction 14600.0\n", + "Filling 6500.0\n", + "Filling + X Rays 2600.0\n", + "Injection 1800.0\n", + "Laboratory Test NaN\n", + "Medicine 100.0\n", + "Operation 50000.0\n", + "Orthodontics 240000.0\n", + "Pharmacy 900.0\n", + "Polishing 500.0\n", + "Pop 3700.0\n", + "R.C.T 15500.0\n", + "R.C.T+Crown 8000.0\n", + "R.C.T+Scalling+Crown 9500.0\n", + "R.P.D + Crown 11000.0\n", + "RCT 3000.0\n", + "RCT (4 teeth) Bridge (9 teeth) 48000.0\n", + "Scalling 16500.0\n", + "Scalling+Polishing 4000.0\n", + "Stiches 700.0\n", + "USG 5500.0\n", + "USG Abdomen 1000.0\n", + "USG KUB 1000.0\n", + "X Ray 5800.0\n" + ] + } + ], + "source": [ + "proc_type=Hospital_data[['Procedure', 'Total_Charges']].groupby(['Procedure']).sum()\n", + "print(proc_type)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "13.0 29\n", + "18.0 23\n", + "15.0 20\n", + "14.0 20\n", + "12.0 20\n", + "17.0 16\n", + "16.0 15\n", + "19.0 14\n", + "11.0 10\n", + "20.0 8\n", + "10.0 6\n", + "21.0 5\n", + "22.0 4\n", + "9.0 2\n", + "23.0 2\n", + "Name: Time, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hospital_data.Time=pd.to_datetime(Hospital_data.Time, errors='coerce')\n", + "Hospital_data.Time.dt.hour.value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def bracket_time(hour):\n", + " if hour>= 6.0 and hour<12.0:\n", + " return \"Morning\"\n", + " elif hour>= 12 and hour<14:\n", + " return \"Afternoon\"\n", + " elif hour>=14 and hour<19 :\n", + " return \"Evening\"\n", + " elif hour>=19 and hour<=23 or hour >= 0 and hour < 6 :\n", + " return \"Night\"\n", + " \n", + "Hospital_data['bracket_time']=Hospital_data.Time.dt.hour.apply(bracket_time)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "37\n" + ] + } + ], + "source": [ + "patients_repeated=Hospital_data['id'].value_counts()\n", + "print(len(patients_repeated[patients_repeated>1].index))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Id \t Visits\n", + " 1 12\n", + "46 5\n", + "122 5\n", + "140 4\n", + "94 4\n", + "17 4\n", + "45 3\n", + "101 3\n", + "63 3\n", + "132 3\n", + "114 3\n", + "109 3\n", + "107 3\n", + "145 3\n", + "20 2\n", + "97 2\n", + "59 2\n", + "88 2\n", + "96 2\n", + "112 2\n", + "116 2\n", + "118 2\n", + "25 2\n", + "80 2\n", + "120 2\n", + "40 2\n", + "100 2\n", + "64 2\n", + "4 2\n", + "133 2\n", + " ..\n", + "68 1\n", + "69 1\n", + "70 1\n", + "71 1\n", + "72 1\n", + "73 1\n", + "74 1\n", + "75 1\n", + "76 1\n", + "77 1\n", + "154 1\n", + "79 1\n", + "81 1\n", + "82 1\n", + "83 1\n", + "84 1\n", + "85 1\n", + "86 1\n", + "87 1\n", + "89 1\n", + "90 1\n", + "91 1\n", + "92 1\n", + "93 1\n", + "95 1\n", + "98 1\n", + "99 1\n", + "102 1\n", + "103 1\n", + "78 1\n", + "Name: id, dtype: int64\n" + ] + } + ], + "source": [ + "print(\"Id \\t Visits\\n\",patients_repeated)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id Procedure \n", + "1 Pharmacy 10\n", + "12 22 Unit Bridge 2\n", + "13 Consultation 2\n", + "17 Consultation 2\n", + " RCT (4 teeth) Bridge (9 teeth) 2\n", + "20 Consultation 2\n", + "25 Consultation 2\n", + "45 R.P.D + Crown 2\n", + "46 Dressing 4\n", + "63 Consultation 2\n", + "80 Consultation 2\n", + "94 Injection 3\n", + "97 Consultation 2\n", + "101 C Section 2\n", + "109 R.C.T 2\n", + "112 Operation 2\n", + "114 Consultation 3\n", + "116 X Ray 2\n", + "122 Injection 5\n", + "130 Consultation 2\n", + "140 4 Unit Bridge 2\n", + "145 Crown 3\n", + "151 R.C.T+Scalling+Crown 2\n", + "153 Orthodontics 2\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "x=Hospital_data[['id','Procedure']]\n", + "patient_visiting_again=x.groupby(['id','Procedure']).size()\n", + "print(patient_visiting_again[patient_visiting_again>1])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Female Age Median: 30.0\n", + "Male Age Median: 29.0\n" + ] + } + ], + "source": [ + "female_median=Female.Age.median()\n", + "male_median=Male.Age.median()\n", + "print(\"Female Age Median: \", female_median)\n", + "print(\"Male Age Median: \", male_median)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "83950.0" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "consulting_charges=Hospital_data[Hospital_data.Procedure=='Consultation']\n", + "consulting_charges['Total_Charges'].sum()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total_Charges
Age0.02809
\n", + "
" + ], + "text/plain": [ + " Total_Charges\n", + "Age 0.02809" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Cor=Hospital_data.corr()\n", + "Cor.loc[['Age'],['Total_Charges']]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD8CAYAAACYebj1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF+BJREFUeJzt3X9s1PXhx/HXtRWhlF6vvYK2WhQomUDnltGAm9oJR3SE\nQSWGKcEMiUsEGYPFzU4TIMJcJ3RHKgU25sTxB8OEeRsbf91whUCW7/ErLTBBCCJbLbT0OChSj959\nvn8w36HQwtEfn89HeD4SEj4/73V3Hi8/vz2WZVkCAEBSmtMBAADuQSkAAAxKAQBgUAoAAINSAAAY\nlAIAwKAUAAAGpQAAMCgFAIBBKQAAjAynA3RHQ0NDp+P9fr+am5ttTpMasnUP2bqHbN3n5nw9yVZQ\nUJDSfGwpAAAMSgEAYFAKAACDUgAAGJQCAMCgFAAABqUAADAoBQCAQSkAAIyv5BXNcL/Ej6b2ynpO\nd2OZ9PV/7ZXXBu5EbCkAAAxKAQBgUAoAAINSAAAYlAIAwKAUAAAGpQAAMCgFAIBBKQAADEoBAGBQ\nCgAAg1IAABiUAgDAoBQAAAalAAAwKAUAgEEpAAAM25689vLLL6t///5KS0tTenq6Kisr1draqmAw\nqKamJuXn52vRokXKysqyKxIA4Bq2Po5zyZIlys7ONsOhUEglJSUqLy9XKBRSKBTSrFmz7IwEALiK\no7uPIpGIysrKJEllZWWKRCJOxgGAO56tWwrLli1TWlqaJk2apEAgoFgsJp/PJ0nKyclRLBazMw4A\n4Bq2lcKyZcuUm5urWCym5cuXq6CgoMN0j8cjj8fT6bLhcFjhcFiSVFlZKb/f3+l8GRkZXU5z2p2W\n7XSvru3W2PU532nfaW9xczbJ3fnsyGZbKeTm5kqSvF6vSktLdezYMXm9XkWjUfl8PkWj0Q7HG64W\nCAQUCATMcHNzc6fz+f3+Lqc5jWz2seu9uPlzI1v3uTlfT7Jd+z/iXbHlmEJbW5suXbpk/l5XV6ei\noiKNHTtWtbW1kqTa2lqVlpbaEQcA0AVbthRisZhWrlwpSUokEnr00Uf1jW98Q8OHD1cwGNT27dvN\nKakAAOfYUgpDhgzRihUrrhs/aNAgLV682I4IAIAUcEUzAMCgFAAABqUAADAoBQCAQSkAAAxKAQBg\nUAoAAINSAAAYlAIAwKAUAAAGpQAAMCgFAIBBKQAADEoBAGBQCgAAg1IAABiUAgDAoBQAAAalAAAw\nKAUAgEEpAAAMSgEAYFAKAACDUgAAGJQCAMCgFAAABqUAADAoBQCAkWHniyWTSVVUVCg3N1cVFRVq\nbW1VMBhUU1OT8vPztWjRImVlZdkZCQBwFVu3FLZt26bCwkIzHAqFVFJSourqapWUlCgUCtkZBwBw\nDdtK4ezZs9q3b58mTpxoxkUiEZWVlUmSysrKFIlE7IoDAOiEbaWwYcMGzZo1Sx6Px4yLxWLy+XyS\npJycHMViMbviAAA6Ycsxhb1798rr9WrYsGE6dOhQp/N4PJ4OhXG1cDiscDgsSaqsrJTf7+90voyM\njC6nOe1Oy3a6V9d2a+z6nO+077S3uDmb5O58dmSzpRSOHDmiPXv2aP/+/YrH47p06ZKqq6vl9XoV\njUbl8/kUjUaVnZ3d6fKBQECBQMAMNzc3dzqf3+/vcprTyGYfu96Lmz83snWfm/P1JFtBQUFK89lS\nCjNnztTMmTMlSYcOHdLWrVu1YMECbdy4UbW1tSovL1dtba1KS0vtiAMA6IKj1ymUl5errq5OCxYs\nUH19vcrLy52MAwB3PFuvU5Ck0aNHa/To0ZKkQYMGafHixXZHAAB0gSuaAQAGpQAAMCgFAIBBKQAA\nDEoBAGBQCgAAI+VS2LZtm86fP9+XWQAADkv5OoWDBw9q06ZNGj16tB5//HGVlpbqrrvu6stsAACb\npVwKP//5z3XhwgXt2rVLf//737V+/XqNGzdOjz/+uEaNGtWXGQEANrmlK5oHDRqkp556Sk899ZRO\nnjyp1atX68MPP5Tf79fEiRM1efJk9e/fv6+yAgD62C3f5qK+vl47d+5UJBLR8OHDNX/+fPn9fm3b\ntk1vvvmm3njjjb7ICQCwQcql8Mc//lG7d+9WZmamHn/8cVVVVSk3N9dMLy4u1gsvvNAnIQEA9ki5\nFC5fvqxXXnlFI0aM6HxFGRmqrKzstWAAAPulXApPP/20+vXr12Fca2ur4vG42WIoLCzs3XQAAFul\nfJ3CihUr1NLS0mFcS0uLVq5c2euhAADOSLkUGhoaVFRU1GFcUVGR/vvf//Z6KACAM1IuhezsbDU2\nNnYY19jYqEGDBvV6KACAM1I+pvDEE0+oqqpKzz77rIYMGaLGxkZt3rxZEyZM6Mt8AAAbpVwK5eXl\nysjI0MaNG3X27Fnl5eVpwoQJmjJlSl/mAwDYKOVSSEtL09SpUzV16tS+zAMAcNAtXdHc0NCgTz75\nRG1tbR3GswsJAG4PKZfCn//8Z23ZskVDhw7V3Xff3WEapQAAt4eUS+HLexsNHTq0L/MAAByU8imp\n/fr144plALjNpVwKP/jBD/SHP/xB0WhUyWSywx8AwO0h5d1Ha9askST94x//uG7a5s2bey8RAMAx\nKZfC6tWr+zIHAMAFUi6F/Px8SVIymVQsFpPP5+uzUAAAZ6RcChcvXtTvf/97/etf/zJXNu/Zs0fH\njh3Ts88+e8Nl4/G4lixZovb2diUSCY0fP14zZsxQa2urgsGgmpqalJ+fr0WLFikrK6vHbwoA0D0p\nH2hev369MjMztWbNGmVkXOmSkSNHavfu3Tdd9q677tKSJUu0YsUKvfXWWzpw4ICOHj2qUCikkpIS\nVVdXq6SkRKFQqPvvBADQYymXQn19vV544YUOu42ys7MVi8VuuqzH41H//v0lSYlEQolEQh6PR5FI\nRGVlZZKksrIyRSKRW80PAOhFKe8+yszM1IULFzqUQnNzc8rHFpLJpF599VU1NjbqySefVHFxcYdj\nEzk5OSkVDACg76RcChMnTjS3zrYsS0ePHtWmTZs0adKklJZPS0vTihUrdPHiRa1cuVKffvpph+ke\nj0cej6fTZcPhsMLhsCSpsrJSfr+/8zeTkdHlNKfdadlO9+rabo1dn/Od9p32Fjdnk9ydz45sKZfC\ntGnT1K9fP73zzjtKJBJau3atAoGAJk+efEsvOHDgQI0ePVoHDhyQ1+tVNBqVz+dTNBpVdnZ2p8sE\nAgEFAgEz3Nzc3Ol8fr+/y2lOI5t97Hovbv7cyNZ9bs7Xk2wFBQUpzZdyKXg8Hk2ePPmWS0CSzp8/\nr/T0dA0cOFDxeFx1dXWaNm2axo4dq9raWpWXl6u2tlalpaW3vG4AQO9JuRQOHjzY5bQxY8bccNlo\nNKqamholk0lZlqVHHnlE3/rWtzRy5EgFg0Ft377dnJIKAHBOyqWwdu3aDsPnz59Xe3u78vLybnq1\n89ChQ/XWW29dN37QoEFavHhxqhEAAH0s5VKoqanpMJxMJrVlyxYNGDCg10MBAJyR8nUK1y2Ylqbp\n06frL3/5S2/mAQA4qNulIEl1dXVKS+vRKgAALpLy7qO5c+d2GI7H44rH43rxxRd7PRQAwBkpl8KP\nf/zjDsN333237r33XmVmZvZ6KACAM1IuhVGjRvVlDgCAC6RcCm+//XaXt6G42vz583sUCADgnJSP\nEg8cOFCRSETJZFK5ublKJpOKRCLKzMzUkCFDzB8AwFdXylsKn332mSoqKvTQQw+ZcR999JG2bNmi\nOXPm9Ek4AIC9Ut5SOHr0qIqLizuMGzFihI4ePdrroQAAzki5FB588EFt2rRJ8Xhc0pVTUv/0pz/p\ngQce6KtsAACbpbz7aN68eaqurtYPf/hDZWVlqbW1VcOHD9eCBQv6Mh8AwEYpl8LgwYO1fPlyNTc3\nm2cguPVBFACA7km5FCTpwoULOnz4sKLRqKZNm6aWlhZZlqW8vLy+ygfcssSPptryOtc+XS59/V9t\neV2gL6V8TOHw4cNauHChdu7cqS1btkiSGhsbtX79+j4LBwCwV8qlsGHDBi1cuFCvv/660tPTJV05\n++j48eN9Fg4AYK+US6GpqUklJSUdxmVkZCiRSPR6KACAM1Iuhfvuu08HDhzoMK6+vl5FRUW9HgoA\n4IyUDzQ///zz+vWvf61vfvObisfj+t3vfqe9e/fqZz/7WV/mAwDYKOVSGDlypFasWKGdO3eqf//+\n8vv9evPNNznzCABuIymVQjKZ1BtvvKHXX39d06ZN6+tMAACHpFQKaWlpOnPmjCzL6us86GWpnLN/\n7fn2AO5cKR9ofuaZZ7R+/Xo1NTUpmUx2+AMAuD2kfEzht7/9rSRpx44d103bvHlz7yUCADjmpqVw\n7tw55eTkaPXq1XbkAQA46Ka7j37yk59IkvLz85Wfn6/33nvP/P3LPwCA28NNS+Hag8uHDh3qszAA\nAGfdtBQ8Ho8dOQAALnDTYwqJREIHDx40w8lkssOwJI0ZM+aG62hublZNTY3OnTsnj8ejQCCgyZMn\nq7W1VcFgUE1NTcrPz9eiRYuUlZXVzbcCAOipm5aC1+vV2rVrzXBWVlaHYY/Hc9OD0Onp6Xr++ec1\nbNgwXbp0SRUVFfr617+uf/7znyopKVF5eblCoZBCoZBmzZrVg7cDAOiJm5ZCTU1Nj1/E5/PJ5/NJ\nkgYMGKDCwkK1tLQoEolo6dKlkqSysjItXbqUUgAAB6V88VpvOXPmjE6cOKERI0YoFouZssjJyVEs\nFrM7DgDgKrf0OM6eamtrU1VVlWbPnq3MzMwO0zweT5cHtcPhsMLhsCSpsrKyy2dDZ2Rk3PC50aef\n/nY3k/dcxtb/c+SZ1tzCwj5uemb5zX4LTnJzNsnd+ezIZlsptLe3q6qqSo899pjGjRsn6crximg0\nKp/Pp2g0quzs7E6XDQQCCgQCZri5ubnT+fx+f5fTnNbe3u7abOgdbvp+3fxbcHM2yd35epKtoKAg\npfls2X1kWZbWrVunwsJCTZkyxYwfO3asamtrJUm1tbUqLS21Iw4AoAu2bCkcOXJEO3bsUFFRkXko\nz3PPPafy8nIFg0Ft377dnJIKAHCOLaXwta99Te+//36n0xYvXmxHBOC2du0t0u06lpS+/q82vRLs\nYvvZRwAA96IUAAAGpQAAMCgFAIBBKQAADEoBAGDYepuLO5mTt9gAgFSxpQAAMCgFAIBBKQAADEoB\nAGBQCgAAg1IAABiUAgDAoBQAAAalAAAwKAUAgEEpAAAMSgEAYFAKAACDUgAAGNw6G+gliR9NdToC\n0GNsKQAADEoBAGBQCgAAg1IAABiUAgDAoBQAAIYtp6SuWbNG+/btk9frVVVVlSSptbVVwWBQTU1N\nys/P16JFi5SVlWVHHABAF2zZUvjud7+r1157rcO4UCikkpISVVdXq6SkRKFQyI4oAIAbsKUURo0a\ndd1WQCQSUVlZmSSprKxMkUjEjigAgBtw7JhCLBaTz+eTJOXk5CgWizkVBQDwP664zYXH45HH4+ly\nejgcVjgcliRVVlbK7/d3Ol9GRkaX0yTpdM9iArjGjX5vXbnZ79Rpbs5nRzbHSsHr9Soajcrn8yka\njSo7O7vLeQOBgAKBgBlubm7udD6/39/lNAC9rzu/N7f/Tt2cryfZCgoKUprPsd1HY8eOVW1trSSp\ntrZWpaWlTkUBAPyPLVsKq1at0uHDh3XhwgW99NJLmjFjhsrLyxUMBrV9+3ZzSioAwFkey7Isp0Pc\nqoaGhk7H32zTilsbA7eP9PV/7ZP1svsIAID/oRQAAIYrTkkFgK8KR3dDf7C7z1+CLQUAgEEpAAAM\nSgEAYFAKAACDUgAAGJQCAMCgFAAABqUAADAoBQCAQSkAAAxKAQBgUAoAAINSAAAYlAIAwKAUAAAG\npQAAMCgFAIBBKQAADEoBAGBQCgAAg1IAABgZTgcAgO5I/Ghqn6z3dJ+s9auDLQUAgEEpAAAMSgEA\nYDh+TOHAgQN69913lUwmNXHiRJWXlzsdCQDuWI5uKSSTSb3zzjt67bXXFAwGtWvXLv3nP/9xMhIA\n3NEcLYVjx47pnnvu0ZAhQ5SRkaFvf/vbikQiTkYCgDuao6XQ0tKivLw8M5yXl6eWlhYHEwHAnc3x\nYwqpCIfDCofDkqTKykoVFBR0Oe+Npunve3o7GgDY6ob/xvUCR7cUcnNzdfbsWTN89uxZ5ebmXjdf\nIBBQZWWlKisrb7i+ioqKXs/YW8jWPWTrHrJ1n5vz2ZHN0VIYPny4PvvsM505c0bt7e3avXu3xo4d\n62QkALijObr7KD09XXPmzNEvf/lLJZNJPfHEE7r//vudjAQAd7T0pUuXLnUywL333qvvfe97mjx5\nsh566KEer2/YsGG9kKpvkK17yNY9ZOs+N+fr62wey7KsPn0FAMBXBre5AAAYX4lTUlPhpttlrFmz\nRvv27ZPX61VVVZUkqbW1VcFgUE1NTcrPz9eiRYuUlZVle7bm5mbV1NTo3Llz8ng8CgQCmjx5sivy\nxeNxLVmyRO3t7UokEho/frxmzJjhimxfSiaTqqioUG5urioqKlyT7eWXX1b//v2Vlpam9PR0VVZW\nuibbxYsXtW7dOp06dUoej0dz585VQUGB49kaGhoUDAbN8JkzZzRjxgyVlZU5nk2S/va3v2n79u3y\neDy6//77NW/ePMXj8b7PZt0GEomENX/+fKuxsdG6fPmy9corr1inTp1yLM+hQ4es48ePWz/96U/N\nuI0bN1offPCBZVmW9cEHH1gbN250JFtLS4t1/Phxy7Is6/PPP7cWLFhgnTp1yhX5ksmkdenSJcuy\nLOvy5cvWL37xC+vIkSOuyPalrVu3WqtWrbJ+9atfWZblnu913rx5ViwW6zDOLdnefvttKxwOW5Z1\n5XttbW11TbYvJRIJ68UXX7TOnDnjimxnz5615s2bZ33xxReWZVlWVVWV9eGHH9qS7bbYfeS222WM\nGjXquvaORCIqKyuTJJWVlTmWz+fzmQNVAwYMUGFhoVpaWlyRz+PxqH///pKkRCKhRCIhj8fjimzS\nleto9u3bp4kTJ5pxbsnWGTdk+/zzz/Xvf/9bEyZMkCRlZGRo4MCBrsh2tfr6et1zzz3Kz893TbZk\nMql4PK5EIqF4PC6fz2dLttti91Fnt8v4+OOPHUx0vVgsJp/PJ0nKyclRLBZzONGVzeUTJ05oxIgR\nrsmXTCb16quvqrGxUU8++aSKi4tdk23Dhg2aNWuWLl26ZMa5JZskLVu2TGlpaZo0aZICgYArsp05\nc0bZ2dlas2aNTp48qWHDhmn27NmuyHa1Xbt26Tvf+Y4kd3ynubm5+v73v6+5c+eqX79+evjhh/Xw\nww/bku22KIWvGo/HI4/H42iGtrY2VVVVafbs2crMzOwwzcl8aWlpWrFihS5evKiVK1fq008/dUW2\nvXv3yuv1atiwYTp06FCn8zj5uS1btky5ubmKxWJavnz5dbdCcCpbIpHQiRMnNGfOHBUXF+vdd99V\nKBRyRbYvtbe3a+/evZo5c+Z105zK1traqkgkopqaGmVmZuo3v/mNduzYYUu226IUUr1dhpO8Xq+i\n0ah8Pp+i0aiys7Mdy9Le3q6qqio99thjGjdunOvySdLAgQM1evRoHThwwBXZjhw5oj179mj//v2K\nx+O6dOmSqqurXZFNkvnv3ev1qrS0VMeOHXNFtry8POXl5am4uFiSNH78eIVCIVdk+9L+/fv14IMP\nKicnR5I7fgv19fUaPHiwee1x48bp6NGjtmS7LY4pfBVulzF27FjV1tZKkmpra1VaWupIDsuytG7d\nOhUWFmrKlCmuynf+/HldvHhR0pUzkerq6lRYWOiKbDNnztS6detUU1OjhQsXasyYMVqwYIErsrW1\ntZldWm1tbaqrq1NRUZErsuXk5CgvL08NDQ2Srvxjd99997ki25eu3nUkueO34Pf79fHHH+uLL76Q\nZVmqr6+37bdw21y8tm/fPr333nvmdhnTp093LMuqVat0+PBhXbhwQV6vVzNmzFBpaamCwaCam5sd\nPc3to48+0uLFi1VUVGQ2PZ977jkVFxc7nu/kyZOqqalRMpmUZVl65JFH9Mwzz+jChQuOZ7vaoUOH\ntHXrVlVUVLgi2+nTp7Vy5UpJV3bXPProo5o+fborsknSJ598onXr1qm9vV2DBw/WvHnzZFmWK7K1\ntbVp3rx5Wr16tdmN6pbP7f3339fu3buVnp6uBx54QC+99JLa2tr6PNttUwoAgJ67LXYfAQB6B6UA\nADAoBQCAQSkAAAxKAQBgUAoAAINSAAAYlAIAwPh/eM259kF4n/8AAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Hospital_data['Age'].plot.hist()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "16500.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hospital_data[(Hospital_data.Procedure== 'X Rays') | (Hospital_data.Procedure == 'Scalling')]['Total_Charges'].sum()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/files/saadkhan_khi_python_assignment2.ipynb b/files/saadkhan_khi_python_assignment2.ipynb deleted file mode 100644 index 260769f..0000000 --- a/files/saadkhan_khi_python_assignment2.ipynb +++ /dev/null @@ -1,172 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import datetime\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "\n", - "df=pd.read_csv(\"hospitaldata.csv\")\n", - "\n", - "#Question 01\n", - "print(\"Question 01\")\n", - "df.columns=[names.replace('.', '') for names in list(df)]\n", - "print(df.columns)\n", - "\n", - "#Cleaning Dataset\n", - "df.Date=pd.to_datetime(df.Date)\n", - "\n", - "#Some of the data in age column contains value for months like 6M and 28M, we have to convert those values to years\n", - "df.loc[(df[\"Age\"]!='nan') &(df[\"Age\"].str.contains(\"M\")),\"Age\"]=df.loc[(df[\"Age\"]!='nan') &(df[\"Age\"].str.contains(\"M\")),\"Age\"].apply(lambda x: str( round(float(float(str(x).replace(\"M\",\"\"))/12),1) ))\n", - "\n", - "\n", - "df.Age=pd.to_numeric(df.Age,errors=\"coerce\")\n", - "df.Age=df.Age.fillna(0)\n", - "df.AmountBalance=df[\"AmountBalance\"].str.strip()\n", - "df.AmountBalance=df[\"AmountBalance\"].str.replace(\"-\",'nan')\n", - "df.AmountBalance=df[\"AmountBalance\"].str.replace(\",\", \"\").astype(float)\n", - "\n", - "df.loc[:,\"Sex\"]=df.loc[:,\"Sex\"].replace(\"-\",float('nan'))\n", - "df[\"Sex\"]=df[\"Sex\"].str.upper()\n", - "df.TotalCharges=pd.to_numeric(df.TotalCharges,errors=\"coerce\")\n", - "df.Time = pd.to_datetime(df.Time,errors=\"coerce\",format=\"%I:%M%p\")\n", - "\n", - "df.NextApt = pd.to_datetime(df.NextApt,errors=\"coerce\", format=\"%m/%d/%Y\")\n", - "\n", - "\n", - "#df.AmountReceivedBy=df.AmountReceivedBy.apply(lambda x: str(x).replace('\\d+', ''))\n", - "\n", - "#Question 02: Which day of the weeks has more visits?\n", - "print(\"Question 02\")\n", - "print(df.Date.dt.weekday_name.value_counts().idxmax())\n", - "\n", - "#Question 03: Average age of patients\n", - "print(\"Question 03\")\n", - "print(round(df.Age.mean(),0))\n", - "\n", - "#Question 04: Num of visits by childeren\n", - "print(\"Question 04\")\n", - "print(len(df[(df[\"Age\"]>1) & (df[\"Age\"]<=12)]))\n", - "\n", - "\n", - "#Question 05: Which gender type had what kind of procedure in abundance?\n", - "print(\"Question 05\")\n", - "print(df.groupby(['Sex']).apply(lambda x: x['Procedure'].value_counts().idxmax()))\n", - "\n", - "#Question 06: Which Doctor is earning highest\n", - "print(\"Question 06\")\n", - "print(df.groupby(['ConsultingDoctor'])[\"TotalCharges\"].sum().idxmax())\n", - "\n", - "#Question 07: Which Procedure type earns more money\n", - "print(\"Question 07\")\n", - "print(df.groupby(['Procedure'])[\"TotalCharges\"].sum().idxmax())\n", - "\n", - "#Question 08: Which time of the day has highest frequency of visits by hour?\n", - "print(\"Question 08\")\n", - "print(datetime.datetime.strptime(str(df.Time.dt.hour.value_counts().idxmax()), '%H.%M').strftime(\"%I:%M %p\"))\n", - "\n", - "#Question 09: Create a bracket of time by Morning, Afternoon, Evening, Night (6am – 12pm – Morning, 12 pm- 4 pm, Afternoon, 4 pm- 7pm, Evening, 7pm – 6 am, Night).\n", - "print(\"Question 09\")\n", - "df.loc[(df.Time >= datetime.datetime.strptime('06:00AM', '%I:%M%p') ) & (df.Time <= datetime.datetime.strptime('12:00PM', '%I:%M%p')), 'TimeBracket'] = \"Morning\"\n", - "df.loc[(df.Time >= datetime.datetime.strptime('12:00PM', '%I:%M%p') ) & (df.Time <= datetime.datetime.strptime('04:00PM', '%I:%M%p')), 'TimeBracket'] = \"Afternoon\"\n", - "df.loc[(df.Time >= datetime.datetime.strptime('04:00PM', '%I:%M%p') ) & (df.Time <= datetime.datetime.strptime('07:00PM', '%I:%M%p')), 'TimeBracket'] = \"Evening\"\n", - "df.loc[(df.Time >= datetime.datetime.strptime('07:00PM', '%I:%M%p') ) & (df.Time <= datetime.datetime.strptime('06:00AM', '%I:%M%p')), 'TimeBracket'] = \"Night\"\n", - "\n", - "#Question 10: How many patients are repeated visitors?\n", - "print(\"Question 10\")\n", - "print(len(df.id.value_counts()[df.id.value_counts()>1].index))\n", - "\n", - "#Question 11: Give us the id of repeated visitors.\n", - "print(\"Question 11\")\n", - "print(df.id.value_counts()[df.id.value_counts()>1].index)\n", - "\n", - "#Question 12: Which patients visited again for the same problem?\n", - "print(\"Question 12\")\n", - "print(list(df.groupby(\"id\")[\"Specialty\"].value_counts()[df.groupby(\"id\")[\"Specialty\"].value_counts()>1].index))\n", - "\n", - "#Question 13: What is the median age for Females and Males?\n", - "print(\"Question 13\")\n", - "print(df.groupby(\"Sex\")[\"Age\"].median())\n", - "\n", - "#Question 14: What is the total amount in balance?\n", - "print(\"Question 14\")\n", - "print(df.AmountBalance.sum())\n", - "\n", - "#Question 15: How much money was made by Procedure Type “Consultation”?\n", - "print(\"Question 15\")\n", - "print(df[df[\"Procedure\"]==\"Consultation\"].groupby(\"Procedure\")[\"AmountReceived\"].sum())\n", - "\n", - "#Question 16: Is there a relation between Age and Total Charges paid?\n", - "print(\"Question 16\")\n", - "print(df.loc[:,[\"Age\",\"TotalCharges\"]].corr())\n", - "\n", - "#Question 17: Which Age group had highest number of visits?\n", - "print(\"Question 17\")\n", - "print(df[df[\"Age\"]!=0].Age.value_counts().idxmax())\n", - "\n", - "#Question 18: What is the total cost earned by Procedure Type X Ray and Scalling together?\n", - "print(\"Question 18\")\n", - "print(sum(list(df[(df[\"Procedure\"]==\"X Ray\") | (df[\"Procedure\"]==\"Scalling\")].groupby(\"Procedure\")[\"AmountReceived\"].sum())))\n", - "\n", - "\n", - "#Creating a clean csv dataset file\n", - "df.to_csv(\"clean_hospitaldata.csv\",sep=\",\")\n", - "print(\"File created successfully\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}