dataiku
diff --git a/‎HISTORY.txt‎
Lines changed: 12 additions & 2 deletions b/‎HISTORY.txt‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎dataikuapi/dss/admin.py‎
Lines changed: 140 additions & 0 deletions b/‎dataikuapi/dss/admin.py‎
Lines changed: 140 additions & 0 deletions
diff --git a/‎dataikuapi/dss/apideployer.py‎
Lines changed: 5 additions & 14 deletions b/‎dataikuapi/dss/apideployer.py‎
Lines changed: 5 additions & 14 deletions
diff --git a/‎dataikuapi/dss/dataset.py‎
Lines changed: 1 addition & 1 deletion b/‎dataikuapi/dss/dataset.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dataikuapi/dss/ml.py‎
Lines changed: 29 additions & 21 deletions b/‎dataikuapi/dss/ml.py‎
Lines changed: 29 additions & 21 deletions
@@ -1,12 +1,22 @@
 Changelog
 ==========
 
-4.1.0 (2018-01-10)
+4.3.0 (2018-06-01)
+------------------
+
+* Initial release for DSS 4.3
+
+4.2.1 (2018-04-30)
+-------------------
+
+* Initial release for DSS 4.2
+
+4.1.0 (2017-11-10)
 -------------------
 
 * Initial release for DSS 4.1
 
-4.0.0 (2018-01-10)
+4.0.0 (2017-01-10)
 ------------------
 
 * Initial release for DSS 4.0
 
@@ -567,3 +567,143 @@ def set_definition(self, definition):
         return self.client._perform_empty(
             "PUT", "/admin/globalAPIKeys/%s" % self.key,
             body = definition)
+
+class DSSCluster(object):
+    """
+    A handle to interact with a cluster on the DSS instance
+    """
+    def __init__(self, client, cluster_id):
+        """Do not call that directly, use :meth:`dataikuapi.dss.DSSClient.get_cluster`"""
+        self.client = client
+        self.cluster_id = cluster_id
+
+    ########################################################
+    # Cluster deletion
+    ########################################################
+
+    def delete(self):
+        """
+        Deletes the cluster. The cluster is not stopped before being deleted.
+        """
+        self.client._perform_empty(
+            "DELETE", "/admin/clusters/%s" % (self.cluster_id))
+
+    ########################################################
+    # Cluster description
+    ########################################################
+
+    def get_settings(self):
+        """
+        Get the cluster's settings. This includes opaque data for the cluster if this is
+        a started managed cluster.
+
+        The returned object can be used to save settings.
+
+        :returns: a :class:`DSSClusterSettings` object to interact with cluster settings
+        :rtype: :class:`DSSClusterSettings`
+        """
+        settings = self.client._perform_json(
+            "GET", "/admin/clusters/%s" % (self.cluster_id))
+        return DSSClusterSettings(self.client, self.cluster_id, settings)
+
+    def set_definition(self, cluster):
+        """
+        Set the cluster's definition. The definition should be the raw settings dict
+        obtained from :meth:`get_settings` (see :meth:`DSSClusterSettings.get_raw`).
+
+        :param dict cluster: a cluster definition
+
+        :returns: the JSON response to the update (documented as the updated cluster definition)
+        """
+        return self.client._perform_json(
+            "PUT", "/admin/clusters/%s" % (self.cluster_id), body=cluster)
+
+    def get_status(self):
+        """
+        Get the cluster's status and usage
+
+        :returns: The cluster status, as a :class:`DSSClusterStatus` object
+        :rtype: :class:`DSSClusterStatus`
+        """
+        status = self.client._perform_json("GET", "/admin/clusters/%s/status" % (self.cluster_id))
+        return DSSClusterStatus(self.client, self.cluster_id, status)
+
+    ########################################################
+    # Cluster actions
+    ########################################################
+
+    def _perform_action(self, action):
+        """Posts a cluster action ("start" or "stop") and returns its JSON response, raising on empty or error responses"""
+        resp = self.client._perform_json(
+            "POST", "/admin/clusters/%s/actions/%s" % (self.cluster_id, action))
+        if resp is None:
+            raise Exception('Cluster operation returned no data')
+        messages = resp.get('messages', {})
+        if messages.get('error', False):
+            raise Exception('Cluster operation failed : %s' % (json.dumps(messages.get('messages', {}))))
+        return resp
+
+    def start(self):
+        """
+        Starts or attaches the cluster.
+
+        This operation is only valid for a managed cluster.
+        :returns: the JSON response of the start action
+        :raises Exception: if the action returns no data or reports an error
+        """
+        return self._perform_action("start")
+
+    def stop(self):
+        """
+        Stops or detaches the cluster
+
+        This operation is only valid for a managed cluster.
+        :returns: the JSON response of the stop action
+        :raises Exception: if the action returns no data or reports an error
+        """
+        return self._perform_action("stop")
+
+class DSSClusterSettings(object):
+    """The settings of a cluster, held as a raw dict that can be edited and saved back"""
+    def __init__(self, client, cluster_id, settings):
+        """Do not call directly, use :meth:`DSSCluster.get_settings`"""
+        self.settings = settings
+        self.cluster_id = cluster_id
+        self.client = client
+
+    def get_raw(self):
+        """
+        Returns the raw settings dictionary itself (a reference, not a copy), so changes made to it are sent by :meth:`save`.
+
+        Fields that can be updated:
+         - permissions, usableByAll, owner
+         - params
+        """
+        return self.settings
+
+    def get_plugin_data(self):
+        """
+        Returns the opaque data produced by the cluster's start operation if this is a
+        managed attached cluster, None otherwise.
+
+        You should generally not modify this
+        """
+        return self.settings.get("data")
+
+    def save(self):
+        """Saves back the settings to the cluster"""
+        endpoint = "/admin/clusters/%s" % (self.cluster_id)
+        return self.client._perform_json("PUT", endpoint, body=self.settings)
+
+class DSSClusterStatus(object):
+    """The status of a cluster, wrapping the raw status dict fetched by :meth:`DSSCluster.get_status`"""
+    def __init__(self, client, cluster_id, status):
+        """Do not call directly, use :meth:`DSSCluster.get_status`"""
+        self.client = client
+        self.cluster_id = cluster_id
+        # raw status dict, exposed as-is by get_raw()
+        self.status = status
+
+    def get_raw(self):
+        """Gets the whole status as a raw dictionary (a reference, not a copy)."""
+        return self.status
@@ -37,7 +37,7 @@ def get_deployment(self, deployment_id):
     def create_deployment(self, deployment_id, service_id, infra_id, version):
         """
         Creates a deployment and returns the handle to interact with it. The returned deployment
-        is not yet started and you need to call :meth:`~DSSAPIDeployerDeployment.update`
+        is not yet started and you need to call :meth:`~DSSAPIDeployerDeployment.start_update`
 
         :param str deployment_id: Identifier of the deployment to create
         :param str service_id: Identifier of the API Service to target
@@ -77,7 +77,7 @@ def get_infra(self, infra_id):
         :param str infra_id: Identifier of the infra to get
-        :rtype: :class:`DSSAPIDeployerDeployment`
+        :rtype: :class:`DSSAPIDeployerInfra`
         """
-        return DSSAPIDeployerDeployment(self.client, infra_id)
+        return DSSAPIDeployerInfra(self.client, infra_id)
 
     def list_services(self, as_objects = True):
         """
@@ -148,15 +148,6 @@ def get_settings(self):
 
         return DSSAPIDeployerInfraSettings(self.client, self.infra_id, settings)
 
-    def delete(self):
-        """
-        Deletes this infra
-
-        You may only delete a deployment if it is disabled and has been updated after disabling it.
-        """
-        return self.client._perform_empty(
-            "DELETE", "/api-deployer/infras/%s" % (self.infra_id))
-
 class DSSAPIDeployerInfraSettings(object):
     """The settings of an API Deployer Infra. 
 
@@ -194,7 +185,7 @@ def get_raw(self):
     def save(self):
         """Saves back these settings to the infra"""
         self.client._perform_empty(
-                "PUT", "/api-deployer/infra/%s/settings" % (self.infra_id),
+                "PUT", "/api-deployer/infras/%s/settings" % (self.infra_id),
                 body = self.settings)
 
 
@@ -240,7 +231,7 @@ def get_settings(self):
 
     def start_update(self):
         """
-        Updates this deployment to try to match the actual state to the current settings
+        Starts an asynchronous update of this deployment to try to match the actual state to the current settings
 
         :returns: a :class:`dataikuapi.dss.future.DSSFuture` tracking the progress of the update. Call 
                    :meth:`~dataikuapi.dss.future.DSSFuture.wait_for_result` on the returned object
@@ -258,7 +249,7 @@ def delete(self):
         You may only delete a deployment if it is disabled and has been updated after disabling it.
         """
         return self.client._perform_empty(
-            "DELETE", "/api-deployer/deployments/%s/actions/update" % (self.deployment_id))
+            "DELETE", "/api-deployer/deployments/%s" % (self.deployment_id))
 
 
 class DSSAPIDeployerDeploymentSettings(object):
 
@@ -88,7 +88,7 @@ def get_metadata(self):
         
         Returns:
             a dict object. For more information on available metadata, please see
-            https://doc.dataiku.com/dss/api/latest
+            https://doc.dataiku.com/dss/api/4.3/rest/
         """
         return self.client._perform_json(
                 "GET", "/projects/%s/datasets/%s/metadata" % (self.project_key, self.dataset_name))
 
@@ -18,8 +18,8 @@ def set_split_random(self, train_ratio = 0.8, selection = None, dataset_name=Non
         Sets the train/test split to random splitting of an extract of a single dataset
 
         :param float train_ratio: Ratio of rows to use for train set. Must be between 0 and 1
-        :param object selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
-        :param str dataset_name: Name of dataset to split. If None, the main dataset used to create the ML Task will be used.
+        :param object selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
+        :param str dataset_name: Name of dataset to split. If None, the main dataset used to create the visual analysis will be used.
         """
         sp = self.mltask_settings["splitParams"]
         sp["ttPolicy"] = "SPLIT_SINGLE_DATASET"
@@ -40,8 +40,8 @@ def set_split_kfold(self, n_folds = 5, selection = None, dataset_name=None):
         Sets the train/test split to k-fold splitting of an extract of a single dataset
 
         :param int n_folds: number of folds. Must be greater than 0
-        :param object selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
-        :param str dataset_name: Name of dataset to split. If None, the main dataset used to create the ML Task will be used.
+        :param object selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
+        :param str dataset_name: Name of dataset to split. If None, the main dataset used to create the visual analysis will be used.
         """
         sp = self.mltask_settings["splitParams"]
         sp["ttPolicy"] = "SPLIT_SINGLE_DATASET"
@@ -59,14 +59,14 @@ def set_split_kfold(self, n_folds = 5, selection = None, dataset_name=None):
 
     def set_split_explicit(self, train_selection, test_selection, dataset_name=None, test_dataset_name=None, train_filter=None, test_filter=None):
         """
-        Sets the train/test split to explicit extract of one or two dataset
+        Sets the train/test split to explicit extract of one or two dataset(s)
 
-        :param object train_selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the train dataset. May be None (won't be changed)
-        :param object test_selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the test dataset. May be None (won't be changed)
+        :param object train_selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the train dataset. May be None (won't be changed)
+        :param object test_selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the test dataset. May be None (won't be changed)
         :param str dataset_name: Name of dataset to use for the extracts. If None, the main dataset used to create the ML Task will be used.
         :param str test_dataset_name: Name of a second dataset to use for the test data extract. If None, both extracts are done from dataset_name
-        :param object train_filter: A :class:`DSSFilterBuilder` to build the settings of the filter of the train dataset. May be None (won't be changed)
-        :param object test_filter: A :class:`DSSFilterBuilder` to build the settings of the filter of the test dataset. May be None (won't be changed)
+        :param object train_filter: A :class:`~dataikuapi.dss.utils.DSSFilterBuilder` to build the settings of the filter of the train dataset. May be None (won't be changed)
+        :param object test_filter: A :class:`~dataikuapi.dss.utils.DSSFilterBuilder` to build the settings of the filter of the test dataset. May be None (won't be changed)
         """
         sp = self.mltask_settings["splitParams"]
         if dataset_name is None:
@@ -206,8 +206,15 @@ def get_algorithm_settings(self, algorithm_name):
         Gets the training settings for a particular algorithm. This returns a reference to the
         algorithm's settings, not a copy, so changes made to the returned object will be reflected when saving.
 
-        All algorithms have at least an "enabled" setting. Other settings are algorithm-dependent. You can print
-        the returned object to learn more about the settings of each particular algorithm
+        This method returns a dictionary of the settings for this algorithm.
+        All algorithm dicts have at least an "enabled" key in the dictionary.
+        The 'enabled' key indicates whether this algorithm will be trained
+
+        Other settings are algorithm-dependent and are the various hyperparameters of the 
+        algorithm. The precise keys for each algorithm are not all documented. You can print
+        the returned dictionary to learn more about the settings of each particular algorithm
+
+        Please refer to the documentation for details on available algorithms.
 
         :param str algorithm_name: Name (in capitals) of the algorithm.
         :return: A dict of the settings for an algorithm
@@ -220,17 +227,19 @@ def get_algorithm_settings(self, algorithm_name):
 
     def set_algorithm_enabled(self, algorithm_name, enabled):
         """
-        Enables or disables an algorithm.
+        Enables or disables an algorithm based on its name.
+
+        Please refer to the documentation for details on available algorithms.
 
         :param str algorithm_name: Name (in capitals) of the algorithm.
         """
         self.get_algorithm_settings(algorithm_name)["enabled"] = enabled
 
     def set_metric(self, metric=None, custom_metric=None, custom_metric_greater_is_better=True, custom_metric_use_probas=False):
         """
-        Set a metric on a prediction ML task
+        Sets the score metric to optimize for a prediction ML Task
 
-        :param str metric: metric to use. Leave empty for custom_metric
+        :param str metric: metric to use. Leave empty to use a custom metric. You need to set the ``custom_metric`` value in that case
         :param str custom_metric: code of the custom metric
         :param bool custom_metric_greater_is_better: whether the custom metric is a score or a loss
         :param bool custom_metric_use_probas: whether to use the classes' probas or the predicted value (for classification)
@@ -778,7 +787,7 @@ def start_ensembling(self, model_ids=[], method=None):
         :param list model_ids: A list of model identifiers
         :param str method: the ensembling method (AVERAGE, PROBA_AVERAGE, MEDIAN, VOTE, LINEAR_MODEL, LOGISTIC_MODEL)
 
-        This returns immediately, before train is complete. To wait for train to complete, use ``wait_train_complete()``
+        This returns immediately, before train is complete. To wait for train to complete, use :meth:`wait_train_complete`
 
         :return: the model identifier of the ensemble
         :rtype: string
@@ -794,7 +803,7 @@ def start_ensembling(self, model_ids=[], method=None):
 
     def wait_train_complete(self):
         """
-        Waits for train to be complete.
+        Waits for train to be complete (if started with :meth:`start_train`)
         """
         while True:
             status = self.get_status()
@@ -807,7 +816,7 @@ def get_trained_models_ids(self, session_id=None, algorithm=None):
         """
         Gets the list of trained model identifiers for this ML task.
 
-        These identifiers can be used for ``get_trained_model_snippet`` and ``deploy_to_flow``
+        These identifiers can be used for :meth:`get_trained_model_snippet` and :meth:`deploy_to_flow`
 
         :return: A list of model identifiers
         :rtype: list of strings
@@ -824,7 +833,7 @@ def get_trained_models_ids(self, session_id=None, algorithm=None):
 
     def get_trained_model_snippet(self, id=None, ids=None):
         """
-        Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:get_trained_model_details
+        Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:`get_trained_model_details`
 
         :param str id: a model id
         :param list ids: a list of model ids
@@ -856,14 +865,13 @@ def get_trained_model_details(self, id):
         
         :param str id: Identifier of the trained model, as returned by :meth:`get_trained_models_ids`
 
-        :return: A :class:`DSSTrainedModelDetails` representing the details of this trained model id
-        :rtype: :class:`DSSTrainedModelDetails`
+        :return: A :class:`DSSTrainedPredictionModelDetails` or :class:`DSSTrainedClusteringModelDetails` representing the details of this trained model id
+        :rtype: :class:`DSSTrainedPredictionModelDetails` or :class:`DSSTrainedClusteringModelDetails`
         """
         ret = self.client._perform_json(
             "GET", "/projects/%s/models/lab/%s/%s/models/%s/details" % (self.project_key, self.analysis_id, self.mltask_id,id))
         snippet = self.get_trained_model_snippet(id)
 
-
         if "facts" in ret:
             return DSSTrainedClusteringModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
         else: