diff --git a/posthog/client.py b/posthog/client.py index 426bb24c..ce503822 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -36,6 +36,9 @@ from posthog.request import ( DEFAULT_HOST, APIError, + QuotaLimitError, + RequestsConnectionError, + RequestsTimeout, batch_post, determine_server_host, flags, @@ -53,6 +56,7 @@ ) from posthog.types import ( FeatureFlag, + FeatureFlagError, FeatureFlagResult, FlagMetadata, FlagsAndPayloads, @@ -1506,6 +1510,19 @@ def feature_enabled( return None return bool(response) + def _get_stale_flag_fallback( + self, distinct_id: ID_TYPES, key: str + ) -> Optional[FeatureFlagResult]: + """Returns a stale cached flag value if available, otherwise None.""" + if self.flag_cache: + stale_result = self.flag_cache.get_stale_cached_flag(distinct_id, key) + if stale_result: + self.log.info( + f"[FEATURE FLAGS] Using stale cached value for flag {key}" + ) + return stale_result + return None + def _get_feature_flag_result( self, key: str, @@ -1539,6 +1556,7 @@ def _get_feature_flag_result( flag_details = None request_id = None evaluated_at = None + feature_flag_error: Optional[str] = None flag_value = self._locally_evaluate_flag( key, distinct_id, groups, person_properties, group_properties @@ -1563,7 +1581,7 @@ def _get_feature_flag_result( ) elif not only_evaluate_locally: try: - flag_details, request_id, evaluated_at = ( + flag_details, request_id, evaluated_at, errors_while_computing = ( self._get_feature_flag_details_from_server( key, distinct_id, @@ -1573,6 +1591,14 @@ def _get_feature_flag_result( disable_geoip, ) ) + errors = [] + if errors_while_computing: + errors.append(FeatureFlagError.ERRORS_WHILE_COMPUTING) + if flag_details is None: + errors.append(FeatureFlagError.FLAG_MISSING) + if errors: + feature_flag_error = ",".join(errors) + flag_result = FeatureFlagResult.from_flag_details( flag_details, override_match_value ) @@ -1586,19 +1612,26 @@ def _get_feature_flag_result( self.log.debug( f"Successfully computed flag remotely: #{key} -> #{flag_result}" ) + except QuotaLimitError as e: + self.log.warning(f"[FEATURE FLAGS] Quota limit exceeded: {e}") + feature_flag_error = FeatureFlagError.QUOTA_LIMITED + flag_result = self._get_stale_flag_fallback(distinct_id, key) + except RequestsTimeout as e: + self.log.warning(f"[FEATURE FLAGS] Request timed out: {e}") + feature_flag_error = FeatureFlagError.TIMEOUT + flag_result = self._get_stale_flag_fallback(distinct_id, key) + except RequestsConnectionError as e: + self.log.warning(f"[FEATURE FLAGS] Connection error: {e}") + feature_flag_error = FeatureFlagError.CONNECTION_ERROR + flag_result = self._get_stale_flag_fallback(distinct_id, key) + except APIError as e: + self.log.warning(f"[FEATURE FLAGS] API error: {e}") + feature_flag_error = FeatureFlagError.api_error(e.status) + flag_result = self._get_stale_flag_fallback(distinct_id, key) except Exception as e: self.log.exception(f"[FEATURE FLAGS] Unable to get flag remotely: {e}") - - # Fallback to cached value if remote evaluation fails - if self.flag_cache: - stale_result = self.flag_cache.get_stale_cached_flag( - distinct_id, key - ) - if stale_result: - self.log.info( - f"[FEATURE FLAGS] Using stale cached value for flag {key}" - ) - flag_result = stale_result + feature_flag_error = FeatureFlagError.UNKNOWN_ERROR + flag_result = self._get_stale_flag_fallback(distinct_id, key) if send_feature_flag_events: self._capture_feature_flag_called( @@ -1612,6 +1645,7 @@ def _get_feature_flag_result( request_id, evaluated_at, flag_details, + feature_flag_error, ) return flag_result @@ -1814,9 +1848,10 @@ def _get_feature_flag_details_from_server( person_properties: dict[str, str], group_properties: dict[str, str], disable_geoip: Optional[bool], - ) -> tuple[Optional[FeatureFlag], Optional[str], Optional[int]]: + ) -> tuple[Optional[FeatureFlag], Optional[str], Optional[int], bool]: """ - Calls /flags and returns the flag details, request id, and evaluated at timestamp + Calls /flags and returns the flag details, request id, evaluated at timestamp, + and whether there were errors while computing flags. """ resp_data = self.get_flags_decision( distinct_id, @@ -1828,9 +1863,10 @@ def _get_feature_flag_details_from_server( ) request_id = resp_data.get("requestId") evaluated_at = resp_data.get("evaluatedAt") + errors_while_computing = resp_data.get("errorsWhileComputingFlags", False) flags = resp_data.get("flags") flag_details = flags.get(key) if flags else None - return flag_details, request_id, evaluated_at + return flag_details, request_id, evaluated_at, errors_while_computing def _capture_feature_flag_called( self, @@ -1844,6 +1880,7 @@ def _capture_feature_flag_called( request_id: Optional[str], evaluated_at: Optional[int], flag_details: Optional[FeatureFlag], + feature_flag_error: Optional[str] = None, ): feature_flag_reported_key = ( f"{key}_{'::null::' if response is None else str(response)}" @@ -1878,6 +1915,8 @@ def _capture_feature_flag_called( ) if flag_details.metadata.id: properties["$feature_flag_id"] = flag_details.metadata.id + if feature_flag_error: + properties["$feature_flag_error"] = feature_flag_error self.capture( "$feature_flag_called", diff --git a/posthog/request.py b/posthog/request.py index 7199f3a8..138d621d 100644 --- a/posthog/request.py +++ b/posthog/request.py @@ -312,6 +312,12 @@ class QuotaLimitError(APIError): pass +# Re-export requests exceptions for use in client.py +# This keeps all requests library imports centralized in this module +RequestsTimeout = requests.exceptions.Timeout +RequestsConnectionError = requests.exceptions.ConnectionError + + class DatetimeSerializer(json.JSONEncoder): def default(self, obj: Any): if isinstance(obj, (date, datetime)): diff --git a/posthog/test/test_feature_flag_result.py b/posthog/test/test_feature_flag_result.py index d1e9b464..7de428c6 100644 --- a/posthog/test/test_feature_flag_result.py +++ b/posthog/test/test_feature_flag_result.py @@ -4,7 +4,13 @@ from posthog.client import Client from posthog.test.test_utils import FAKE_TEST_API_KEY -from posthog.types import FeatureFlag, FeatureFlagResult, FlagMetadata, FlagReason +from posthog.types import ( + FeatureFlag, + FeatureFlagError, + FeatureFlagResult, + FlagMetadata, + FlagReason, +) class TestFeatureFlagResult(unittest.TestCase): @@ -189,7 +195,6 @@ def tearDownClass(cls): def set_fail(self, e, batch): """Mark the failure handler""" - print("FAIL", e, batch) # noqa: T201 self.failed = True def setUp(self): @@ -241,6 +246,9 @@ def test_get_feature_flag_result_boolean_local_evaluation(self, patch_capture): groups={}, disable_geoip=None, ) + # Verify error property is NOT present on successful evaluation + captured_properties = patch_capture.call_args[1]["properties"] + self.assertNotIn("$feature_flag_error", captured_properties) @mock.patch.object(Client, "capture") def test_get_feature_flag_result_variant_local_evaluation(self, patch_capture): @@ -295,6 +303,9 @@ def test_get_feature_flag_result_variant_local_evaluation(self, patch_capture): groups={}, disable_geoip=None, ) + # Verify error property is NOT present on successful evaluation + captured_properties = patch_capture.call_args[1]["properties"] + self.assertNotIn("$feature_flag_error", captured_properties) another_flag_result = self.client.get_feature_flag_result( "person-flag", "another-distinct-id", person_properties={"region": "USA"} @@ -360,6 +371,9 @@ def test_get_feature_flag_result_boolean_decide(self, patch_capture, patch_flags groups={}, disable_geoip=None, ) + # Verify error property is NOT present on successful evaluation + captured_properties = patch_capture.call_args[1]["properties"] + self.assertNotIn("$feature_flag_error", captured_properties) @mock.patch("posthog.client.flags") @mock.patch.object(Client, "capture") @@ -403,6 +417,9 @@ def test_get_feature_flag_result_variant_decide(self, patch_capture, patch_flags groups={}, disable_geoip=None, ) + # Verify error property is NOT present on successful evaluation + captured_properties = patch_capture.call_args[1]["properties"] + self.assertNotIn("$feature_flag_error", captured_properties) @mock.patch("posthog.client.flags") @mock.patch.object(Client, "capture") @@ -438,6 +455,428 @@ def test_get_feature_flag_result_unknown_flag(self, patch_capture, patch_flags): "$feature_flag_response": None, "locally_evaluated": False, "$feature/no-person-flag": None, + "$feature_flag_error": FeatureFlagError.FLAG_MISSING, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_with_errors_while_computing_flags( + self, patch_capture, patch_flags + ): + """Test that errors_while_computing_flags is included in the $feature_flag_called event. + + When the server returns errorsWhileComputingFlags=true, it indicates that there + was an error computing one or more flags. We include this in the event so users + can identify and debug flag evaluation issues. + """ + patch_flags.return_value = { + "flags": { + "my-flag": { + "key": "my-flag", + "enabled": True, + "variant": None, + "reason": {"description": "Matched condition set 1"}, + "metadata": {"id": 1, "version": 1, "payload": None}, + }, + }, + "requestId": "test-request-id-789", + "errorsWhileComputingFlags": True, + } + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertEqual(flag_result.enabled, True) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": True, + "locally_evaluated": False, + "$feature/my-flag": True, + "$feature_flag_request_id": "test-request-id-789", + "$feature_flag_reason": "Matched condition set 1", + "$feature_flag_id": 1, + "$feature_flag_version": 1, + "$feature_flag_error": FeatureFlagError.ERRORS_WHILE_COMPUTING, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_flag_not_in_response( + self, patch_capture, patch_flags + ): + """Test that when a flag is not in the API response, we capture flag_missing error. + + This happens when a flag doesn't exist or the user doesn't match any conditions. + """ + patch_flags.return_value = { + "flags": { + "other-flag": { + "key": "other-flag", + "enabled": True, + "variant": None, + "reason": {"description": "Matched condition set 1"}, + "metadata": {"id": 1, "version": 1, "payload": None}, + }, + }, + "requestId": "test-request-id-456", + } + + flag_result = self.client.get_feature_flag_result( + "missing-flag", "some-distinct-id" + ) + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "missing-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/missing-flag": None, + "$feature_flag_request_id": "test-request-id-456", + "$feature_flag_error": FeatureFlagError.FLAG_MISSING, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_errors_computing_and_flag_missing( + self, patch_capture, patch_flags + ): + """Test that both errors are reported when errorsWhileComputingFlags=true AND flag is missing. + + This can happen when the server encounters errors computing flags AND the requested + flag is not in the response. Both conditions should be reported for debugging. + """ + patch_flags.return_value = { + "flags": {}, # Flag is missing + "requestId": "test-request-id-999", + "errorsWhileComputingFlags": True, # But errors also occurred + } + + flag_result = self.client.get_feature_flag_result( + "missing-flag", "some-distinct-id" + ) + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "missing-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/missing-flag": None, + "$feature_flag_request_id": "test-request-id-999", + "$feature_flag_error": f"{FeatureFlagError.ERRORS_WHILE_COMPUTING},{FeatureFlagError.FLAG_MISSING}", + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_unknown_error(self, patch_capture, patch_flags): + """Test that unexpected exceptions are captured as unknown_error.""" + patch_flags.side_effect = Exception("Unexpected error") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.UNKNOWN_ERROR, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_timeout_error(self, patch_capture, patch_flags): + """Test that timeout errors are captured specifically.""" + from posthog.request import RequestsTimeout + + patch_flags.side_effect = RequestsTimeout("Request timed out") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.TIMEOUT, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_connection_error(self, patch_capture, patch_flags): + """Test that connection errors are captured specifically.""" + from posthog.request import RequestsConnectionError + + patch_flags.side_effect = RequestsConnectionError("Connection refused") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.CONNECTION_ERROR, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_api_error(self, patch_capture, patch_flags): + """Test that API errors include the status code.""" + from posthog.request import APIError + + patch_flags.side_effect = APIError(500, "Internal server error") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.api_error(500), + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_get_feature_flag_result_quota_limited(self, patch_capture, patch_flags): + """Test that quota limit errors are captured specifically.""" + from posthog.request import QuotaLimitError + + patch_flags.side_effect = QuotaLimitError(429, "Rate limit exceeded") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + self.assertIsNone(flag_result) + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.QUOTA_LIMITED, + }, + groups={}, + disable_geoip=None, + ) + + +class TestFeatureFlagErrorWithStaleCacheFallback(unittest.TestCase): + """Tests for stale cache fallback behavior when flag evaluation fails. + + When the PostHog API is unavailable (timeout, connection error, etc.), the SDK + falls back to stale cached flag values if available. These tests verify that: + 1. The stale cached value is returned when an error occurs + 2. The $feature_flag_error property is still set (for debugging) + 3. The response reflects the cached value, not None + """ + + def set_fail(self, e, batch): + """Mark the failure handler""" + self.failed = True + + def setUp(self): + self.failed = False + # Create client with memory-based flag cache enabled + self.client = Client( + FAKE_TEST_API_KEY, + on_error=self.set_fail, + flag_fallback_cache_url="memory://local/?ttl=300&size=10000", + ) + + def _populate_stale_cache(self, distinct_id, flag_key, flag_result): + """Pre-populate the flag cache with a value that will be used for stale fallback.""" + self.client.flag_cache.set_cached_flag( + distinct_id, + flag_key, + flag_result, + flag_definition_version=self.client.flag_definition_version, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_timeout_error_returns_stale_cached_value(self, patch_capture, patch_flags): + """Test that timeout errors return stale cached value when available.""" + from posthog.request import RequestsTimeout + + # Pre-populate cache with a flag result + cached_result = FeatureFlagResult.from_value_and_payload( + "my-flag", "cached-variant", '{"from": "cache"}' + ) + self._populate_stale_cache("some-distinct-id", "my-flag", cached_result) + + # Simulate timeout error + patch_flags.side_effect = RequestsTimeout("Request timed out") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + # Should return the stale cached value + self.assertIsNotNone(flag_result) + self.assertEqual(flag_result.variant, "cached-variant") + self.assertEqual(flag_result.payload, {"from": "cache"}) + + # Error should still be tracked for debugging + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": "cached-variant", + "locally_evaluated": False, + "$feature/my-flag": "cached-variant", + "$feature_flag_payload": {"from": "cache"}, + "$feature_flag_error": FeatureFlagError.TIMEOUT, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_connection_error_returns_stale_cached_value( + self, patch_capture, patch_flags + ): + """Test that connection errors return stale cached value when available.""" + from posthog.request import RequestsConnectionError + + # Pre-populate cache with a boolean flag result + cached_result = FeatureFlagResult.from_value_and_payload("my-flag", True, None) + self._populate_stale_cache("some-distinct-id", "my-flag", cached_result) + + # Simulate connection error + patch_flags.side_effect = RequestsConnectionError("Connection refused") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + # Should return the stale cached value + self.assertIsNotNone(flag_result) + self.assertEqual(flag_result.enabled, True) + self.assertIsNone(flag_result.variant) + + # Error should still be tracked + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": True, + "locally_evaluated": False, + "$feature/my-flag": True, + "$feature_flag_error": FeatureFlagError.CONNECTION_ERROR, + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_api_error_returns_stale_cached_value(self, patch_capture, patch_flags): + """Test that API errors return stale cached value when available.""" + from posthog.request import APIError + + # Pre-populate cache + cached_result = FeatureFlagResult.from_value_and_payload( + "my-flag", "control", None + ) + self._populate_stale_cache("some-distinct-id", "my-flag", cached_result) + + # Simulate API error + patch_flags.side_effect = APIError(503, "Service unavailable") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + # Should return the stale cached value + self.assertIsNotNone(flag_result) + self.assertEqual(flag_result.variant, "control") + + # Error should still be tracked with status code + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": "control", + "locally_evaluated": False, + "$feature/my-flag": "control", + "$feature_flag_error": FeatureFlagError.api_error(503), + }, + groups={}, + disable_geoip=None, + ) + + @mock.patch("posthog.client.flags") + @mock.patch.object(Client, "capture") + def test_error_without_cache_returns_none(self, patch_capture, patch_flags): + """Test that errors return None when no stale cache is available.""" + from posthog.request import RequestsTimeout + + # Do NOT populate cache - no fallback available + + patch_flags.side_effect = RequestsTimeout("Request timed out") + + flag_result = self.client.get_feature_flag_result("my-flag", "some-distinct-id") + + # Should return None since no cache available + self.assertIsNone(flag_result) + + # Error should still be tracked + patch_capture.assert_called_with( + "$feature_flag_called", + distinct_id="some-distinct-id", + properties={ + "$feature_flag": "my-flag", + "$feature_flag_response": None, + "locally_evaluated": False, + "$feature/my-flag": None, + "$feature_flag_error": FeatureFlagError.TIMEOUT, }, groups={}, disable_geoip=None, diff --git a/posthog/types.py b/posthog/types.py index f706eacc..03104278 100644 --- a/posthog/types.py +++ b/posthog/types.py @@ -307,3 +307,42 @@ def to_payloads(response: FlagsResponse) -> Optional[dict[str, str]]: and value.enabled and value.metadata.payload is not None } + + +class FeatureFlagError: + """Error type constants for the $feature_flag_error property. + + These values are sent in analytics events to track flag evaluation failures. + They should not be changed without considering impact on existing dashboards + and queries that filter on these values. + + Error values: + ERRORS_WHILE_COMPUTING: Server returned errorsWhileComputingFlags=true + FLAG_MISSING: Requested flag not in API response + QUOTA_LIMITED: Rate/quota limit exceeded + TIMEOUT: Request timed out + CONNECTION_ERROR: Network connectivity issue + UNKNOWN_ERROR: Unexpected exceptions + + For API errors with status codes, use the api_error() method which returns + a string like "api_error_500". + """ + + ERRORS_WHILE_COMPUTING = "errors_while_computing_flags" + FLAG_MISSING = "flag_missing" + QUOTA_LIMITED = "quota_limited" + TIMEOUT = "timeout" + CONNECTION_ERROR = "connection_error" + UNKNOWN_ERROR = "unknown_error" + + @staticmethod + def api_error(status: Union[int, str]) -> str: + """Generate API error string with status code. + + Args: + status: HTTP status code from the API error + + Returns: + Error string like "api_error_500" + """ + return f"api_error_{status}"