@@ -147,55 +147,105 @@ class Frame:
147147 # Configure if you want to see more in logs. Should be a multiple of 3.
148148 MAX_LOG_SIZE = int (os .environ .get ("WEBSOCKETS_MAX_LOG_SIZE" , "75" ))
149149
150+ DEFAULT_IS_TEXT = {OP_TEXT : True , OP_BINARY : False , OP_CLOSE : True }
151+
150152 def __str__ (self ) -> str :
151153 """
152154 Return a human-readable representation of a frame.
153155
156+ This function is intended for logging and debugging. It doesn't aim to
157+ support round-tripping because payloads can be too long for displaying
158+ conveniently. Instead, it shows the beginning and the end. It's robust
159+ to incorrect data.
160+
161+ It attempts to decode UTF-8 payloads whenever possible, even for binary
162+ frames and control frames, because those frequently contain UTF-8 data.
163+ It applies the same logic to continuation frames, because we don't know
164+ if they continue a text frame or a binary frame.
165+
154166 """
155- coding = None
167+ expect_text = self .DEFAULT_IS_TEXT .get (self .opcode )
168+ data_repr , is_text = self ._data_repr ()
169+
170+ data_type = "" if expect_text == is_text else ("text" if is_text else "binary" )
156171 length = f"{ len (self .data )} byte{ '' if len (self .data ) == 1 else 's' } "
157172 non_final = "" if self .fin else "continued"
173+ metadata = ", " .join (filter (None , [data_type , length , non_final ]))
174+
175+ return f"{ self .opcode .name } { data_repr } [{ metadata } ]"
176+
177+ def _data_repr (self ) -> tuple [str , bool | None ]:
178+ """
179+ Return a human-readable representation of the payload.
180+
181+ Also returns whether the payload is text.
182+
183+ The representation is elided to fit ``MAX_LOG_SIZE``.
184+
185+ This is a helper for the __str__ method.
158186
159- if self .opcode is OP_TEXT :
160- # Decoding only the beginning and the end is needlessly hard.
161- # Decode the entire payload then elide later if necessary.
162- data = repr (bytes (self .data ).decode ())
163- elif self .opcode is OP_BINARY :
164- # We'll show at most the first 16 bytes and the last 8 bytes.
165- # Encode just what we need, plus two dummy bytes to elide later.
187+ """
188+ if not self .data :
189+ return "''" , self .DEFAULT_IS_TEXT .get (self .opcode )
190+
191+ # Special case for close frames: parse close code and reason.
192+ # Fall back to the standard case if the payload is malformed.
193+
194+ if self .opcode is OP_CLOSE :
195+ try :
196+ return str (Close .parse (self .data )), True
197+ except (ProtocolError , UnicodeDecodeError ):
198+ pass
199+
200+ # Guess whether the payload is UTF-8 or binary, regardless of opcode, to
201+ # display UTF-8 text in binary frames nicely and generally to be helpful
202+ # and robust. Also support frames fragmented within UTF-8 sequences.
203+
204+ if len (self .data ) > 4 * self .MAX_LOG_SIZE :
205+ # Process only the start and the end, as the middle will be elided.
206+ # Cast to bytes because self.data could be a memoryview.
207+ data_start = bytes (self .data [: 8 * self .MAX_LOG_SIZE // 3 ])
208+ data_end = bytes (self .data [- 4 * self .MAX_LOG_SIZE // 3 :])
209+ is_text = is_utf8_fragment (
210+ data_start ,
211+ must_start_clean = self .opcode != OP_CONT ,
212+ ) and is_utf8_fragment (
213+ data_end ,
214+ must_end_clean = self .fin ,
215+ )
216+ if is_text :
217+ # Cast to bytes before decoding because data could be a memoryview.
218+ data_repr = repr ((data_start + data_end ).decode (errors = "replace" ))
219+
220+ else :
221+ # Cast to bytes because self.data could be a memoryview.
222+ data = bytes (self .data )
223+ is_text = is_utf8_fragment (
224+ data ,
225+ must_start_clean = self .opcode != OP_CONT ,
226+ must_end_clean = self .fin ,
227+ )
228+ if is_text :
229+ data_repr = repr (data .decode (errors = "replace" ))
230+
231+ # When the payload is text (except perhaps for boundaries), we decoded
232+ # enough in ``data_repr``. Now, do the same for binary payloads.
233+
234+ if not is_text :
166235 binary = self .data
167236 if len (binary ) > self .MAX_LOG_SIZE // 3 :
168237 cut = (self .MAX_LOG_SIZE // 3 - 1 ) // 3 # by default cut = 8
238+ # Encode two dummy bytes to force eliding and adding an ellipsis.
169239 binary = b"" .join ([binary [: 2 * cut ], b"\x00 \x00 " , binary [- cut :]])
170- data = " " .join (f"{ byte :02x} " for byte in binary )
171- elif self .opcode is OP_CLOSE :
172- data = str (Close .parse (self .data ))
173- elif self .data :
174- # We don't know if a Continuation frame contains text or binary.
175- # Ping and Pong frames could contain UTF-8.
176- # Attempt to decode as UTF-8 and display it as text; fallback to
177- # binary. If self.data is a memoryview, it has no decode() method,
178- # which raises AttributeError.
179- try :
180- data = repr (bytes (self .data ).decode ())
181- coding = "text"
182- except (UnicodeDecodeError , AttributeError ):
183- binary = self .data
184- if len (binary ) > self .MAX_LOG_SIZE // 3 :
185- cut = (self .MAX_LOG_SIZE // 3 - 1 ) // 3 # by default cut = 8
186- binary = b"" .join ([binary [: 2 * cut ], b"\x00 \x00 " , binary [- cut :]])
187- data = " " .join (f"{ byte :02x} " for byte in binary )
188- coding = "binary"
189- else :
190- data = "''"
240+ data_repr = " " .join (f"{ byte :02x} " for byte in binary )
191241
192- if len (data ) > self .MAX_LOG_SIZE :
193- cut = self .MAX_LOG_SIZE // 3 - 1 # by default cut = 24
194- data = data [: 2 * cut ] + "..." + data [- cut :]
242+ # Elide the middle of the representation to fit the maximum log size.
195243
196- metadata = ", " .join (filter (None , [coding , length , non_final ]))
244+ if len (data_repr ) > self .MAX_LOG_SIZE :
245+ cut = self .MAX_LOG_SIZE // 3 - 1 # by default cut = 24
246+ data_repr = data_repr [: 2 * cut ] + "..." + data_repr [- cut :]
197247
198- return f" { self . opcode . name } { data } [ { metadata } ]"
248+ return data_repr , is_text
199249
200250 @classmethod
201251 def parse (
0 commit comments