Speed up strip

My 2 cents (after about 10 minutes of work on this) for the "Speed Up String Strip Method" conversation on the Xojo forums (General - Xojo Programming Forum).

No idea how this compares to the others, but it's pretty straightforward code and seems decently speedy without being obtuse.

First my test code (I know OMG test code !!!)

// test driver code
// Runs strip() once on three inputs, logs the elapsed time of each call in
// microseconds, and drops into the debugger (Break) if the result is not
// what the test expects.

// ======================== TEST 1
// Input: bytes 0..endChar. The first 33 bytes (0..32) should be stripped,
// leaving the bytes 33..endChar.
If True Then
  Dim s As String 
  Const endChar = 49
  
  
  For i As Integer = 0 To endChar
    
    s = s + Encodings.UTF8.Chr(i)
    
  Next
  
  Dim start_m As Double = Microseconds
  
  Dim result As String = strip(s)
  
  Dim end_m As Double = Microseconds
  System.DebugLog "elapsed test 1 = " + Str(end_m - start_m)
  
  // result must start at code point 33 ...
  If Left(result,1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  // ... and contain exactly the code points 33..endChar
  If Len(result) <> endChar - 33 + 1 Then
    Break
  End If
   
End If


// ======================== TEST 2
// Input: bytes 0..9 only, i.e. nothing >= 33, so the result must be empty.
If True Then
  Dim s As String 
  
  For i As Integer = 0 To 9
    
    s = s + Encodings.UTF8.Chr(i)
    
  Next
  
  Dim start_m As Double = Microseconds
  
  Dim result As String = strip(s)
  
  Dim end_m As Double = Microseconds
  // label fixed: this test used to log as "test 1" as well
  System.DebugLog "elapsed test 2 = " + Str(end_m - start_m)
  
  If result <> "" Then
    Break
  End If
  
End If


// ======================== TEST 3
// Input: bytes 33..endChar. Nothing to strip, so the input must come back
// unchanged.
If True Then
  Dim s As String 
  Const endChar = 49
  
  For i As Integer = 33 To endChar
    
    s = s + Encodings.UTF8.Chr(i)
    
  Next
  
  Dim start_m As Double = Microseconds
  
  Dim result As String = strip(s)
  
  Dim end_m As Double = Microseconds
  // label fixed: this test used to log as "test 1" as well
  System.DebugLog "elapsed test 3 = " + Str(end_m - start_m)
  
  If Left(result,1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(result) <> endChar - 33 + 1 Then
    Break
  End If
  
End If

and my strip method

Private Function strip(s as string) as string
  // Removes the leading bytes of s whose value is below 33 (control
  // characters and space) and returns what is left, tagged with the
  // encoding of s. Trailing bytes are not examined.
  //
  // s       : the string to strip
  // returns : ""  when s is empty or every byte is below 33
  //           s   when its first byte is already >= 33
  //           otherwise the tail of s starting at the first byte >= 33
  
  // turn off debugger/runtime checks inside this method for speed
  #Pragma BreakOnExceptions False
  #Pragma StackOverflowChecking False
  #Pragma BackgroundTasks False
  #Pragma BoundsChecking False
  
  // The scan below is byte-wise: it relies on the low code points (< 33)
  // being single bytes, which holds for ASCII, UTF-8 and most 8-bit encodings.
  // NOTE: UTF-16 / UTF-32 text is NOT handled here; it would need a scan
  // over 2- or 4-byte units instead of single bytes.
  
  Dim mb As MemoryBlock = s
  
  Dim i As Integer
  
  // find the offset of the first byte that must be kept
  For i = 0 To mb.Size - 1
    
    If mb.UInt8Value(i) >= 33 Then 
      Exit For
    End If
    
  Next
  
  // If the loop ran to completion (no Exit For) i is one past the last
  // index, i.e. i = mb.Size. Comparing against mb.Size - 1 here would also
  // match a hit on the LAST byte and wrongly return "" for inputs such as
  // "A" or <tab>"A".
  If i >= mb.Size Then
    // nothing to keep - our result is empty
    Return ""
  ElseIf i <= 0 Then 
    // first byte was >= 33 so we return the whole string
    Return s
  Else
    Return DefineEncoding(mb.StringValue(i, mb.Size - i), s.Encoding)
  End If
  
  
End Function
1 Like

You would be better using

mb.StringValue( i, mb.size - i, s.Encoding )

It would avoid a function call and string copy.

.StringValue copies the string and DefineEncoding performs another copy.

indeed that is a good idea
seems to improve speed a little

I altered the main testing to perform repeated strips & average over 1000 iterations
the test driver now looks like

// Timing harness: every case calls strip() `iterations` times on the same
// input, logs the average time per call in microseconds, then sanity-checks
// the last result (Break drops into the debugger on a mismatch).
Const iterations = 1000

// ---- case 1: bytes 0..endChar, the leading 0..32 must be removed ----
If True Then
  Const endChar = 49
  
  Dim sample As String
  For codePoint As Integer = 0 To endChar
    sample = sample + Encodings.UTF8.Chr(codePoint)
  Next
  
  Dim stripped As String
  Dim t0 As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(sample)
  Next
  Dim t1 As Double = Microseconds
  
  Dim avgPerCall As Double = (t1 - t0) / iterations
  System.DebugLog "elapsed test 1 = " + Str(avgPerCall) + " avg microseconds"
  
  // first surviving character must be code point 33
  If Left(stripped, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  // and exactly the code points 33..endChar must remain
  If Len(stripped) <> endChar - 33 + 1 Then
    Break
  End If
End If


// ---- case 2: bytes 0..9 only, everything must be removed ----
If True Then
  Dim sample As String
  For codePoint As Integer = 0 To 9
    sample = sample + Encodings.UTF8.Chr(codePoint)
  Next
  
  Dim stripped As String
  Dim t0 As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(sample)
  Next
  Dim t1 As Double = Microseconds
  
  Dim avgPerCall As Double = (t1 - t0) / iterations
  System.DebugLog "elapsed test 2 = " + Str(avgPerCall) + " avg microseconds"
  
  If stripped <> "" Then
    Break
  End If
End If


// ---- case 3: bytes 33..endChar, nothing to remove ----
If True Then
  Const endChar = 49
  
  Dim sample As String
  For codePoint As Integer = 33 To endChar
    sample = sample + Encodings.UTF8.Chr(codePoint)
  Next
  
  Dim stripped As String
  Dim t0 As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(sample)
  Next
  Dim t1 As Double = Microseconds
  
  Dim avgPerCall As Double = (t1 - t0) / iterations
  System.DebugLog "elapsed test 3 = " + Str(avgPerCall) + " avg microseconds"
  
  If Left(stripped, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(stripped) <> endChar - 33 + 1 Then
    Break
  End If
End If

and the strip code like

// Body of strip(s As String) As String.
// Removes the leading bytes of s whose value is below 33 and returns the
// rest in the encoding of s. useDefineEncoding selects, at compile time,
// between the two ways of attaching the encoding so they can be benchmarked
// against each other.
Const useDefineEncoding = True

// turn off debugger/runtime checks inside this method for speed
#Pragma BreakOnExceptions False
#Pragma StackOverflowChecking False
#Pragma BackgroundTasks False
#Pragma BoundsChecking False

// The scan below is byte-wise: it relies on the low code points (< 33)
// being single bytes, which holds for ASCII, UTF-8 and most 8-bit encodings.
// NOTE: UTF-16 / UTF-32 text is NOT handled here; it would need a scan
// over 2- or 4-byte units instead of single bytes.

Dim mb As MemoryBlock = s

Dim i As Integer

// find the offset of the first byte that must be kept
For i = 0 To mb.Size - 1
  
  If mb.UInt8Value(i) >= 33 Then 
    Exit For
  End If
  
Next

// If the loop ran to completion (no Exit For) i is one past the last index,
// i.e. i = mb.Size. Comparing against mb.Size - 1 would also match a hit on
// the LAST byte and wrongly return "" for inputs such as "A" or <tab>"A".
If i >= mb.Size Then
  // nothing to keep - our result is empty
  Return ""
ElseIf i <= 0 Then 
  // first byte was >= 33 so we return the whole string
  Return s
Else
  
  #If useDefineEncoding
    // original form: extract the bytes, then tag them with the encoding
    Return DefineEncoding(mb.StringValue(i, mb.Size - i), s.Encoding)
  #Else
    // suggested form: let StringValue apply the encoding in one call
    Return mb.StringValue( i, mb.Size - i, s.Encoding )
  #EndIf
  
End If

using my original suggestion I get

Jul 17 10:45:58  My Application[59693] <Warning>: elapsed test 1 = 1.027359 avg microseconds
Jul 17 10:45:58  My Application[59693] <Warning>: elapsed test 2 = 0.429799 avg microseconds
Jul 17 10:45:58  My Application[59693] <Warning>: elapsed test 3 = 0.310702 avg microseconds

the new one

Jul 17 10:46:48  My Application[59793] <Warning>: elapsed test 1 = 0.897624 avg microseconds
Jul 17 10:46:48  My Application[59793] <Warning>: elapsed test 2 = 0.436471 avg microseconds
Jul 17 10:46:48  My Application[59793] <Warning>: elapsed test 3 = 0.30063 avg microseconds

so it shaves about 0.13 microseconds off the first test, which is always nice
the second is oddly slower - but still so close it might not matter

EDIT : the other thing is that various “optimizations” affect results in different ways

  1. inserting a check for s = “” before the conversion to memory block speeds up some tests and slows others
  2. inserting a check for mb.UInt8Value(0) >= 33 before the loop, to avoid doing the loop at all, also speeds up some tests and slows others
  3. checking for an empty string with s = “” is slower in all cases than converting to a MemoryBlock and checking size = 0
  4. checking for mb.size = 0 is slower than checking for <= 0 in every test
  5. removing the trailing check for i <= 0 speeds things up a tiny bit and doesn't appear to alter results in any way (since it covered the case of the first char being >= 33, which is now handled ahead of the loop)

which makes the strip code now look like


// Body of strip(s As String) As String.
// Removes the leading bytes of s whose value is below 33 and returns the
// rest in the encoding of s.

// turn off debugger/runtime checks inside this method for speed
#Pragma BreakOnExceptions False
#Pragma StackOverflowChecking False
#Pragma BackgroundTasks False
#Pragma BoundsChecking False

// The scan below is byte-wise: it relies on the low code points (< 33)
// being single bytes, which holds for ASCII, UTF-8 and most 8-bit encodings.
// NOTE: UTF-16 / UTF-32 text is NOT handled here; it would need a scan
// over 2- or 4-byte units instead of single bytes.

Dim mb As MemoryBlock = s

// empty string ?
If mb.Size <= 0 Then 
  Return ""
End If

// short cut check
// first byte >= 33 : nothing to strip, hand back the input untouched
If mb.UInt8Value(0) >= 33 Then
  Return s
End If

Dim i As Integer

// find the offset of the first byte that must be kept
For i = 0 To mb.Size - 1
  
  If mb.UInt8Value(i) >= 33 Then 
    Exit For
  End If
  
Next

// If the loop ran to completion (no Exit For) i is one past the last index,
// i.e. i = mb.Size. Comparing against mb.Size - 1 would also match a hit on
// the LAST byte and wrongly return "" for an input such as <tab>"A".
If i >= mb.Size Then
  // nothing to keep - our result is empty
  Return ""
Else
  Return mb.StringValue( i, mb.Size - i, s.Encoding )
End If

here’s my sample project

http://great-white-software.com/miscellaneous/strip_speed.xojo_binary_project

Lose one loop.

For i = 1 To mb.Size - 1

Since you already know mb(0) fails the test.

Do you? mb(0) can be < chr(33)

You already know that the first character is < 33.
The function exits if it is not.

[quote="npalardy, post:3, topic:2606"]

// short cut check
// first byte >= 33
If mb.UInt8Value(0) >= 33 Then
  Return s
End If

[/quote]

ah yes we can lose the check of 0 since we used that as a short cut above

EDIT : shaves the tiniest bit off but every little bit helps

now if only I could set each method to be optimized differently IF I wanted
I’d compile this one aggressive
so far all tests have just been with default optimization

EDIT II :
default, moderate then aggressive results


npalardy@Normans-MacBook-Pro My Application % ./My\ Application
Jul 17 15:46:10  My Application[78143] <Warning>: elapsed test (100000 iterations) = 1.01769 avg microseconds
Jul 17 15:46:10  My Application[78143] <Warning>: elapsed test (100000 iterations) = 0.4125023 avg microseconds
Jul 17 15:46:10  My Application[78143] <Warning>: elapsed test (100000 iterations) = 0.2873552 avg microseconds
npalardy@Normans-MacBook-Pro My Application % cd /Users/npalardy/Desktop/untitled\ folder/Builds\ -\ strip_speed/macOS\ Universal/My\ Application 
npalardy@Normans-MacBook-Pro My Application % ./My\ Application                                                                                   
Jul 17 15:46:30  My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.9679235 avg microseconds
Jul 17 15:46:30  My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.3799189 avg microseconds
Jul 17 15:46:30  My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.2923051 avg microseconds
npalardy@Normans-MacBook-Pro My Application % cd /Users/npalardy/Desktop/untitled\ folder/Builds\ -\ strip_speed/macOS\ Universal/My\ Application 
npalardy@Normans-MacBook-Pro My Application % ./My\ Application                                                                                   
Jul 17 15:46:49  My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.8812051 avg microseconds
Jul 17 15:46:49  My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.365204 avg microseconds
Jul 17 15:46:49  My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.2774543 avg microseconds

Good info, Norman, thanks. All of the suggestions in the original thread offer improvements, especially the third post from Jim Meyer pointing out that it was not smart to be modifying the source string in the loop, but the real time savings came from Karen A’s suggestion of putting the code inline to avoid the function call overhead, and my subsequent restructuring of the app to simply call the parent routine (which calls the Strip method recursively when parsing XML) a lot less frequently.