No idea how this compares to the others, but it's pretty straightforward code and seems decently speedy without being obtuse.
First my test code (I know OMG test code !!!)
// test driver code
// ======================== TEST 1: control bytes followed by kept characters
If True Then
  // Build code points 0..49; the first 33 are below the threshold used by
  // strip, so the expected result starts at code point 33.
  Const lastCodePoint = 49
  Dim source As String
  For codePoint As Integer = 0 To lastCodePoint
    source = source + Encodings.UTF8.Chr(codePoint)
  Next

  // Time one call.
  Dim startedAt As Double = Microseconds
  Dim stripped As String = strip(source)
  Dim finishedAt As Double = Microseconds
  Dim elapsed As Double = finishedAt - startedAt
  System.DebugLog "elapsed test 1 = " + Str(elapsed)

  // Drop into the debugger if the result is not what we expect.
  If Left(stripped, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(stripped) <> lastCodePoint - 33 + 1 Then
    Break
  End If
End If
// ======================== TEST 2: nothing but control bytes
If True Then
  // Code points 0..9 are all below 33, so strip is expected to return "".
  Dim s As String
  For i As Integer = 0 To 9
    s = s + Encodings.UTF8.Chr(i)
  Next

  // Time one call.
  Dim start_m As Double = Microseconds
  Dim result As String = strip(s)
  Dim end_m As Double = Microseconds
  // Label fixed: this block previously logged itself as "test 1".
  System.DebugLog "elapsed test 2 = " + Str(end_m - start_m)

  // Drop into the debugger if anything survived the strip.
  If result <> "" Then
    Break
  End If
End If
// ======================== TEST 3: nothing to strip
If True Then
  // Code points 33..49 are all at or above the threshold, so strip is
  // expected to hand the string back unchanged.
  Dim s As String
  Const endChar = 49
  For i As Integer = 33 To endChar
    s = s + Encodings.UTF8.Chr(i)
  Next

  // Time one call.
  Dim start_m As Double = Microseconds
  Dim result As String = strip(s)
  Dim end_m As Double = Microseconds
  // Label fixed: this block previously logged itself as "test 1".
  System.DebugLog "elapsed test 3 = " + Str(end_m - start_m)

  // Drop into the debugger if the result is not what we expect.
  If Left(result, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(result) <> endChar - 33 + 1 Then
    Break
  End If
End If
and my strip method
Private Function strip(s as string) as string
  // Returns s without its leading bytes whose value is below 33.
  //
  // s       : the string to clean up.
  // returns : "" when every byte is below 33 (or s is empty), s itself when
  //           the first byte is already >= 33, otherwise the remainder of s
  //           starting at the first byte >= 33, tagged with s's encoding.
  #Pragma BreakOnExceptions False
  #Pragma StackOverflowChecking False
  #Pragma BackgroundTasks False
  #Pragma BoundsChecking False

  // since we know that the low code points are single bytes in MOST encodings
  // NOTE: this scans byte by byte; UTF16 input (2 bytes per char) is NOT
  // handled here and would need a 16 bit comparison instead.
  Dim mb As MemoryBlock = s

  // Hoisted so the loop bound and the checks below agree on one value.
  Dim lastIndex As Integer = mb.Size - 1

  Dim i As Integer
  For i = 0 To lastIndex
    If mb.UInt8Value(i) >= 33 Then
      Exit For
    End If
  Next

  If i > lastIndex Then
    // The loop ran off the end (or never ran): nothing to keep.
    // This used to be "i >= mb.size-1", which also discarded strings whose
    // first kept byte was the LAST byte, e.g. "a" or "  a".
    Return ""
  ElseIf i <= 0 Then
    // first byte was >= 33 so we return the whole string
    Return s
  Else
    Return DefineEncoding(mb.StringValue(i, mb.Size - i), s.Encoding)
  End If
End Function
indeed that is a good idea
seems to improve speed a little
I altered the main testing to perform repeated strips & average over 1000 iterations
the test driver now looks like
Const iterations = 1000
// test driver code
// ======================== TEST 1: control bytes followed by kept characters
If True Then
  // Build code points 0..49; the expected result starts at code point 33.
  Const lastCodePoint = 49
  Dim source As String
  For codePoint As Integer = 0 To lastCodePoint
    source = source + Encodings.UTF8.Chr(codePoint)
  Next

  // Time `iterations` calls and report the average per call.
  Dim stripped As String
  Dim startedAt As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(source)
  Next rep
  Dim finishedAt As Double = Microseconds
  Dim average As Double = (finishedAt - startedAt) / iterations
  System.DebugLog "elapsed test 1 = " + Str(average) + " avg microseconds"

  // Drop into the debugger if the result is not what we expect.
  If Left(stripped, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(stripped) <> lastCodePoint - 33 + 1 Then
    Break
  End If
End If
// ======================== TEST 2: nothing but control bytes
If True Then
  // Code points 0..9 are all below 33, so the expected result is "".
  Dim source As String
  For codePoint As Integer = 0 To 9
    source = source + Encodings.UTF8.Chr(codePoint)
  Next

  // Time `iterations` calls and report the average per call.
  Dim stripped As String
  Dim startedAt As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(source)
  Next rep
  Dim finishedAt As Double = Microseconds
  Dim average As Double = (finishedAt - startedAt) / iterations
  System.DebugLog "elapsed test 2 = " + Str(average) + " avg microseconds"

  // Drop into the debugger if anything survived the strip.
  If stripped <> "" Then
    Break
  End If
End If
// ======================== TEST 3: nothing to strip
If True Then
  // Code points 33..49 are all at or above the threshold, so the string is
  // expected to come back unchanged.
  Const lastCodePoint = 49
  Dim source As String
  For codePoint As Integer = 33 To lastCodePoint
    source = source + Encodings.UTF8.Chr(codePoint)
  Next

  // Time `iterations` calls and report the average per call.
  Dim stripped As String
  Dim startedAt As Double = Microseconds
  For rep As Integer = 1 To iterations
    stripped = strip(source)
  Next rep
  Dim finishedAt As Double = Microseconds
  Dim average As Double = (finishedAt - startedAt) / iterations
  System.DebugLog "elapsed test 3 = " + Str(average) + " avg microseconds"

  // Drop into the debugger if the result is not what we expect.
  If Left(stripped, 1) <> Encodings.UTF8.Chr(33) Then
    Break
  End If
  If Len(stripped) <> lastCodePoint - 33 + 1 Then
    Break
  End If
End If
and the strip code like
// Body of strip(s As String) As String: returns s without its leading bytes
// whose value is below 33.
// Compile-time switch between the two ways of tagging the result's encoding,
// so both can be timed.
Const useDefineEncoding = True
#Pragma BreakOnExceptions False
#Pragma StackOverflowChecking False
#Pragma BackgroundTasks False
#Pragma BoundsChecking False

// since we know that the low code points are single bytes in MOST encodings
// NOTE: this scans byte by byte; UTF16 input (2 bytes per char) is NOT
// handled here and would need a 16 bit comparison instead.
Dim mb As MemoryBlock = s

// Hoisted so the loop bound and the checks below agree on one value.
Dim lastIndex As Integer = mb.Size - 1

Dim i As Integer
For i = 0 To lastIndex
  If mb.UInt8Value(i) >= 33 Then
    Exit For
  End If
Next

If i > lastIndex Then
  // The loop ran off the end (or never ran): nothing to keep.
  // This used to be "i >= mb.size-1", which also discarded strings whose
  // first kept byte was the LAST byte, e.g. "a" or "  a".
  Return ""
ElseIf i <= 0 Then
  // first byte was >= 33 so we return the whole string
  Return s
Else
  #If useDefineEncoding
    Return DefineEncoding(mb.StringValue(i, mb.Size - i), s.Encoding)
  #Else
    Return mb.StringValue(i, mb.Size - i, s.Encoding)
  #EndIf
End If
using my original suggestion I get
Jul 17 10:45:58 My Application[59693] <Warning>: elapsed test 1 = 1.027359 avg microseconds
Jul 17 10:45:58 My Application[59693] <Warning>: elapsed test 2 = 0.429799 avg microseconds
Jul 17 10:45:58 My Application[59693] <Warning>: elapsed test 3 = 0.310702 avg microseconds
the new one
Jul 17 10:46:48 My Application[59793] <Warning>: elapsed test 1 = 0.897624 avg microseconds
Jul 17 10:46:48 My Application[59793] <Warning>: elapsed test 2 = 0.436471 avg microseconds
Jul 17 10:46:48 My Application[59793] <Warning>: elapsed test 3 = 0.30063 avg microseconds
so it shaves roughly a tenth of a microsecond off the first test, which is always nice
the second is oddly slower - but still so close it might not matter
EDIT : the other thing is that various “optimizations” affect results in different ways
inserting a check for s = “” before the conversion to memory block speeds up some tests and slows others
inserting a check for mb.UInt8Value(0) >= 33 before the loop, to avoid running the loop at all, also speeds up some tests and slows others
checking for an empty string with s = “” is slower in all cases than converting to MB and checking size = 0
checking for mb.size = 0 is slower than checking for <= 0 in every test
removing the trailing check for i <= 0 speeds things up a tiny bit and doesn't appear to alter the results in any way (it handled the case of the first char being >= 33, which is now checked ahead of the loop)
which makes the strip code now look like
// Body of strip(s As String) As String: returns s without its leading bytes
// whose value is below 33.
#Pragma BreakOnExceptions False
#Pragma StackOverflowChecking False
#Pragma BackgroundTasks False
#Pragma BoundsChecking False

// since we know that the low code points are single bytes in MOST encodings
// NOTE: this scans byte by byte; UTF16 input (2 bytes per char) is NOT
// handled here and would need a 16 bit comparison instead.
Dim mb As MemoryBlock = s

// empty string ?
If mb.Size <= 0 Then
  Return ""
End If

// short cut check
// first byte >= 33 : nothing to strip, hand back the original
If mb.UInt8Value(0) >= 33 Then
  Return s
End If

// Hoisted so the loop bound and the check below agree on one value.
Dim lastIndex As Integer = mb.Size - 1

// Byte 0 was tested by the short cut above, so the scan starts at 1.
Dim i As Integer
For i = 1 To lastIndex
  If mb.UInt8Value(i) >= 33 Then
    Exit For
  End If
Next

If i > lastIndex Then
  // The loop ran off the end (or never ran): nothing to keep.
  // This used to be "i >= mb.size-1", which also discarded strings whose
  // first kept byte was the LAST byte, e.g. "  a".
  Return ""
Else
  Return mb.StringValue(i, mb.Size - i, s.Encoding)
End If
ah yes we can lose the check of 0 since we used that as a short cut above
EDIT : shaves the tiniest bit off but every little bit helps
now if only I could set each method to be optimized differently IF I wanted
I’d compile this one aggressive
so far all tests have been run with just the default optimization
EDIT II :
default, moderate then aggressive results
npalardy@Normans-MacBook-Pro My Application % ./My\ Application
Jul 17 15:46:10 My Application[78143] <Warning>: elapsed test (100000 iterations) = 1.01769 avg microseconds
Jul 17 15:46:10 My Application[78143] <Warning>: elapsed test (100000 iterations) = 0.4125023 avg microseconds
Jul 17 15:46:10 My Application[78143] <Warning>: elapsed test (100000 iterations) = 0.2873552 avg microseconds
npalardy@Normans-MacBook-Pro My Application % cd /Users/npalardy/Desktop/untitled\ folder/Builds\ -\ strip_speed/macOS\ Universal/My\ Application
npalardy@Normans-MacBook-Pro My Application % ./My\ Application
Jul 17 15:46:30 My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.9679235 avg microseconds
Jul 17 15:46:30 My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.3799189 avg microseconds
Jul 17 15:46:30 My Application[78176] <Warning>: elapsed test (100000 iterations) = 0.2923051 avg microseconds
npalardy@Normans-MacBook-Pro My Application % cd /Users/npalardy/Desktop/untitled\ folder/Builds\ -\ strip_speed/macOS\ Universal/My\ Application
npalardy@Normans-MacBook-Pro My Application % ./My\ Application
Jul 17 15:46:49 My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.8812051 avg microseconds
Jul 17 15:46:49 My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.365204 avg microseconds
Jul 17 15:46:49 My Application[78206] <Warning>: elapsed test (100000 iterations) = 0.2774543 avg microseconds
Good info, Norman, thanks. All of the suggestions in the original thread offer improvements, especially the third post from Jim Meyer pointing out that it was not smart to be modifying the source string in the loop, but the real time savings came from Karen A’s suggestion of putting the code inline to avoid the function call overhead, and my subsequent restructuring of the app to simply call the parent routine (which calls the Strip method recursively when parsing XML) a lot less frequently.