#define NULL 0 #define recordLen SizeOf(tNode) Const skipChars = 32 Const treeSubNodeCount = 256 - skipChars Const printInterval = 750 Enum EState undefined word End Enum Type tNode subNodes(1 To treeSubNodeCount) As tNode Ptr count As UInteger = 0 End Type Declare Function isSplittingChar (char As UByte) As Integer Declare Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) Declare Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String) Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Declare Sub deallocateTree (tree() As tNode Ptr) Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr) Dim As UByte char, c Dim As EState state = EState.undefined Dim As UInteger numWords = 0 Dim As UInteger currentWordCharCount = 0, totalCharCount = 0 Dim As String buffer = "" Dim As Double tStart, tEnd Dim tree(1 To treeSubNodeCount) As tNode Ptr For i As Integer = LBound(tree) To UBound(tree) tree(i) = CAllocate(recordLen) Next i tStart = TIMER Open ExePath + "/debian-reference.de.txt" For Binary As #1 Do Until Eof(1) Get #1, , char If (isSplittingChar(char)) Then If (state = EState.word) Then numWords += 1 totalCharCount += currentWordCharCount currentWordCharCount = 0 state = EState.undefined ' Wort in den Baum einfuegen 'Print "Fuege "; chr(34); buffer; chr(34); " ein." putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0) buffer = "" End If Else If (state = EState.undefined) Then state = EState.word currentWordCharCount = 0 End If currentWordCharCount += 1 buffer = buffer + Chr(char) End If If (numWords Mod printInterval = 0) Then Locate 1,1: Print numWords & " Woerter bisher erfasst..." End If Loop 'Ist noch ein Wort angefangen? If (state = EState.word) Then numWords += 1 totalCharCount += currentWordCharCount End If Close #1 tEnd = TIMER Locate 1,1 Print "Datei eingelesen in "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Print Dim As Double averageLength = 0 If (numWords > 0) Then averageLength = (totalCharCount / CDbl(numWords)) End If Print "Es wurden " & numWords & " Woerter gezaehlt. " Print "Die durchschnittliche Wortlaenge betrug "; Print Using "##.### "; averageLength; Print "Buchstaben." Print 'Ganzen Baum ausgeben: 'Print "Haeufigkeiten:" 'For i As Integer = 1 To treeSubNodeCount ' traverseTree(tree(i), i, Chr(skipChars+i)) 'Next i 'Print Print "Gezielter Lookup von Woertern:" Dim lookupWords(1 To ...) As String = { "Shell", "Editor", "der", "die", "das", "Klingone" } tStart = Timer For i As Integer = LBound(lookupWords) To UBound(lookupWords) Print "Wie oft kommt " & chr(34) & lookupWords(i) & chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x" Next i tEnd = Timer Print Print "Das Nachschlagen der Haeufigkeiten dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Print Print "Abbau des Baums aus dem Speicher... "; deallocateTree(tree()) Print "Fertig." Print Print "Druecken Sie eine beliebige Taste zum Beenden." GetKey End 0 'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende 'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein 'Zeilenumbruch oder ein Komma). Function isSplittingChar (char As UByte) As Integer ' , . : ; / Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47)) End Function Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) Dim As UByte c = buffer[index] If ((index+1) >= Len(buffer)) Then ' Wort endet hier in diesem Knoten node->count += 1 Else c = buffer[index+1] If (node->subNodes(c-skipChars) = NULL) Then node->subNodes(c-skipChars) = CAllocate(recordLen) End if putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1) End If End Sub Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String) If (node = NULL) Then Return If (node->count > 0) Then Print path & " => " & node->count & " x" End If For i As Integer = 1 To treeSubNodeCount If (node->subNodes(i) <> NULL) Then traverseTree (node->subNodes(i), i, path + Chr(i+skipChars)) End If Next i End Sub Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer If (Len(word) < 1) Then Return 0 Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) End Function Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer If ((index+1) >= Len(word)) Then Return node->count Else Dim As UByte nextChar = word[index+1] Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden. Return lookupCharacterCount (nextNode, word, index+1) End If End Function Sub deallocateTree (tree() As tNode Ptr) For i As Integer = 1 To treeSubNodeCount deallocateNodeAndSubNodes(tree(i)) DeAllocate tree(i) Next i End Sub Sub deallocateNodeAndSubNodes (node As tNode Ptr) If (node = NULL) Then Return deallocateTree(node->subNodes()) End Sub