#define NULL 0 #define recordLen SizeOf(tNode) Const skipChars = 32 Const treeSubNodeCount = 256 - skipChars Const printInterval = 750 Const As Integer schwarz = 0, blau = 1, gruen = 2, zyan = 3, cyan = 3, rot = 4, magenta = 5, _ braun = 6, hellgrau = 7, grau = 8, hellblau = 9, hellgruen = 10, hellzyan = 11, _ hellcyan = 11, hellrot = 12, pink = 13, gelb = 14, weiss = 15 Enum EState undefined word End Enum Type tNode subNodes(1 To treeSubNodeCount) As tNode Ptr count As UInteger = 0 End Type Declare Function isSplittingChar (char As UByte) As Integer Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Declare Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String) Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Declare Sub deallocateTree (tree() As tNode Ptr) Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr) Dim As UByte char, c Dim As EState state = EState.undefined Dim As UInteger numWords = 0, anfang, ende Dim As UInteger currentWordCharCount = 0, totalCharCount = 0 Dim As String buffer = "" Dim As Double tStart, tEnd Dim As String text Dim As Integer nodeCounter = 1, newWordCounter, newNodes, breite, hoehe Dim As Single xwert, ywert Dim tree(1 To treeSubNodeCount) As tNode Ptr For i As Integer = LBound(tree) To UBound(tree) tree(i) = CAllocate(recordLen) Next i tStart = TIMER 'Open ExePath + "/debian-reference.de.txt" For Binary As #1 'Open ExePath + "/debianreferenz.txt" For Binary As #1 Open "d:\internet\bibel\altes_testament_luther_revidiert.txt" For Binary Access Read As #1 'Open ExePath + "\test.txt" For Binary Access Read As #1 text = Input (Lof(1),1) 'datei einlesen Close 1 text += " " Open "d:\internet\bibel\neues_testament_luther_revidiert.txt" For Binary Access Read As #1 text += Input (Lof(1),1) 'datei einlesen Close 1 'ScreenRes 1200,800 ScreenRes 640,480 screeninfo breite,hoehe Line (10,hoehe-10)-(10,170) Line (10,hoehe-10)-(breite-10,hoehe-10) anfang = -1 Do 'text abarbeiten Do 'nächsten wortanfang suchen anfang += 1 Loop Until (isSplittingChar(text[anfang]) = 0) Or (anfang > Len(text)) ende = anfang Do 'nächstes wortende suchen ende += 1 Loop Until (isSplittingChar(text[ende]) <> 0) Or (ende >= Len(text)) buffer = Mid(text,anfang + 1, ende - anfang) 'wort aus text holen totalCharCount += ende - anfang If buffer = "" Then Exit Do EndIf anfang = ende 'zeiger für nächste suche setzen numWords += 1 newNodes = putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0) nodeCounter += newNodes If newNodes Then newWordCounter += 1 EndIf 'x -> datei 'y -> baum xwert = 10 + (breite-20)*(anfang/Len(text)) ywert = (hoehe - 10) - (nodeCounter/200) PSet (xwert,ywert) PSet (xwert,(hoehe - 10) - (newWordCounter/70)),gelb Loop Until ende >= Len(text) Locate 40,50 Print "_____ Speicher" Color gelb,schwarz Locate 42,50 Print "_____ Woerter" Color weiss,schwarz text = "" 'speicher freigeben tEnd = TIMER Locate 1,1 Print "Datei eingelesen in "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Print Print " Anzahl der Knoten:";nodeCounter Print " Belegter Speicher:"; Print Using "###.#";(nodeCounter * recordLen)/1024^2; Print " MB" Print Dim As Double averageLength = 0 If (numWords > 0) Then averageLength = (totalCharCount / CDbl(numWords)) End If Print " Woerter insgesamt: " & numWords Print " Unterschiedliche Woerter: " & newWordCounter Print "Durchschnittliche Wortlaenge:"; Print Using "##.### "; averageLength; Print "Buchstaben." Print 'Ganzen Baum ausgeben: 'Print "Haeufigkeiten:" 'For i As Integer = 1 To treeSubNodeCount ' traverseTree(tree(i), i, Chr(skipChars+i)) 'Next i 'Print Print "Gezielter Lookup von Woertern:" Dim lookupWords(1 To ...) As String = { "Gott", "Herr", "der", "die", "das", "Klingone" } tStart = Timer For i As Integer = LBound(lookupWords) To UBound(lookupWords) Print "Wie oft kommt " & chr(34) & lookupWords(i) & chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x" Next i tEnd = Timer Print Print "Das Nachschlagen der Haeufigkeiten dauerte "; Print Using "##.######"; (tEnd-tStart); Print " Sekunden." Print Print "Abbau des Baums aus dem Speicher... "; deallocateTree(tree()) Print "Fertig." Print Print "Druecken Sie eine beliebige Taste zum Beenden." GetKey End 0 'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende 'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein 'Zeilenumbruch oder ein Komma). Function isSplittingChar (char As UByte) As Integer ' , . : ; / Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47)) End Function Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Dim As UByte c = buffer[index] Dim As Integer newNodes = 0 If ((index+1) >= Len(buffer)) Then ' Wort endet hier in diesem Knoten node->count += 1 Else c = buffer[index+1] If (node->subNodes(c-skipChars) = NULL) Then node->subNodes(c-skipChars) = CAllocate(recordLen) newNodes += 1 End if newNodes += putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1) End If Return newNodes End Function Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String) If (node = NULL) Then Return If (node->count > 0) Then Print path & " => " & node->count & " x" End If For i As Integer = 1 To treeSubNodeCount If (node->subNodes(i) <> NULL) Then traverseTree (node->subNodes(i), i, path + Chr(i+skipChars)) End If Next i End Sub Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer If (Len(word) < 1) Then Return 0 Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) End Function Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer If ((index+1) >= Len(word)) Then Return node->count Else Dim As UByte nextChar = word[index+1] Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden. Return lookupCharacterCount (nextNode, word, index+1) End If End Function Sub deallocateTree (tree() As tNode Ptr) For i As Integer = 1 To treeSubNodeCount deallocateNodeAndSubNodes(tree(i)) DeAllocate tree(i) Next i End Sub Sub deallocateNodeAndSubNodes (node As tNode Ptr) If (node = NULL) Then Return deallocateTree(node->subNodes()) End Sub