#define NULL 0 #define recordLen SizeOf(tNode) Const skipChars = 32 Const treeSubNodeCount = 256 - skipChars Const printInterval = 750 'Const As Integer schwarz = 0, blau = 1, gruen = 2, zyan = 3, cyan = 3, rot = 4, magenta = 5, _ ' braun = 6, hellgrau = 7, grau = 8, hellblau = 9, hellgruen = 10, hellzyan = 11, _ ' hellcyan = 11, hellrot = 12, pink = 13, gelb = 14, weiss = 15 Const As Integer schwarz = RGB(0,0,0), blau = RGB(0,0,255), gruen = RGB(0,255,0), _ zyan = RGB(0,128,128), cyan = RGB(0,128,128), rot = RGB(128,0,0), _ magenta = RGB(255,0,255), braun = RGB(0,0,0), _ hellgrau = RGB(200,200,200), grau = RGB(128,128,128), _ hellblau = RGB(0,0,128), hellgruen = RGB(0,128,0), _ hellzyan = RGB(0,255,255), hellcyan = RGB(0,255,255), _ hellrot = RGB(255,0,0), pink = RGB(255,0,255), gelb = RGB(255,255,0), _ weiss = RGB(255,255,255) Enum EState undefined word End Enum Type tNode subNodes(1 To treeSubNodeCount) As tNode Ptr count As UInteger = 0 End Type Declare Function isSplittingChar (char As UByte) As Integer Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Declare Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0,flag As ZString*1 = "") Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Declare Sub deallocateTree (tree() As tNode Ptr) Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr) Declare Sub auswertung1(text As String, count As UInteger) Declare Sub auswertung2(text As String, count As UInteger) Declare Sub auswertung3(text As String, count As UInteger) Declare Sub auswertung4(node As tNode Ptr) Dim As UByte char, c Dim As EState state = EState.undefined Dim As UInteger numWords = 0 Dim As UInteger currentWordCharCount = 0, totalCharCount = 0 Dim As ULongInt totalInputSize Dim As String buffer = "" Dim As Double tStart, tEnd, totalInputTime Dim As String text, samples Dim As Integer nodeCounter = treeSubNodeCount, newWordCounter, newNodes, wordBegin, wordEnd, _ wkorr, nkorr Dim Shared As Integer breite, hoehe Dim As Single xwert, ywert Dim tree(1 To treeSubNodeCount) As tNode Ptr Dim tp As tNode Ptr For i As Integer = LBound(tree) To UBound(tree) tree(i) = Callocate(recordLen) Next i 'Dim As String inputFiles(1 To ...) = { "d:\internet\bibel\altes_testament_luther_revidiert.txt", _ ' "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ ' ExePath + "\debianreferenz.txt" } Dim As String inputFiles(1 To ...) = { ExePath + "\debianreferenz.txt", _ "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ "d:\internet\bibel\altes_testament_luther_revidiert.txt" } Dim As Integer smplen(10), smpzae, txtlen(10) tStart = Timer 'ScreenRes 1200,800,16 ScreenRes 640,480,16 ScreenInfo breite,hoehe Dim As ULongInt msiz,siz Dim As UInteger mnod, nod Dim As ULongInt Ptr sp Dim As UInteger Ptr np For x As Integer = 1 To UBound(inputFiles) tStart = Timer Locate 50,10 Open inputFiles(x) For Binary Access Read As #1 text = Input (Lof(1),1) + " " 'datei einlesen Close 1 wordBegin = -1 Do 'text abarbeiten Do 'nächsten wortanfang suchen wordBegin += 1 Loop Until (isSplittingChar(text[wordBegin]) = 0) Or (wordBegin > Len(text)) wordEnd = wordBegin Do 'nächstes wortende suchen wordEnd += 1 Loop Until (isSplittingChar(text[wordEnd]) <> 0) buffer = Mid(text,wordBegin + 1, wordEnd - wordBegin) 'wort aus text holen totalCharCount += wordEnd - wordBegin If buffer = "" Then Exit Do EndIf wordBegin = wordEnd 'zeiger für nächste suche setzen numWords += 1 newNodes = putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0) nodeCounter += newNodes If newNodes Then newWordCounter += 1 Locate 52,10 samples += MkLongint(totalInputSize + wordBegin) + Mki(nodeCounter) EndIf Loop Until wordEnd >= Len(text) Locate 51,10 totalInputSize += Len(text) tEnd = Timer totalInputTime += (tEnd - tStart) Cls Line (10,hoehe-10)-(10,170) Line (10,hoehe-10)-(breite-10,hoehe-10) np = Cast(UInteger Ptr,StrPtr(samples)+Len(samples)-4) 'StrPtr(smp) mnod = *np 'maximale knotenzahl nkorr = mnod / (hoehe - 200) sp = Cast(ULongInt Ptr,StrPtr(samples)+Len(samples)-12) 'StrPtr(smp) msiz = *sp 'maximale textlänge wkorr = Len(samples)/12 / (hoehe - 200) wordBegin = 0 For y As Integer = 0 To Len(samples) - 1 Step 12 np = Cast(UInteger Ptr,StrPtr(samples) + y + 8) 'StrPtr(smp) nod = *np sp = Cast(ULongInt Ptr,StrPtr(samples) + y) 'StrPtr(smp) siz = *sp xwert = 10 + (breite-20)*(siz/msiz) ywert = (hoehe - 10) - ((nod/nkorr) ) PSet (xwert,ywert) PSet (xwert,(hoehe - 10) - ((y/12)/wkorr)),gelb 'y = anzahl der wörter * 12 Next Locate hoehe/8 - 8,breite/8 - 20 Print "_____ Speicher" Color gelb,schwarz Locate hoehe/8 - 6,breite/8 - 20 Print "_____ Woerter" Color weiss,schwarz text = "" 'speicher freigeben Locate 1,1 Print "Datei eingelesen in "; Print Using "##.###"; (totalInputTime); Print " Sekunden." Print Print " Anzahl der Knoten:";nodeCounter Print " Belegter Speicher:";Int((nodeCounter * recordLen)/1024^2*10)/10;" MB" Print Dim As Double averageLength = 0 If (numWords > 0) Then averageLength = (totalCharCount / CDbl(numWords)) End If Print " Woerter insgesamt: " & numWords Print " Unterschiedliche Woerter: " & newWordCounter Print "Durchschnittliche Wortlaenge:"; Print Using "##.### "; averageLength; Print "Buchstaben." Print Print "Gezielter Lookup von Woertern:" Dim lookupWords(1 To ...) As String = { "Gott", "Herr", "der", "die", "das", "Klingone" } tStart = Timer For i As Integer = LBound(lookupWords) To UBound(lookupWords) Print "Wie oft kommt " & Chr(34) & lookupWords(i) & Chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x" Next i tEnd = Timer Print Print "Das Nachschlagen der Haeufigkeiten dauerte "; Print Using "##.######"; (tEnd-tStart); Print " Sekunden." smpzae += 1 smplen(smpzae) = Len(samples)'/1700 txtlen(smpzae) = msiz Next text = "" 'speicher freigeben 'Beispiele zur Auswertung Locate hoehe/8-3,15 Color gelb,schwarz Print "Druecken Sie eine beliebige Taste zum Fortsetzen" Color weiss,schwarz GetKey View Print 20 To hoehe/8 Cls 2 Color hellcyan,schwarz Print "Das Wort mit mindestens 2 Buchstaben, das am haeufigsten vorkommt:" Color weiss,schwarz View Print 22 To hoehe/8 For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars+i),@auswertung1,"w") Next i Locate hoehe/8-3,15 Color gelb,schwarz Print "Druecken Sie eine beliebige Taste zum Fortsetzen" Color weiss,schwarz GetKey View Print 20 To hoehe/8 Cls 2 Color hellcyan,schwarz Print "Alle Woerter, deren 3. Buchstabe ein ""a"" ist:" Color weiss,schwarz View Print 22 To hoehe/8 For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars+i),@auswertung2,"w") Next i Print Color gelb,schwarz Print "Druecken Sie eine beliebige Taste zum Fortsetzen" Color weiss,schwarz GetKey View Print 20 To hoehe/8 Cls 2 Color hellcyan,schwarz Print "Alle Woerter, in denen die Zeichenfolge ""auto"" vorkommt:" Color weiss,schwarz View Print 22 To hoehe/8 For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars+i),@auswertung3,"w") Next i Print Color gelb,schwarz Print "Druecken Sie eine beliebige Taste zum Fortsetzen" Color weiss,schwarz GetKey View Print 20 To hoehe/8 Cls 2 Color hellcyan,schwarz Print "Visualisierung des kompletten Baums" Color weiss,schwarz View Print 22 To hoehe/8 For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars+i),@auswertung4,"n") Next i locate 20,10,0 Color hellrot,schwarz Print "Druecken Sie eine beliebige Taste zum Beenden" Color weiss,schwarz GetKey Print Print "Abbau des Baums aus dem Speicher... "; deallocateTree(tree()) Print "Fertig." End 0 'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende 'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein 'Zeilenumbruch oder ein Komma). Function isSplittingChar (char As UByte) As Integer ' , . : ; / Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47)) End Function Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Dim As UByte c = buffer[index] Dim As Integer newNodes = 0 If ((index+1) >= Len(buffer)) Then ' Wort endet hier in diesem Knoten node->count += 1 Else c = buffer[index+1] If (node->subNodes(c-skipChars) = NULL) Then node->subNodes(c-skipChars) = Callocate(recordLen) newNodes += 1 End If newNodes += putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1) End If Return newNodes End Function Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0,flag As ZString*1 = "") Dim wordPlugin As Sub (text As String, count As UInteger) Dim nodePlugin As Sub (node As tNode Ptr) If (node = NULL) Then Return If pluginPointer <> 0 And flag = "n" Then nodePlugin = pluginPointer nodePlugin(node) EndIf If (node->count > 0) Then If pluginPointer <> 0 And flag = "w" Then wordPlugin = pluginPointer wordPlugin(path,node->count) ElseIf flag = "" Then Print path & " => " & node->count & " x" EndIf End If For i As Integer = 1 To treeSubNodeCount If (node->subNodes(i) <> NULL) Then traverseTree (node->subNodes(i), i, path + Chr(i+skipChars),pluginPointer,flag) End If Next i End Sub Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer If (Len(word) < 1) Then Return 0 Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) End Function Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer If ((index+1) >= Len(word)) Then Return node->count Else Dim As UByte nextChar = word[index+1] Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden. Return lookupCharacterCount (nextNode, word, index+1) End If End Function Sub deallocateTree (tree() As tNode Ptr) For i As Integer = 1 To treeSubNodeCount deallocateNodeAndSubNodes(tree(i)) DeAllocate tree(i) Next i End Sub Sub deallocateNodeAndSubNodes (node As tNode Ptr) If (node = NULL) Then Return deallocateTree(node->subNodes()) End Sub Sub auswertung1(text As String, count As UInteger) Static As Integer vorkommen, laenge, position Static As String wort position = Locate If (count > vorkommen) And (Len(text) > 1) Then wort = text vorkommen = count Locate 30,5 Print "haeufigstes Wort ist: """;wort;""" (";vorkommen;" mal)";String(breite/8," ") Locate LoByte(position),HiByte(position),HiWord(position) EndIf If Len(text) > laenge Then laenge = Len(text) Locate 35,5 Print "Laengstes Wort (";laenge;" Zeichen) ist: ";text Locate LoByte(position),HiByte(position),HiWord(position) EndIf End Sub Sub auswertung2(text As String, count As UInteger) Static As Integer zaehler If Mid(text,3,1) = "a" Then zaehler += 1 Print zaehler;" ";text EndIf End Sub Sub auswertung3(text As String, count As UInteger) Static As Integer zaehler If InStr(text,"auto") Then zaehler += 1 Print zaehler;" ";text EndIf End Sub Sub auswertung4(node As tNode Ptr) Static merken As String Static As Integer nodenr Dim As Integer h,b,pc Dim As UShort c ScreenInfo b,h nodenr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count * 200) PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(LoByte(pc*20),LoByte(c),HiByte(c)) End Sub