'Die Routinen zum Erzeugen und Durchsuchen des Baumes wurden von Sebastian geschrieben ' (http://users.freebasic-portal.de/sebastian/fb/wordcount/wordcount_tree.bas), die ' Programmteile zur Verbindung mit dem Internet stammen aus dem Codebeispiel ' "Websites selbst verarbeiten" von PMedia ' (http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html) #define NULL 0 #define recordLen SizeOf(tNode) #Ifndef recvbufflen #define RECVBUFFLEN 16384 #EndIf #Ifndef newline #define newline Chr(13,10) #EndIf #Include Once "win/winsock2.bi" Const skipChars = 32 Const treeSubNodeCount = 256 - skipChars Const printInterval = 750 Const As Integer schwarz = RGB(0,0,0), rot = RGB(255,0,0), gruen = RGB(0,255,0), _ blau = RGB(0,0,255), cyan = RGB(0,255,255), magenta = RGB(255,0,255), _ gelb = RGB(255,255,0), weiss = RGB(255,255,255) Type tNode subNodes(1 To treeSubNodeCount) As tNode Ptr count As UInteger = 0 End Type Type tSample ratio As Single words As UInteger nodes As UInteger End Type Dim As tSample sample Dim As tSample Ptr samplePtr Declare Function isSplittingChar (char As UByte) As Integer Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Declare Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0) Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Declare Sub deallocateTree (tree() As tNode Ptr) Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr) Declare Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub InitWinsock Declare Sub ExitWinsock Declare Function httpget(server As String, path As String, hadd As String = "") As String Declare Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String Declare Function httperror(text As String) As Integer Declare Function timeFormat (sekunden As Double, stellen As Integer = 0) As String Declare Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Function extractPlainText(text As String) As String Declare Function utf8ToAnsi(text As String) As String Dim As UByte char, c Dim As UInteger numWords = 0, currentWordCharCount = 0, totalCharCount = 0, newWords, _ words, wordsMax, nodes, nodesMax, wScale, nScale, rScale, siteCount, _ totalNodeCount = treeSubNodeCount, newWordCount, newNodes, wordBegin, wordEnd Dim Shared As Integer scrWidth, scrHeight Dim As ULongInt totalInputSize, sampleTrigger Dim As String buffer = "", domain, address, g, exposition = "1", text, textm Dim As String samples = String(SizeOf(tSample),Chr(0)) Dim As Double tStart, tEnd, totalInputTime, startTime, averageLength Dim As Single xwert, ywert, ratioMax, ratioMaxTemp, ratioAct Dim tree(1 To treeSubNodeCount) As tNode Ptr '******* URLDownloadToFile einbinden **************************** Dim URLDownloadToFile As Function (_ ByVal pCaller As Long, _ ByVal szURL As ZString Ptr, _ ByVal szFileName As ZString Ptr, _ ByVal dwReserved As Long, _ ByVal lpfnCB As Long) As Long Dim library As Any Ptr library = DylibLoad( "urlmon.dll" ) URLDownloadToFile = DyLibSymbol( library, "URLDownloadToFileA" ) '***************************************************************** For i As Integer = LBound(tree) To UBound(tree) tree(i) = Callocate(recordLen) Next i Dim As String inputFiles(1 To ...) = { "d:\internet\bibel\altes_testament_luther_revidiert.txt", _ "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ ExePath + "\debianreferenz.txt" } 'Dim As String inputFiles(1 To ...) = { ExePath + "\debianreferenz.txt", _ ' "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ ' "d:\internet\bibel\altes_testament_luther_revidiert.txt" } 'domain = "de.wikipedia.org" 'address = "wiki/Spezial:Zuf%C3%A4llige_Seite" 'domain = "en.wikipedia.org" 'address = "wiki/Special:Random" 'textm = httpget(domain,address) + " " 'Open ExePath + "\wiki.txt" For Output As #1 'Print #1, textm 'Close 'Sleep 'End startTime = Timer 'ScreenRes 1200,800,16 ScreenRes 640,480,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Do 'eingangsdaten von wikipedia holen domain = "de.wikipedia.org" address = "wiki/Spezial:Zuf%C3%A4llige_Seite" g = "Location: http://de.wikipedia.org/wiki/" 'kennung 'domain = "en.wikipedia.org" 'address = "wiki/Special:Random" 'g = "Location: http://en.wikipedia.org/wiki/" 'kennung text = httpget(domain,address) 'zufällige seite holen 'link auf zufällige seite isolieren wordBegin = InStr(text,g) 'pointer auf anfang wordEnd = InStr(wordBegin + Len(g),text,Chr(13,10)) 'pointer auf ende address = Mid(text,wordBegin,wordEnd-wordBegin) 'link holen address = Mid(address,35) ' präfix ("Location: http://de.wikipedia.org/") abtrennen text = httpget(domain,address) ' website herunterladen siteCount += 1 tStart = Timer text = extractPlainText(text) If text = "" Then 'fehler Continue Do 'nächste site EndIf text = LCase(text) 'umwandeln in kleinbuchstaben ''themenbereich eingrenzn 'Dim themeWords(1 To ...) As String = { "computer", "freebasic", "website", "internet" } ' 'For x As Integer = 1 To UBound(themeWords) ' If InStr(text,themeWords(x)) Then ' Exit For ' EndIf ' Continue Do 'Next ''website in datei schreiben 'Open ExePath + "\wiki.txt" For Output As #1 'Print #1, text 'Close wordBegin = -1 Do 'text abarbeiten Do 'nächsten wortanfang suchen wordBegin += 1 Loop Until (isSplittingChar(text[wordBegin]) = 0) Or (wordBegin > Len(text)) wordEnd = wordBegin Do 'nächstes wortende suchen wordEnd += 1 Loop Until (isSplittingChar(text[wordEnd]) <> 0) buffer = Mid(text,wordBegin + 1, wordEnd - wordBegin) 'wort aus text holen 'buffer = LCase(buffer) totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren wordBegin = wordEnd 'zeiger für nächste suche setzen If buffer = "" Then 'nächstes wort Continue Do EndIf ' wörter ausfiltern 'If (InStr(buffer, Any "0123456789""")) Or (Len(buffer) > 50) Then 'wort auslassen ' Continue Do 'EndIf 'Select Case buffer[0] ' Case Asc("a") To Asc("z"),Asc("A") To Asc("Z"),Asc("ä"),Asc("ö"),Asc("ü"),Asc("Ä"),Asc("Ö"),Asc("Ü") 'nur wörter, die mit buchstaben beginnen ' ' Case Else ' Continue Do 'End Select totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren numWords += 1 'wortzähler erhöhen newNodes = putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0) totalNodeCount += newNodes 'knotenzähler aktualisieren If newNodes Then 'neues Wort gefunden newWordCount += 1 'zähler für "unterschiedliche wörter" aktualisieren EndIf 'für graph If totalInputSize + wordBegin >= sampleTrigger + 1000 Then 'schnappschuss alle 1000 zeichen samples += Mks(CSng(newWordCount/numWords)) + Mki(numWords) + Mki(totalNodeCount) sampleTrigger = totalInputSize + wordBegin EndIf Loop Until wordEnd >= Len(text) 'bis text abgearbeitet ist totalInputSize += Len(text) 'zähler für insgesamt eingelesene daten aktualisieren tEnd = Timer totalInputTime += (tEnd - tStart) 'darstellung ScreenLock View Print Cls 'textblock Print " Gesamtzeit: ";timeFormat(Timer - startTime,2) Print "Gesamtzeit zum Einlesen der Daten: ";timeFormat(totalInputTime,2) Print Print " Anzahl der Knoten: ";totalNodeCount g = Str((totalNodeCount * recordLen)/1024^2) g = Left(g,InStr(g,".")+1) Print " Belegter Speicher: ";g;" MB" Print If (numWords > 0) Then averageLength = (totalCharCount / CDbl(numWords)) End If Print " Woerter insgesamt: " & numWords Print " Unterschiedliche Woerter: " & newWordCount Print " Durchschnittliche Wortlaenge:"; Print Using "##.### "; averageLength; Print "Buchstaben." Print Print "Eingelesene Website (";siteCount;"): ";address Locate 14,1 Select Case exposition 'gewählte art der auswertung Case "1" 'graphische darstellung Color cyan,schwarz Print " 1) Graphische Darstellung:" Color weiss,schwarz Line (10,scrHeight-10)-(10,170) Line (10,scrHeight-10)-(scrWidth-10,scrHeight-10) 'darstellung vorbereiten 'maximalwerte samplePtr = Cast(tSample Ptr,StrPtr(samples) + Len(samples) - SizeOf(tSample)) 'pointer auf letzten samplewert wordsMax = samplePtr->words nodesMax = samplePtr->nodes ratioAct = samplePtr->ratio wScale = wordsMax / (scrHeight - 200) 'maßstab für wörter nScale = nodesMax / (scrHeight - 200) 'maßstab für speicherbelegung rScale = (scrHeight - 200) / ratioMax 'maßstab für wortverhältnis 'graphen auf bildschirm schreiben wordBegin = 0 ratioMaxTemp = 0 For y As Integer = 0 To Len(samples) - 1 Step SizeOf(tSample) 'alle samples With *Cast(tSample Ptr,StrPtr(samples) + y) 'pointer auf sample If .ratio > ratioMaxTemp Then 'größtes verhältnis ermitteln ratioMaxTemp = .ratio EndIf xwert = 10 + (scrWidth - 20) * y / Len(samples) ywert = (scrHeight - 10) - .nodes / nScale ' graphen schreiben PSet (xwert,ywert) 'speicherbelegung PSet (xwert,scrHeight - 10 - .words / wScale),gelb 'wörter gesamt PSet (xwert,scrHeight - 10 - Int(.ratio * rScale)),cyan 'unterschiedliche wörter End With Next ratioMax = ratioMaxTemp 'größtes verhältnis merken 'beschriftung Line (10,ywert)-(scrWidth - 10,ywert),weiss,,&b0000000000000011 g = Str((totalNodeCount * recordLen) / 1024^2) g = Left(g,InStr(g,".") + 1) Draw String (30,ywert - 10),g + " " + "MB" 'speicherbedarf Draw String (30,ywert + 5),Str(wordsMax),gelb 'wörter gesamt g = Str(100 * ratioAct ) g = Left(g,InStr(g,".") + 1) + "%" Draw String (scrWidth - 50,scrHeight - 30 - ratioAct * rScale),g,cyan 'wortverhältnis 'legende Draw String (scrWidth / 2,scrHeight - 50),"_____ Speicher",weiss Draw String (scrWidth / 2,scrHeight - 40),"_____ Woerter gesamt",gelb Draw String (scrWidth / 2,scrHeight - 30),"_____ Rel. verschiedene",cyan Case "2" 'gezielt nach wörtern suchen Color cyan,schwarz Print " 2) Gezielter Lookup von Woertern:" Color weiss,schwarz Locate CsrLin + 3,1 Dim lookupWords(1 To ...) As String = { "Computernerd", "freeBasic", "der", "die", "das", _ "und", "in", "Klingone", "Rhabarber", "Haus" } tStart = Timer For i As Integer = LBound(lookupWords) To UBound(lookupWords) Locate CsrLin,5 Print "Wie oft kommt "; Color gelb,schwarz Print lookupWords(i); Color weiss,schwarz Print " vor? "; Color rot,schwarz Print lookupWordCount(tree(), LCase(lookupWords(i))); Color weiss,schwarz Print "x" Next i tEnd = Timer Locate CsrLin + 1,5 Print "Das Nachschlagen der Haeufigkeiten dauerte "; Print Using "##.######"; (tEnd-tStart); Print " Sekunden." Case "3" 'häufigste und längste wörter Color cyan,schwarz Print " 3) Die 20 haeufigsten Woerter:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten For i As Integer = 1 To treeSubNodeCount 'alle knoten durchgehen traverseTree(tree(i), i, Chr(skipChars + i),@auswertung1) Next i tEnd = Timer 'zeitmessung stoppen Locate CsrLin + 3,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Case "4" '3. buchstabe "a" Color cyan,schwarz Print " 4) Alle Woerter, deren 3. Buchstabe ein ""a"" ist:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars + i),@auswertung2) Next i tEnd = Timer Locate CsrLin + 1,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Case "5" 'alle wörter mit "auto" Color cyan,schwarz Print " 5) Alle Woerter, in denen die Zeichenfolge ""auto"" vorkommt:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars + i),@auswertung3) Next i tEnd = Timer Locate CsrLin + 1,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Case "6" 'visualisierung --> baum als punkte Color cyan,schwarz Print " 6) Visualisierung des kompletten Baums als Punktefeld (1 Knoten = 1 Punkt)" Color weiss,schwarz For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars + i),@auswertung4) Next i Case "7" 'visualisierung --> anfangsbuchstaben Color cyan,schwarz Print " 7) Visualisierung des kompletten Baums als Punktefeld mit Anfangsbuchstaben" Color weiss,schwarz For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars + i),@auswertung5) Next i Case "8" 'visualisierung --> anfangsbuchstaben als weisse punkte / baum als punkte Color cyan,schwarz Print " 8) Baum als Punktefeld mit weissen Punkten am Wortanfang" Color weiss,schwarz For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars + i),@auswertung6) Next i End Select ScreenUnLock 'tastaturabfrage g = InKey Select Case g Case "1","2","3","4","5","6","7","8" 'darstellung auswählen exposition = g Case "d" 'gesamten baum in datei schreiben Open ExePath + "\wiki.txt" For Output As #1 For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(skipChars+i),@ausgabe) Next i Close 1 Case "+" 'grosses fenster ScreenRes 1200,800,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Case "-" 'kleines fenster ScreenRes 640,480,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Case Chr(27) 'esc Exit Do 'programm beenden End Select Loop View Print 13 To scrHeight/8 Cls 2 Print Print "Abbau des Baums aus dem Speicher... "; deallocateTree(tree()) Print "Fertig." End 0 'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende 'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein 'Zeilenumbruch oder ein Komma). Function isSplittingChar (char As UByte) As Integer ' , . : ; / Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or _ (char = 47) Or (char = Asc("-") Or (char = Asc("+")))) End Function Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Dim As UByte c = buffer[index] Dim As Integer newNodes = 0 If ((index+1) >= Len(buffer)) Then ' Wort endet hier in diesem Knoten node->count += 1 Else c = buffer[index+1] If (node->subNodes(c-skipChars) = NULL) Then node->subNodes(c-skipChars) = Callocate(recordLen) newNodes += 1 End If newNodes += putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1) End If Return newNodes End Function Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0) 'ruft nacheinander alle knoten des baums auf Dim Plugin As Sub (text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'Print #1, node;" ";index;" ";path;" ";pluginPointer If (node = NULL) And (index <> 0) Then Return If pluginPointer Then 'plugin vorhanden Plugin = pluginPointer Plugin(path,node->count,node,index) 'plugin aufrufen Else If (node->count > 0) Then Print path & " => " & node->count & " x" End If End If For i As Integer = 1 To treeSubNodeCount 'rekursiver aufruf aller knoten, auf die der aktuelle knoten zeigt If (node->subNodes(i) <> NULL) Then traverseTree (node->subNodes(i), i, path + Chr(i+skipChars),pluginPointer) End If Next i End Sub Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer If (Len(word) < 1) Then Return 0 Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) 'pointer auf 1. knoten End Function Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Dim As String wd = word If ((index+1) >= Len(wd)) Then Return node->count Else Dim As UByte nextChar = wd[index+1] 'nächster buchstabe Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) 'knoten für nächsten buchstaben If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden. Return lookupCharacterCount (nextNode, wd, index+1) End If End Function Sub deallocateTree (tree() As tNode Ptr) For i As Integer = 1 To treeSubNodeCount deallocateNodeAndSubNodes(tree(i)) DeAllocate tree(i) Next i End Sub Sub deallocateNodeAndSubNodes (node As tNode Ptr) If (node = NULL) Then Return deallocateTree(node->subNodes()) End Sub Sub InitWinsock Constructor 'Autor: PMedia http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html ' init winsock Dim wsaData As WSAData If( WSAStartup( MAKEWORD( 1, 1 ), @wsaData ) <> 0 ) Then Print "Error: WSAStartup failed" End 1 End If End Sub Sub ExitWinsock Destructor WSACleanup End Sub Function httpget(server As String, path As String, hadd As String = "") As String Dim IP As Integer Dim ia As in_addr Dim s As SOCKET Dim hostentry As hostent Ptr Dim sendbuffer As String Dim recvbuffer As ZString * RECVBUFFLEN+1 Dim bytes As Integer Dim sa As sockaddr_in Dim in As String ia.S_addr = inet_addr( server ) If ( ia.S_addr = INADDR_NONE ) Then hostentry = gethostbyname( server ) If ( hostentry = 0 ) Then Return "Error: IP couldn't be resolved!" End If IP = *Cast( Integer Ptr, *hostentry->h_addr_list ) Else IP = ia.S_addr End If s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP ) If( s = 0 ) Then Return "Error: Socket couldn't be opened." End If sa.sin_port = htons( 80 ) sa.sin_family = AF_INET sa.sin_addr.S_addr = ip If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then closesocket( s ) Return "Error: Couldn't connect to host" End If sendBuffer = "GET /" + path + " HTTP/1.0" + NEWLINE + _ "Host: " + server + NEWLINE + _ "Connection: close" + NEWLINE + _ hadd + _ NEWLINE If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then closesocket( s ) Return "Error: Couldn't send request" End If Do bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 ) If( bytes <= 0 ) Then Exit Do End If recvbuffer[bytes] = 0 in += recvbuffer Loop shutdown( s, 2 ) closesocket( s ) Return in End Function Function httperror(text As String) As Integer If Left(text,7) = "Error: " Then Return -1 Else Return 0 EndIf End Function Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String Dim IP As Integer Dim ia As in_addr Dim s As SOCKET Dim hostentry As hostent Ptr Dim sendbuffer As String Dim recvbuffer As ZString * RECVBUFFLEN+1 Dim bytes As Integer Dim sa As sockaddr_in Dim in As String ia.S_addr = inet_addr( server ) If ( ia.S_addr = INADDR_NONE ) Then hostentry = gethostbyname( server ) If ( hostentry = 0 ) Then Return "IP couldn't be resolved!" End If IP = *Cast( Integer Ptr, *hostentry->h_addr_list ) Else IP = ia.S_addr End If s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP ) If( s = 0 ) Then Return "Socket couldn't be opened." End If sa.sin_port = htons( 80 ) sa.sin_family = AF_INET sa.sin_addr.S_addr = ip If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then closesocket( s ) Return "Couldn't connect to host" End If sendBuffer = "POST /" + path + " HTTP/1.0" + NEWLINE + _ "Host: " + server + NEWLINE + _ "Content-Type: application/x-www-form-urlencoded" + NEWLINE + _ "Content-Length: " + Str(Len(toPost)) + NEWLINE + _ "Connection: close" + NEWLINE + _ hadd + _ NEWLINE + _ toPost + NEWLINE If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then closesocket( s ) Return "Couldn't send request" End If Do bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 ) If( bytes <= 0 ) Then Exit Do End If recvbuffer[bytes] = 0 in += recvbuffer Loop shutdown( s, 2 ) closesocket( s ) Return in End Function Function timeFormat (sekunden As Double, stellen As Integer = 0) As String Dim As Integer minuten, stunden Dim As String zeit stunden = Int(sekunden / 3600) sekunden -= (stunden * 3600) minuten = Int(sekunden / 60) sekunden -= (minuten * 60) zeit = Str(stunden) + ":" + Right("0" + Str(minuten),2) + ":" + Mid("0" + Str(sekunden),InStr("0" + Str(sekunden),".") - 2) If stellen Then zeit = Left(zeit, InStr(zeit,".") + stellen) EndIf Return zeit End Function Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'häufigste wörter und längstes wort suchen Static As Integer vorkommen(21), laenge Static As String wort(21), lwort If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset For x As Integer = 1 To UBound(wort) vorkommen(x) = 0 wort (x) = "" Next laenge = 0 lwort = "" EndIf If Len(text) > 1 Then vorkommen(UBound(wort)) = count wort(UBound(wort)) = text For x As Integer = UBound(wort) To 2 Step -1 If vorkommen(x) > vorkommen(x - 1) Then Swap vorkommen(x),vorkommen(x - 1) Swap wort(x),wort(x - 1) Else Exit For EndIf Next If Len(text) > laenge Then lwort = text laenge = Len(text) EndIf EndIf If index = treeSubnodeCount Then 'letzter aufruf --> ergebnis ausgeben For x As Integer = 1 To UBound(wort) - 1 Locate CsrLin,5 Print x;" ";wort(x); Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5 Print vorkommen(x);" x " Next Print Print Print Print " Das laengste Wort (";laenge;" Zeichen ) ist: " Print Print lwort EndIf End Sub Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'alle wörter mit 3. buchstaben "a" Static As Integer zaehler If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset zaehler = 0 EndIf If (count > 0) And (Len(text) >= 3) And (text[2] = Asc("a")) Then zaehler += 1 Print zaehler;" ";text EndIf End Sub Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'alle wörter, in denen "auto" vorkommt Static As Integer zaehler If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset zaehler = 0 EndIf If (count > 0) And InStr(text,"auto") Then zaehler += 1 Print zaehler;" ";text EndIf End Sub Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld darstellen Static As Integer nodenr Dim As Integer h, b, pc Dim As UShort c If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset nodenr = 0 EndIf ScreenInfo b,h nodenr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) End Sub Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld mit anfangsbuchstaben darstellen Static As Integer nodenr Dim As Integer h, b, pc Dim As UShort c If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset nodenr = 0 EndIf ScreenInfo b,h nodenr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c If Len(text) = 1 Then Draw String((nodenr Mod b)+1,h-Int(nodenr/b)-10),Chr(text[0] + skipChars),RGB(255,255,255) Else PSet((nodenr Mod b)+1,h-Int(nodenr/b)-10),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) EndIf End Sub Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld mit weissen punkten am wortanfang Static As Integer nodenr Dim As Integer h, b, pc Dim As UShort c If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset nodenr = 0 EndIf ScreenInfo b,h nodenr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c If Len(text) = 1 Then PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(255,255,255) PSet((nodenr Mod b)+0,h-Int(nodenr/b)-1),RGB(255,255,255) PSet((nodenr Mod b)+1,h-Int(nodenr/b)-0),RGB(255,255,255) PSet((nodenr Mod b)+0,h-Int(nodenr/b)-0),RGB(255,255,255) Else PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) EndIf End Sub Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'wörterliste in datei schreiben Print #1, text;" ";count End Sub Function extractPlainText(text As String) As String 'normalen text aus website extrahieren Dim As Integer anfang, ende, rptr, flag, x Dim As String umlaut ende = 1 Do anfang = InStr(ende,text,"

") + 3 'beginn des textes If anfang = 3 Then Exit Do EndIf ende = InStr(anfang,text,"

") - 1 'ende des textes flag = 1 For x As Integer = anfang - 1 To ende - 1 If text[x] = Asc("<") Then 'beginn eines tags --> folgenden text ignorieren flag = 0 ElseIf text[x] = Asc(">") Then 'ende eines tags --> folgenden text übertragen flag = 1 Else If flag Then 'folgendes zeichen an den anfang des strings verschieben text[rptr] = text[x] rptr += 1 If rptr >= Len(text) Then 'fehler Return "" EndIf EndIf EndIf Next text[rptr] = Asc(" ") 'leerzeichen einfügen rptr += 1 Loop Until ende >= Len(text) If (rptr > 0) And (rptr < Len(text)) Then text = utf8ToAnsi(Left(text,rptr)) 'string kürzen und UTF-8-zeichen nach ANSI konvertieren Else text = "" 'fehler EndIf Return text End Function Function utf8ToAnsi(text As String) As String Dim As Integer x Dim As String umlaut Do While InStr(text,Chr(195)) x = InStr(text,Chr(195)) Select Case text[x] Case 164 umlaut = "ä" Case 132 umlaut = "Ä" Case 182 umlaut = "ö" Case 150 umlaut = "Ö" Case 178 umlaut = "ò" Case 188 umlaut = "ü" Case 156 umlaut = "Ü" Case 186 umlaut = "ú" Case 159 umlaut = "ß" Case 169 umlaut = "é" Case 168 umlaut = "è" Case 160 umlaut = "à" Case 161 umlaut = "á" Case 167 umlaut = "c" 'eigentlich c mit apostroph unten Case Else 'Print "*** unbekanntes Sonderzeichen ***" 'Print text 'Print x;" ";Mid(text,x+1,1);" ";Asc(Mid(text,x+1,1)) 'Sleep umlaut = "?" End Select text = Left(text,x - 1) + umlaut + Mid(text,x + 2) 'utf-8 durch ansi-zeichen ersetzen Loop Return text End Function