'Die Routinen zum Erzeugen und Durchsuchen des Baumes wurden von Sebastian geschrieben ' (http://users.freebasic-portal.de/sebastian/fb/wordcount/wordcount_tree.bas), die ' Programmteile zur Verbindung mit dem Internet stammen aus dem Codebeispiel ' "Websites selbst verarbeiten" von PMedia ' (http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html) #define NULL 0 #define recordLen SizeOf(tNode) #Ifndef recvbufflen #define RECVBUFFLEN 16384 #EndIf #Ifndef newline #define newline Chr(13,10) #EndIf #Include Once "win/winsock2.bi" Const skipChars = 32 'Const treeSubNodeCount = 256 - skipChars 'Const textChars = "abcdefghijklmnopqrstuvwxyzäöüABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜß1234567890_-+" Const textChars = "abcdefghijklmnopqrstuvwxyzäöüABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜß_-+" 'Const textChars = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZäÄöÖüÜß1234567890_-+" 'Const textChars = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZäÄöÖüÜß_-+" Dim Shared As UByte Ptr tcp = @textChars Const textCharsCount = Len(textChars) Const treeSubNodeCount = textCharsCount '? textCharsCount 'Sleep '72 Const As Integer schwarz = RGB(0,0,0), rot = RGB(255,0,0), gruen = RGB(0,255,0), _ blau = RGB(0,0,255), cyan = RGB(0,255,255), magenta = RGB(255,0,255), _ gelb = RGB(255,255,0), weiss = RGB(255,255,255) Type tNode subNodes(1 To treeSubNodeCount) As tNode Ptr count As UInteger = 0 preNode As tNode Ptr = NULL index As UByte = 0 End Type Type tSnapshot numWords As UInteger currentWordCharCount As UInteger totalCharCount As UInteger siteCount As UInteger totalNodeCount As UInteger newWordCount As UInteger totalInputSize As ULongInt totalInputTime As Double End Type Dim As tSnapshot snapshot Dim As tSnapshot Ptr snapshotPtr Type tList nextWord As tList Ptr = NULL prevWord As tList Ptr = NULL word As String = "" count As UInteger = 0 End Type Type tWordList word As String = "" count As UInteger = 0 End Type ReDim As tWordList wordList(1) Type tStrDescr txtPtr As Byte Ptr txtLen As UInteger txtMem As UInteger End Type Type tIndex nextIndex As tIndex Ptr = NULL iPtr As tList Ptr = NULL Union count As UInteger length As UInteger End Union End Type Dim As tIndex Ptr ip Declare Function isSplittingChar (char As UByte) As Integer Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Declare Sub traverseTree (node As tNode Ptr, index As UByte, path As String, parameter() As Any Ptr) Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Declare Sub deallocateTree (tree() As tNode Ptr) Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr) Declare Sub sortByCount(parameter() As Any Ptr) Declare Sub sortByLength(parameter() As Any Ptr) Declare Sub sortAlphabetical(parameter() As Any Ptr) Declare Sub auswertung4(parameter() As Any Ptr) Declare Sub auswertung5(parameter() As Any Ptr) Declare Sub auswertung6(parameter() As Any Ptr) Declare Sub ausgabe1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub ausgabe2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) Declare Sub InitWinsock Declare Sub ExitWinsock Declare Function httpget(server As String, path As String, hadd As String = "") As String Declare Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String Declare Function httperror(text As String) As Integer Declare Function timeFormat (sekunden As Double, stellen As Integer = 0) As String Declare Function extractPlainText(text As String) As String Declare Function utf8ToAnsi(text As String) As String Declare Function a2i(text As String) As String Declare Function i2a(text As String) As String Declare Sub makeSortedList(tree() As tNode Ptr, array() As tWordList, plugin As Any Ptr, filter As Any Ptr = 0) Declare Function filter_3a(text As String) As Integer Declare Function filter_auto(text As String) As Integer Dim As UByte char, c Dim As UInteger numWords = 0, currentWordCharCount = 0, totalCharCount = 0, _ wordsMax, nodesMax, wScale, nScale, rScale, siteCount, _ totalNodeCount = textCharsCount, newNodes, wordBegin, wordEnd, _ snapshotGap Dim Shared As Integer scrWidth, scrHeight, lowcase = 1 Dim Shared As UInteger newWordCount Dim As ULongInt totalInputSize Dim As String buffer = "", domain, address, g, exposition = "1", text, textm, g2 Dim As String snapshots Dim As Double tStart, tEnd, totalInputTime, startTime, averageLength, xwert, ywert, tMerk Dim As Single ratioMax, ratioMaxTemp, ratioAct, ratio ReDim Shared As Integer vorkommen(1) ReDim Shared As String wort(1) Dim As Any Ptr ergebnis Dim As UByte Ptr snP ReDim As tList Ptr par(1) Dim tree(1 To treeSubNodeCount) As tNode Ptr '******* URLDownloadToFile einbinden **************************** Dim URLDownloadToFile As Function (_ ByVal pCaller As Long, _ ByVal szURL As ZString Ptr, _ ByVal szFileName As ZString Ptr, _ ByVal dwReserved As Long, _ ByVal lpfnCB As Long) As Long Dim library As Any Ptr library = DylibLoad( "urlmon.dll" ) URLDownloadToFile = DylibSymbol( library, "URLDownloadToFileA" ) '***************************************************************** For i As Integer = LBound(tree) To UBound(tree) tree(i) = Callocate(recordLen) tree(i)->index = i Next i Dim As String inputFiles(1 To ...) = { "d:\internet\bibel\altes_testament_luther_revidiert.txt", _ "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ ExePath + "\debianreferenz.txt" } 'Dim As String inputFiles(1 To ...) = { ExePath + "\debianreferenz.txt", _ ' "d:\internet\bibel\neues_testament_luther_revidiert.txt", _ ' "d:\internet\bibel\altes_testament_luther_revidiert.txt" } startTime = Timer 'ScreenRes 1200,800,16 ScreenRes 640,480,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Do 'eingangsdaten von wikipedia holen domain = "de.wikipedia.org" address = "wiki/Spezial:Zuf%C3%A4llige_Seite" g = "Location: http://de.wikipedia.org/wiki/" 'kennung 'englische wikipedia-site 'domain = "en.wikipedia.org" 'address = "wiki/Special:Random" 'g = "Location: http://en.wikipedia.org/wiki/" 'kennung text = httpget(domain,address) 'zufällige seite holen 'link auf zufällige seite isolieren wordBegin = InStr(text,g) 'pointer auf anfang wordEnd = InStr(wordBegin + Len(g),text,Chr(13,10)) 'pointer auf ende address = Mid(text,wordBegin,wordEnd-wordBegin) 'link holen address = Mid(address,35) ' präfix ("Location: http://de.wikipedia.org/") abtrennen If address = "" Then ?" NO ADDRESS " Sleep 1000 Continue Do EndIf text = httpget(domain,address) ' website herunterladen siteCount += 1 tStart = Timer text = extractPlainText(text) If text = "" Then 'kein text ?" CONTINUE " Sleep 1000 Continue Do 'nächste site EndIf If lowcase Then text = LCase(text) 'umwandeln in kleinbuchstaben EndIf ''themenbereich eingrenzn 'Dim themeWords(1 To ...) As String = { "computer", "freebasic", "website", "internet" } ' 'For x As Integer = 1 To UBound(themeWords) ' If InStr(text,themeWords(x)) Then ' Exit For ' EndIf ' Continue Do 'Next wordBegin = -1 Do 'text abarbeiten Do 'nächsten wortanfang suchen wordBegin += 1 If wordBegin >= Len(text) Then 'textende erreicht Exit Do,Do EndIf Loop Until isSplittingChar(text[wordBegin]) = 0 wordEnd = wordBegin Do 'nächstes wortende suchen wordEnd += 1 Loop Until isSplittingChar(text[wordEnd]) buffer = Mid(text,wordBegin + 1, wordEnd - wordBegin) 'wort aus text holen '? " BUFFER ";buffer 'Sleep 100 totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren wordBegin = wordEnd 'zeiger für nächste suche setzen If buffer = "" Then 'nächstes wort Continue Do EndIf numWords += 1 'wortzähler erhöhen '? " BUFFER ";buffer buffer = a2i(buffer) 'in internes format wandeln '? " BUFFER ";i2a(buffer) '? 'Sleep 1000 newNodes = putCharsIntoTree(tree(buffer[0]), buffer, 0) totalNodeCount += newNodes 'knotenzähler aktualisieren If newNodes Then 'neues Wort gefunden newWordCount += 1 'zähler für "unterschiedliche wörter" aktualisieren EndIf tStart = Timer With snapshot 'variablenwerte merken .numWords = numWords .currentWordCharCount = currentWordCharCount .totalCharCount = totalCharCount .siteCount = siteCount .totalNodeCount = totalNodeCount .newWordCount = newWordCount .totalInputSize = totalInputSize .totalInputTime = totalInputTime End With 'aktuellen snapshot in string umwandeln und an snapshotstring anhängen Dim As String snTemp = "" Dim As tStrDescr Ptr snTempPtr = Cast(tStrDescr Ptr,@snTemp) 'pointer auf stringdescriptor von 'snTemp' snTempPtr->txtPtr = Cast(Byte Ptr,@snapshot) 'pointer auf 'snapshot' in stringdescriptor von 'snTemp' snTempPtr->txtLen = SizeOf(snapshot) 'länge setzen snapshots += snTemp 'snapshot anhängen tEnd = Timer tMerk = tEnd - tStart Loop totalInputSize += Len(text) 'zähler für insgesamt eingelesene daten aktualisieren tEnd = Timer totalInputTime += (tEnd - tStart) 'darstellung ScreenLock '*********************** View Print Cls 'textblock Print " Gesamtzeit: ";timeFormat(Timer - startTime,2) Print "Gesamtzeit zum Einlesen der Daten: ";timeFormat(totalInputTime,2) Print Print " Anzahl der Knoten: ";totalNodeCount g = Str((totalNodeCount * recordLen)/1024^2) g = Left(g,InStr(g,".")+1) Print " Belegter Speicher: ";g;" MB" Print If (numWords > 0) Then averageLength = (totalCharCount / CDbl(numWords)) End If Print " Woerter insgesamt: " & numWords Print " Unterschiedliche Woerter: " & newWordCount Print " Durchschnittliche Wortlaenge:"; Print Using "##.### "; averageLength; Print "Buchstaben." Print Print "Eingelesene Website (";siteCount;"): ";address 'Locate 15,1 ? "string"; Len(snapshots);" / ";Len(snapshots)/SizeOf(snapshot);" ("; Print Using "##.###### "; Len(snapshots)/(1024^2); ? "MB) Gap ";snapshotGap;" ";tMerk Locate 14,1 Select Case exposition 'gewählte art der auswertung Case "1" 'graphische darstellung If Len(snapshots) > (10 * SizeOf(tSnapshot)) Then Color cyan,schwarz Print " 1) Graphische Darstellung:" tStart = Timer Color weiss,schwarz Line (10,scrHeight-10)-(10,170) Line (10,scrHeight-10)-(scrWidth-10,scrHeight-10) 'darstellung vorbereiten 'maximalwerte snapshotPtr = Cast(tSnapshot Ptr,StrPtr(snapshots) + Len(snapshots) - SizeOf(tSnapshot)) 'pointer auf letzten samplewert wordsMax = snapshotPtr->numWords nodesMax = snapshotPtr->totalNodeCount ratioAct = snapshotPtr->newWordCount / snapshotPtr->numWords wScale = wordsMax / (scrHeight - 200) 'maßstab für wörter nScale = nodesMax / (scrHeight - 200) 'maßstab für speicherbelegung rScale = (scrHeight - 200) / ratioMax 'maßstab für wortverhältnis 'graphen auf bildschirm schreiben wordBegin = 0 ratioMaxTemp = 0 snapshotGap = (Len(snapshots) / SizeOf(tSnapshot) / scrWidth) + 1 For y As Integer = 0 To Len(snapshots) - SizeOf(tSnapshot) Step SizeOf(tSnapshot) * snapshotGap 'alle snapshots With *Cast(tSnapshot Ptr,StrPtr(snapshots) + y) 'pointer auf snapshot ratio = (.newWordCount / .numWords) If ratio > ratioMaxTemp Then 'größtes verhältnis ermitteln ratioMaxTemp = ratio EndIf 'rechnung nicht weiter zusammenfassen, xwert wird sonst bei einer stringlänge ' von mehr als ca. 2,5 MB falsch berechnet xwert = scrWidth - 20 xwert = xwert * y / Len(snapshots) xwert = xwert + 10 ywert = (scrHeight - 10) - .totalNodeCount / nScale ' graphen schreiben PSet (xwert,ywert) 'speicherbelegung PSet (xwert,scrHeight - 10 - .numWords / wScale),gelb 'wörter gesamt PSet (xwert,scrHeight - 10 - Int(ratio * rScale)),cyan 'unterschiedliche wörter End With Next ratioMax = ratioMaxTemp 'größtes verhältnis merken 'beschriftung Line (10,ywert)-(scrWidth - 10,ywert),weiss,,&b0000000000000011 g = Str((totalNodeCount * recordLen) / 1024^2) g = Left(g,InStr(g,".") + 1) Draw String (30,ywert - 10),g + " " + "MB" 'speicherbedarf Draw String (30,ywert + 5),Str(wordsMax),gelb 'wörter gesamt g = Str(100 * ratioAct ) g = Left(g,InStr(g,".") + 1) + "%" Draw String (scrWidth - 50,scrHeight - 30 - ratioAct * rScale),g,cyan 'wortverhältnis 'legende Draw String (scrWidth / 2,scrHeight - 50),"_____ Speicher",weiss Draw String (scrWidth / 2,scrHeight - 40),"_____ Woerter gesamt",gelb Draw String (scrWidth / 2,scrHeight - 30),"_____ Rel. verschiedene",cyan tEnd = Timer Locate 14,30 Print "("; Print Using "##.######"; (tEnd-tStart); Print " s)" EndIf Case "2" 'gezielt nach wörtern suchen Color cyan,schwarz Print " 2) Gezielter Lookup von Woertern:" Color weiss,schwarz Locate CsrLin + 3,1 Dim lookupWords(1 To ...) As String = { "Computernerd", "freeBasic", "der", "die", "das", _ "und", "in", "Klingone", "Rhabarber", "Haus" } tStart = Timer For i As Integer = LBound(lookupWords) To UBound(lookupWords) Locate CsrLin,5 Print "Wie oft kommt "; Color gelb,schwarz Print lookupWords(i); Color weiss,schwarz Print " vor? "; Color rot,schwarz If lowcase Then Print lookupWordCount(tree(), a2i(LCase(lookupWords(i)))); Else Print lookupWordCount(tree(), a2i(lookupWords(i))); EndIf Color weiss,schwarz Print "x" Next i tEnd = Timer Locate CsrLin + 1,5 Print "Das Nachschlagen der Haeufigkeiten dauerte "; Print Using "##.######"; (tEnd-tStart); Print " Sekunden." Case "3" 'häufigste wörter Color cyan,schwarz Print " 3) Die 20 haeufigsten Woerter:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten makeSortedList(tree(), wordList(),@sortByCount) tEnd = Timer 'zeitmessung stoppen Dim y As Integer For x As Integer = 1 To UBound(wordlist) 'liste auf bildschirm Locate CsrLin,5 Print x;" ";wordList(x).word; Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5 Print wordList(x).count;" x"; y = UBound(wordList) - 20 + x Locate CsrLin,40 Print y;" ";wordList(y).word; Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5 Print wordList(y).count;" x" If x > 19 Then Exit For Next Locate CsrLin + 3,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." ReDim wordList(1) Case "a" 'alphabetische liste Color cyan,schwarz Print " 3) Alphabetisch:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten makeSortedList(tree(), wordList(),@sortAlphabetical) tEnd = Timer 'zeitmessung stoppen Dim As Integer y For x As Integer = 1 To UBound(wordlist) 'liste auf bildschirm Locate CsrLin,5 Print x;" ";wordList(x).word 'Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5 'Print wordList(x).count;" x"; y = UBound(wordList) - 20 + x Locate CsrLin,40 Print y;" ";wordList(y).word; 'Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5 'Print wordList(y).count;" x" If x > 19 Then Exit For Next Locate CsrLin + 3,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." ReDim wordList(1) Case "y" 'nach länge geordnet Color cyan,schwarz Print " 3) Nach Laenge geordnet:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten makeSortedList(tree(), wordList(),@sortByLength) tEnd = Timer 'zeitmessung stoppen For x As Integer = 1 To UBound(wordlist) 'liste auf bildschirm Locate CsrLin,5 Print x;" ";wordList(x).word;" "; Color gelb,schwarz Print Len(wordList(x).word) Color weiss,schwarz If x > 19 Then Exit For Next Locate CsrLin + 3,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." ReDim wordList(1) Case "4" '3. buchstabe "a" Color cyan,schwarz Print " 4) Alle Woerter, deren 3. Buchstabe ein ""a"" ist:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten makeSortedList(tree(), wordList(),@sortAlphabetical,@filter_3a) tEnd = Timer 'zeitmessung stoppen For x As Integer = 1 To UBound(wordlist) 'liste auf bildschirm Locate CsrLin,5 Print x;" ";wordList(x).word;" "; Color gelb,schwarz Print Len(wordList(x).word) Color weiss,schwarz If x > 19 Then Exit For Next Locate CsrLin + 1,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Case "5" 'alle wörter mit "auto" Color cyan,schwarz Print " 5) Alle Woerter, in denen die Zeichenfolge ""auto"" vorkommt:" Color weiss,schwarz View Print CsrLin + 3 To scrHeight/8 tStart = Timer 'zeitmessung starten makeSortedList(tree(), wordList(),@sortAlphabetical,@filter_auto) tEnd = Timer 'zeitmessung stoppen For x As Integer = 1 To UBound(wordlist) 'liste auf bildschirm Locate CsrLin,5 Print x;" ";wordList(x).word;" "; Color rot,schwarz Print wordList(x).count; Color weiss,schwarz Print "x" If x > 19 Then Exit For Next Locate CsrLin + 1,5 Print "Die Auswertung der ";newWordCount;" Woerter dauerte "; Print Using "##.###"; (tEnd-tStart); Print " Sekunden." Case "6" 'visualisierung --> baum als punkte Color cyan,schwarz Print " 6) Visualisierung des kompletten Baums als Punktefeld (1 Knoten = 1 Punkt)" Color weiss,schwarz 'par(0) = plugin 'pointer auf plugin 'par(3) = NULL 'pointer auf 1. listenknoten 'par(4) = NULL 'pointer auf letzten listenknoten 'par(5) = NULL 'pointer auf 1. index 'par(6) = Cast(Any Ptr,@listItemCount) 'pointer auf variable für listenlänge 'par(7) = filter 'ReDim As tNode Ptr erg(newWordCount) Dim As Integer nodenr = 0 ReDim par(3) par(0) = Cast(Any Ptr,@auswertung4) 'pointer auf plugin par(3) = Cast(Any Ptr,@nodenr) 'pointer auf knotenzähler For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(tcp[i-1]),par()) Next i Case "7" 'visualisierung --> anfangsbuchstaben Color cyan,schwarz Print " 7) Visualisierung des kompletten Baums als Punktefeld mit Anfangsbuchstaben" Color weiss,schwarz Dim As Integer nodenr = 0 ReDim par(3) par(0) = Cast(Any Ptr,@auswertung5) 'pointer auf plugin par(3) = Cast(Any Ptr,@nodenr) 'pointer auf knotenzähler For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(tcp[i-1]),par()) Next i Case "8" 'visualisierung --> anfangsbuchstaben als weisse punkte / baum als punkte Color cyan,schwarz Print " 8) Baum als Punktefeld mit weissen Punkten am Wortanfang" Color weiss,schwarz Dim As Integer nodenr = 0 ReDim par(3) par(0) = Cast(Any Ptr,@auswertung6) 'pointer auf plugin par(3) = Cast(Any Ptr,@nodenr) 'pointer auf knotenzähler For i As Integer = 1 To treeSubNodeCount traverseTree(tree(i), i, Chr(tcp[i-1]),par()) Next i End Select ScreenUnlock 'tastaturabfrage g = Inkey Select Case g Case "1","2","3","4","5","6","7","8","a","y" 'darstellung auswählen exposition = g '?"a" Case "d" 'ergebnisse in datei schreiben Select Case exposition 'art der auswertung Case "3" 'wörter nach häufigkeit 'wörter nach häufigkeit geordnet in datei schreiben Print " Schreibe Liste in Datei..." ReDim vorkommen(newWordCount + 1) ReDim wort(newWordCount + 1) ReDim As tNode Ptr erg(newWordCount) Open "g:\nach_haeufigkeit.txt" For Output As #1 For i As Integer = 1 To treeSubNodeCount 'alle knoten durchgehen 'traverseTree(tree(i), i, Chr(skipChars + i),@ausgabe2,erg()) Next i Close 1 ReDim As Integer vorkommen(1) 'speicher freigeben ReDim As String wort(1) Case Else 'gesamten baum in datei schreiben ReDim As tNode Ptr erg(newWordCount) Open ExePath + "\words.txt" For Output As #1 For i As Integer = 1 To treeSubNodeCount 'traverseTree(tree(i), i, Chr(skipChars+i),@ausgabe1,erg()) Next i Close 1 End Select Case "c" 'gross- kleinbuchstaben lowcase = lowcase Xor 1 Case "+" 'grosses fenster ScreenRes 1200,800,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Case "-" 'kleines fenster ScreenRes 640,480,16 ScreenInfo scrWidth,scrHeight Width scrWidth/8,scrHeight/8 Case Chr(27) 'esc Exit Do 'programm beenden End Select Loop View Print 13 To scrHeight/8 Cls 2 Print Print "Abbau des Baums aus dem Speicher... "; deallocateTree(tree()) Print "Fertig." End 0 'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende 'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein 'Zeilenumbruch oder ein Komma). Function isSplittingChar (char As UByte) As Integer For p As UByte Ptr = @textChars To @textChars + textCharsCount If *p = char Then Return 0 EndIf Next Return 1 End Function Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer Dim As UByte c = buffer[index] Dim As Integer newNodes = 0 If ((index+1) >= Len(buffer)) Then ' Wort endet hier in diesem Knoten node->count += 1 Else c = buffer[index+1] If (node->subNodes(c) = NULL) Then 'neuen knoten anlegen node->subNodes(c) = Callocate(recordLen) 'knoten erstellen node->subNodes(c)->preNode = node 'pointer des vorgängers node->subNodes(c)->index = c 'index newNodes += 1 End If newNodes += putCharsIntoTree(node->subNodes(c), buffer, index+1) End If Return newNodes End Function Sub traverseTree (node As tNode Ptr, index As UByte, path As String, par() As Any Ptr) 'ruft nacheinander alle knoten des baums auf Dim Plugin As Sub (par() As Any Ptr) If (node = NULL) Then Return If (node->count > 0) Then If par(0) Then Plugin = par(0) par(1) = node par(2) = @path Plugin(par()) 'plugin aufrufen Else Print path & " => " & node->count & " x" EndIf End If For i As Integer = 1 To treeSubNodeCount 'rekursiver aufruf aller knoten, auf die der aktuelle knoten zeigt If (node->subNodes(i) <> NULL) Then traverseTree (node->subNodes(i), i, path + Chr(tcp[i-1]),par()) End If Next i End Sub Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer If (Len(word) < 1) Then Return 0 Return lookupCharacterCount(tree(word[0]), word, 0) 'pointer auf 1. knoten End Function Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer Dim As String wd = word If ((index+1) >= Len(wd)) Then Return node->count Else Dim As UByte nextChar = wd[index+1] 'nächster buchstabe Dim As tNode Ptr nextNode = node->subNodes(nextChar) 'knoten für nächsten buchstaben If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden. Return lookupCharacterCount (nextNode, wd, index+1) End If End Function Sub deallocateTree (tree() As tNode Ptr) For i As Integer = 1 To treeSubNodeCount deallocateNodeAndSubNodes(tree(i)) DeAllocate tree(i) Next i End Sub Sub deallocateNodeAndSubNodes (node As tNode Ptr) If (node = NULL) Then Return deallocateTree(node->subNodes()) End Sub Sub InitWinsock Constructor 'Autor: PMedia http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html ' init winsock Dim wsaData As WSAData If( WSAStartup( MAKEWORD( 1, 1 ), @wsaData ) <> 0 ) Then Print "Error: WSAStartup failed" End 1 End If End Sub Sub ExitWinsock Destructor WSACleanup End Sub Function recv_message(ByVal ClientSocketx As socket, ByRef nbytes As Integer, ByRef message As String) As Integer Dim As ZString*400 inmessage Dim As Integer lerr, timeout = 0, rcvtimeout = 1000 nbytes = 0 While nbytes <= 0 'keep trying to read data inmessage = "" setsockopt(ClientSocketx, SOL_SOCKET, SO_RCVTIMEO, Cast(Any Ptr,@rcvtimeout), SizeOf(rcvtimeout)) 'set timeout nbytes = recv(ClientSocketx, @inmessage, SizeOf(inmessage), 0) 'try to read data lerr = wsaGetLastError() 'save any error setsockopt(ClientSocketx, SOL_SOCKET, SO_RCVTIMEO, Cast(Any Ptr, @timeout), SizeOf(timeout)) 'clear timeout If lerr = WSAECONNABORTED Or lerr = WSAECONNRESET Or nbytes = 0 Then Return -1 'connection closed Wend message = inmessage Return 0 End Function Function httpget(server As String, path As String, hadd As String = "") As String Dim IP As Integer Dim ia As in_addr Dim s As SOCKET Dim hostentry As hostent Ptr Dim sendbuffer As String Dim recvbuffer As ZString * RECVBUFFLEN+1 Dim bytes As Integer Dim sa As sockaddr_in Dim in As String = "" ia.S_addr = inet_addr( server ) If ( ia.S_addr = INADDR_NONE ) Then hostentry = gethostbyname( server ) If ( hostentry = 0 ) Then Return "Error: IP couldn't be resolved!" End If IP = *Cast( Integer Ptr, *hostentry->h_addr_list ) Else IP = ia.S_addr End If s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP ) If( s = 0 ) Then Return "Error: Socket couldn't be opened." End If sa.sin_port = htons( 80 ) sa.sin_family = AF_INET sa.sin_addr.S_addr = ip If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then closesocket( s ) Return "Error: Couldn't connect to host" End If sendBuffer = "GET /" + path + " HTTP/1.0" + NEWLINE + _ "Host: " + server + NEWLINE + _ "Connection: close" + NEWLINE + _ hadd + _ NEWLINE If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then closesocket( s ) Return "Error: Couldn't send request" End If Do bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 ) If( bytes <= 0 ) Then Exit Do End If recvbuffer[bytes] = 0 in += recvbuffer Loop shutdown( s, 2 ) closesocket( s ) Return in End Function Function httperror(text As String) As Integer If Left(text,7) = "Error: " Then Return -1 Else Return 0 EndIf End Function Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String Dim IP As Integer Dim ia As in_addr Dim s As SOCKET Dim hostentry As hostent Ptr Dim sendbuffer As String Dim recvbuffer As ZString * RECVBUFFLEN'+1 Dim bytes As Integer Dim sa As sockaddr_in Dim in As String ia.S_addr = inet_addr( server ) If ( ia.S_addr = INADDR_NONE ) Then hostentry = gethostbyname( server ) If ( hostentry = 0 ) Then Return "IP couldn't be resolved!" End If IP = *Cast( Integer Ptr, *hostentry->h_addr_list ) Else IP = ia.S_addr End If s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP ) If( s = 0 ) Then Return "Socket couldn't be opened." End If sa.sin_port = htons( 80 ) sa.sin_family = AF_INET sa.sin_addr.S_addr = ip If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then closesocket( s ) Return "Couldn't connect to host" End If sendBuffer = "POST /" + path + " HTTP/1.0" + NEWLINE + _ "Host: " + server + NEWLINE + _ "Content-Type: application/x-www-form-urlencoded" + NEWLINE + _ "Content-Length: " + Str(Len(toPost)) + NEWLINE + _ "Connection: close" + NEWLINE + _ hadd + _ NEWLINE + _ toPost + NEWLINE If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then closesocket( s ) Return "Couldn't send request" End If Do bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 ) If( bytes <= 0 ) Then Exit Do End If recvbuffer[bytes] = 0 in += recvbuffer Loop shutdown( s, 2 ) closesocket( s ) Return in End Function Function timeFormat (sekunden As Double, stellen As Integer = 0) As String Dim As Integer minuten, stunden Dim As String zeit stunden = Int(sekunden / 3600) sekunden -= (stunden * 3600) minuten = Int(sekunden / 60) sekunden -= (minuten * 60) zeit = Str(stunden) + ":" + Right("0" + Str(minuten),2) + ":" + Mid("0" + Str(sekunden),InStr("0" + Str(sekunden),".") - 2) If stellen Then zeit = Left(zeit, InStr(zeit,".") + stellen) EndIf Return zeit End Function #Macro mSort(mType) 'macro für verschiedene sortierkriterien Dim As tNode Ptr node = par(1) 'pointer auf knoten Dim As tStrDescr Ptr textPtr, ttPtr, tp = par(2) 'pointer auf text (path) Dim As tStrDescr ttemp, tmerk Dim As tList Ptr p, n, v, q, firstItem = par(3), lastItem = par(4) Dim As tIndex Ptr ip, in, iv, itp, firstIndex = par(5) Dim As UInteger Ptr i = par(6) Dim As Integer x, y Dim As String text, tt Dim filter As Function(word As String) As Integer Dim As UByte Ptr merken ''"path" in "text" übertragen ''tpPtr = Cast(tStrDescr Ptr,tp) 'tmerk = *tp 'ttemp = *tp ''text = *ttemp 'ttPtr = tp '? ttPtr 'Sleep 'End ' ttPtr = Cast(tStrDescr Ptr,@tt) merken = ttPtr->txtPtr ttPtr->txtPtr = tp->txtPtr ttPtr->txtLen = tp->txtLen 'ttPtr->txtMem = tp->txtMem text = tt ttPtr->txtPtr = merken '? "text ";text 'Sleep 'text = String(tp->txtLen,Chr(0)) 'speicherplatz reservieren 'For x As Integer = 0 To tp->txtLen - 1 'string übertragen ' text[x] = *(tp->txtPtr + x) 'Next If node = NULL Then Return EndIf If par(7) Then filter = par(7) If filter(text) = 0 Then Return EndIf p = firstItem 'pointer auf 1. listenknoten 'neuen listenknoten anlegen n = New tList 'listenknoten anlegen n->word = text 'wort eintragen n->count = node->count 'anzahl eintragen #If mType = "alpha" If p = NULL Then '1. knoten anlegen firstItem = n 'pointer auf 1. knoten *i = 1 'zähler initialisieren Else 'neuen knoten hinter die liste setzen n->prevWord = lastItem 'vorgängerknoten (bisheriger letzter) lastItem->nextWord = n 'folgeknoten für bisherigen letzten (bisher NULL) *i += 1 EndIf par(3) = firstItem par(4) = n 'pointer auf ende der liste #Else If p = NULL Then '1. listeneintrag firstItem = n 'pointer auf 1. listeneintrag lastItem = n 'pointer auf letzten listeneintrag *i = 1 'zähler initialisieren 'indexliste anlegen ip = New tIndex '1. index anlegen ip->iPtr = n 'pointer auf listeneintrag #If mType = "count" ip->count = node->count '##################### #ElseIf mType = "length" ip->length = tp->txtLen '############################ #EndIf firstIndex = ip 'pointer auf indexliste Else *i += 1 'zähler erhöhen #If mType = "count" If node->count > p->count Then 'neuen knoten vor die liste setzen ##################### #ElseIf mType = "length" If tp->txtLen > Len(p->word) Then 'neuen knoten vor die liste setzen #################### #EndIf n->nextWord = p 'folgeknoten (bisheriger 1.) p->prevWord = n 'vorgängerknoten für folgeknoten (bisher NULL) firstItem = n 'neuer pointer auf anfang der liste 'neuen index vor indexliste setzen ip = New tIndex 'index anlegen ip->iPtr = n 'pointer auf neuen knoten #If mType = "count" ip->count = node->count 'anzahl ################################ #ElseIf mType = "length" ip->length = tp->txtLen 'länge ############################ #EndIf ip->nextIndex = firstIndex 'alter listenbeginn firstIndex = ip 'neuer listenbeginn #If mType = "count" ElseIf (node->count = 1) Or (node->count <= lastItem->count) Then 'neuen knoten hinter die liste setzen ######################### #ElseIf mType = "length" ElseIf tp->txtLen <= Len(lastItem->word) Then 'neuen knoten hinter die liste setzen ################# #EndIf n->prevWord = lastItem 'vorgängerknoten (bisheriger letzter) lastItem->nextWord = n 'folgeknoten für bisherigen letzten (bisher NULL) lastItem = n 'neuer letzter knoten 'indexliste prüfen/erweitern ip = firstIndex 'beginn der indexliste Do While ip->nextIndex <> NULL 'letzten knoten suchen ip = ip->nextIndex Loop If ip->count <> node->count Then 'wenn noch kein index für diese anzahl existiert --> index anlegen ################ in = New tIndex 'index anlegen in->iPtr = n 'pointer auf neuen knoten #If mType = "count" in->count = node->count 'anzahl ############################ #ElseIf mType = "length" in->length = tp->txtLen 'wortlänge ############################## #EndIf ip->nextIndex = in 'pointer auf neuen index in vorhergehenden index schreiben EndIf Else 'neuen knoten in die liste einfügen 'indexliste prüfen/erweitern ip = firstIndex 'pointer auf beginn der indexliste Do 'indexliste aktualisieren #If mType = "count" If ip->count = node->count Then 'index für anzahl schon vorhanden ################## #ElseIf mType = "length" If ip->length = tp->txtLen Then 'index für länge schon vorhanden ##################### #EndIf ip = ip->nextIndex 'index von nächsthöherer anzahl p = ip->iPtr 'poiner des folgeknotens Exit Do #If mType = "count" ElseIf (ip->count < node->count) Or (ip->nextIndex = NULL) Then 'index für aktuelle anzahl noch nicht vorhanden ############### #ElseIf mType = "length" ElseIf (ip->length < tp->txtLen) Or (ip->nextIndex = NULL) Then 'index für aktuelle länge noch nicht vorhanden ################### #EndIf in = New tIndex 'neuen index anlegen in->iPtr = n 'pointer auf neuen knoten #If mType = "count" in->count = node->count 'anzahl ####################### #ElseIf mType = "length" in->length = tp->txtLen 'wortlänge ####################### #EndIf in->nextIndex = ip 'pointer des folgeindex in neuen index schreiben iv->nextIndex = in 'pointer auf neuen index in vorhergehenden index schreiben p = ip->iPtr 'pointer des folgeknotens Exit Do EndIf iv = ip 'pointer merken ip = ip->nextIndex 'nächster index Loop 'listenknoten einfügen v = p->prevWord n->prevWord = v 'pointer des vorgängerknotens in neuen knoten schreiben n->nextWord = p 'pointer des folgeknotens in neuen knoten schreiben v->nextWord = n 'pointer des neuen knotens in vorgängerknoten schreiben p->prevWord = n 'pointer des neuen knotens in nachfolgeknoten schreiben EndIf EndIf par(3) = firstItem 'pointer auf anfang der liste par(4) = lastItem 'pointer auf ende der liste par(5) = firstIndex 'pointer auf anfang der indexliste #EndIf #EndMacro Sub sortByCount(par() As Any Ptr) 'nach häufigkeit ordnen mSort("count") 'macro aufrufen End Sub Sub sortAlphabetical(par() As Any Ptr) 'alphabetisch ordnen mSort("alpha") 'macro aufrufen End Sub Sub sortByLength(par() As Any Ptr) 'nach länge ordnen mSort("length") 'macro aufrufen End Sub Sub auswertung4(par() As Any Ptr) 'Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld darstellen Dim As Integer h,b,pc Dim As Integer Ptr nodenrPtr = par(3) Dim As UShort c Dim As tNode Ptr node = par(1) ScreenInfo b,h *nodenrPtr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c PSet((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) End Sub Sub auswertung5(par() As Any Ptr) 'Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld mit anfangsbuchstaben darstellen Dim As String text, tt Dim As Integer h, b, pc Dim As Integer Ptr nodenrPtr = par(3) Dim As UShort c Dim As tNode Ptr node = par(1) Dim As tStrDescr Ptr ttPtr, tp = par(2) 'pointer auf text (path) Dim As UByte Ptr remind ttPtr = Cast(tStrDescr Ptr,@tt) remind = ttPtr->txtPtr ttPtr->txtPtr = tp->txtPtr ttPtr->txtLen = tp->txtLen text = tt ttPtr->txtPtr = remind ScreenInfo b,h *nodenrPtr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c If Len(text) = 1 Then Draw String((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-10),text,RGB(255,255,255) Else PSet((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-10),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) EndIf End Sub Sub auswertung6(par() As Any Ptr) 'Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'baum als punktefeld mit weissen punkten am wortanfang Dim As String text, tt Dim As Integer h, b, pc Dim As Integer Ptr nodenrPtr = par(3) Dim As UShort c Dim As tNode Ptr node = par(1) Dim As tStrDescr Ptr ttPtr, tp = par(2) 'pointer auf text (path) Dim As UByte Ptr remind ttPtr = Cast(tStrDescr Ptr,@tt) remind = ttPtr->txtPtr ttPtr->txtPtr = tp->txtPtr ttPtr->txtLen = tp->txtLen text = tt ttPtr->txtPtr = remind ScreenInfo b,h *nodenrPtr += 1 For x As Integer = 1 To treeSubNodeCount If node->subNodes(x) <> 0 Then pc += 1 EndIf Next c = LoWord(node->count) c = c * c If Len(text) = 1 Then Circle ((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-1),2,RGB(255,255,255)',,,,F 'PSet((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-1),RGB(255,255,255) 'PSet((*nodenrPtr Mod b)+0,h-Int(*nodenrPtr/b)-1),RGB(255,255,255) 'PSet((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-0),RGB(255,255,255) 'PSet((*nodenrPtr Mod b)+0,h-Int(*nodenrPtr/b)-0),RGB(255,255,255) Else PSet((*nodenrPtr Mod b)+1,h-Int(*nodenrPtr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc)) EndIf End Sub Sub ausgabe1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'wörterliste in datei schreiben Print #1, text;" ";count End Sub Function extractPlainText(text As String) As String 'normalen text aus website extrahieren Dim As Integer wordBegin, wordEnd, rptr, flag, x wordEnd = 1 Do wordBegin = InStr(wordEnd,text,"

") + 3 'beginn des textes If wordBegin = 3 Then Exit Do EndIf wordEnd = InStr(wordBegin,text,"

") - 1 'ende des textes flag = 1 For x As Integer = wordBegin - 1 To wordEnd - 1 If text[x] = Asc("<") Then 'beginn eines tags --> folgenden text ignorieren flag = 0 ElseIf text[x] = Asc(">") Then 'ende eines tags --> folgenden text übertragen flag = 1 Else If flag Then 'folgendes zeichen an den anfang des strings verschieben text[rptr] = text[x] rptr += 1 If rptr >= Len(text) Then 'fehler Return "" EndIf EndIf EndIf Next text[rptr] = Asc(" ") 'leerzeichen einfügen rptr += 1 Loop Until wordEnd >= Len(text) If (rptr > 0) And (rptr < Len(text)) Then text = utf8ToAnsi(Left(text,rptr)) 'string kürzen und UTF-8-zeichen nach ANSI konvertieren Else text = "" 'fehler EndIf Return text End Function Function utf8ToAnsi(text As String) As String Dim As Integer x Dim As String umlaut Do While InStr(text,Chr(195)) x = InStr(text,Chr(195)) Select Case text[x] Case 164 umlaut = "ä" Case 132 umlaut = "Ä" Case 182 umlaut = "ö" Case 150 umlaut = "Ö" Case 178 umlaut = "ò" Case 188 umlaut = "ü" Case 156 umlaut = "Ü" Case 186 umlaut = "ú" Case 159 umlaut = "ß" Case 169 umlaut = "é" Case 168 umlaut = "è" Case 160 umlaut = "à" Case 161 umlaut = "á" Case 167 umlaut = "c" 'eigentlich c mit apostroph unten Case Else 'Print "*** unbekanntes Sonderzeichen ***" 'Print text 'Print x;" ";Mid(text,x+1,1);" ";Asc(Mid(text,x+1,1)) 'Sleep umlaut = "?" End Select text = Left(text,x - 1) + umlaut + Mid(text,x + 2) 'utf-8 durch ansi-zeichen ersetzen Loop Return text End Function Sub ausgabe2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0) 'häufigste wörter und längstes wort suchen If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset For x As Integer = 1 To UBound(wort) vorkommen(x) = 0 wort (x) = "" Next EndIf If Len(text) > 1 Then vorkommen(UBound(wort)) = count wort(UBound(wort)) = text For x As Integer = UBound(wort) To 2 Step -1 If vorkommen(x) > vorkommen(x - 1) Then Swap vorkommen(x),vorkommen(x - 1) Swap wort(x),wort(x - 1) Else Exit For EndIf Next EndIf If index = treeSubnodeCount Then 'letzter aufruf --> ergebnis ausgeben For x As Integer = 1 To UBound(wort) - 1 Print #1, vorkommen(x);" ";wort(x) Next EndIf '?"***" 'Sleep End Sub Function a2i(text As String) As String Dim As String ret = Space(Len(text)) For x As Integer = 0 To Len(text) - 1 For y As Integer = 0 To textCharsCount - 1 If tcp[y] = text[x] Then ret[x] = y + 1 Exit For EndIf Next Next Return ret End Function Function i2a(text As String) As String Dim As String ret = Space(Len(text)) For x As Integer = 0 To Len(text) - 1 ret[x] = tcp[text[x]-1] Next Return ret End Function Sub makeSortedList(tree() As tNode Ptr, array() As tWordList, plugin As Any Ptr, filter As Any Ptr = 0) 'variablenliste für sortByCount ReDim As tList Ptr par(7) 'übergabe von 7 variablen Dim As UInteger listItemCount = 0, x = 0 Dim As tIndex Ptr ip par(0) = plugin 'pointer auf plugin par(3) = NULL 'pointer auf 1. listenknoten par(4) = NULL 'pointer auf letzten listenknoten par(5) = NULL 'pointer auf 1. index par(6) = Cast(Any Ptr,@listItemCount) 'pointer auf variable für listenlänge par(7) = filter For i As Integer = 1 To treeSubNodeCount 'alle knoten durchgehen traverseTree(tree(i), i, Chr(tcp[i-1]),par()) Next i 'indexliste löschen Do While par(5) ip = Cast(tIndex Ptr,par(5)) 'beginn der indexliste par(5) = Cast(Any Ptr,ip->nextIndex) 'pointer auf nächsten index Delete ip x += 1 Loop ReDim array(listItemCount) 'array für wortliste erstellen 'ergebnisse ausgeben Dim As tList Ptr p = par(3), q x = 1 Do While p <> NULL 'liste in array übertragen array(x).word = p->word array(x).count = p->count q = p->nextWord 'pointer auf nächsten knoten merken Delete p 'knoten löschen p = q x += 1 Loop End Sub Function filter_3a(text As String) As Integer If (Len(text) >= 3) And (text[2] = Asc("a")) Then 'wort erfüllt das suchkriterium Return 1 Else Return 0 EndIf End Function Function filter_auto(text As String) As Integer If InStr(text,"auto") Then Return 1 Else Return 0 EndIf End Function