stream.php – Suchmaschine braucht Hilfe

    Diese Seite verwendet Cookies. Durch die Nutzung unserer Seite erklären Sie sich damit einverstanden, dass wir Cookies setzen. Weitere Informationen

    • stream.php – Suchmaschine braucht Hilfe

      Hallo Leute,

      ich brauche Hilfe. Hier seht ihr den Code einer Datei meiner Suchmaschine. Sie sucht mit cURL nach Streams und trägt sie in die Datenbank ein. Ich würde gerne statt zwei Seiten mehrere Seiten eintragen.

      Außerdem möchte ich onlinestreams und onlinemoviez ganz entfernen.

      Folgende Seiten wollte ich eintragen:





      Quellcode

      1. <?
      2. //var_dump(stream::lookup("rambo"));
      3. /*
      4. var_dump(stream::onlinestreams_org("rambo"));
      5. $url="http://www.onlinestreams.org/ajax/suche.php";
      6. $post="suche="."rambo";
      7. $ref="http://www.onlinestreams.org/index.php";
      8. $osorg=stream::curl_post($url,$post,$ref);
      9. preg_match_all("#<tr><td>(.*?)</td></tr></table>#ims", $osorg, $div);
      10. echo $div[1][0]." ".$div[1][1];
      11. die();
      12. */
      /**
       * Stream search helper.
       *
       * Looks a title up in the MySQL table `data` (database `streamsearcher`) and,
       * when nothing is stored yet, scrapes the configured sites via cURL and stores
       * whatever they return. All methods are static and are called as
       * stream::method(), exactly as before.
       *
       * Requires the mysqli extension (with mysqlnd, for mysqli_stmt_get_result)
       * and the curl extension.
       */
      class stream
      {
          /** User agent sent with every HTTP request. */
          const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14";

          /**
           * Names of the scraper methods queried by scrape_sites(), in order.
           *
           * To add a site, write a static method that takes the search term and
           * returns a list of array(0 => title, 1 => player markup, 2 => info),
           * then add its name here. To drop a site, remove its name.
           */
          private static $scrapers = array(
              'onlinemoviezzz_blogspot_com',
              'onlinestreams_org',
          );

          /** Category labels that onlinestreams_org() strips from the info text. */
          private static $categories = array(
              "Alle gemischt", "Abenteuer", "Action", "Action-Komödie",
              "Anime", "Drama", "Fantasy", "Horror", "Kinderfilme", "Kino", "Kinofilme",
              "Komödie", "Konzerte", "Kriegsfilme", "Krimikomödie", "Melodram", "Romantik",
              "Satire", "Science Fiction", "Special", "Sport", "Thriller", "TVRiP's", "XXX", "Zeichentrick",
              "kategorie:", "Kategorie:",
          );

          /** Shared mysqli connection, opened on first use. */
          private static $db = null;

          /**
           * Searches the database for $title; if nothing is stored, scrapes the
           * sites and stores every hit.
           *
           * @param string $title search term
           * @return array|null database rows, or scraped rows as
           *                    array(url, title, object, desc); null when neither
           *                    the database nor the sites return anything
           */
          public static function lookup($title)
          {
              $found = self::search_title_db($title);
              if (!empty($found)) {
                  return $found;
              }

              // Nothing in the database: scrape the sites instead.
              $found = self::scrape_sites($title);
              if (empty($found)) {
                  return null;
              }

              foreach ($found as $hit) {
                  // Scraped rows have a different layout than database rows.
                  // Only the stored copy of the markup is stripped of </center>.
                  $object = str_replace('</center>', '', $hit[2]);
                  self::add_db($hit[0], strtolower($hit[1]), $object, $hit[3]);
              }

              return $found;
          }

          /**
           * Runs every scraper listed in self::$scrapers and converts their rows
           * from array(title, player markup, info) to array(url, title, object, desc).
           *
           * @param string $title search term
           * @return array list of array(url, title, object, desc); empty if no
           *               scraper found anything
           */
          public static function scrape_sites($title)
          {
              $combined = array();

              foreach (self::$scrapers as $scraper) {
                  $rows = call_user_func(array(__CLASS__, $scraper), $title);
                  if (!is_array($rows)) {
                      continue;
                  }
                  // foreach instead of for/count(): scraper rows need not be
                  // numbered without gaps.
                  foreach ($rows as $row) {
                      $markup = isset($row[1]) ? $row[1] : '';
                      $combined[] = array(
                          self::extract_link($markup),
                          strtolower(isset($row[0]) ? $row[0] : ''),
                          "<object>" . str_replace('<param name="wmode" value="transparent"></param>', '', $markup),
                          isset($row[2]) ? $row[2] : '',
                      );
                  }
              }

              return $combined;
          }

          /**
           * Inserts one stream into the table `data`.
           *
           * Rows whose url, title or object is shorter than three characters are
           * skipped. A failed insert is ignored on purpose (best effort, as before).
           *
           * @param string $url    player URL
           * @param string $title  lower-cased title
           * @param string $object player markup
           * @param string $desc   description text
           * @return void
           */
          public static function add_db($url, $title, $object, $desc)
          {
              $url = (string) $url;
              $title = (string) $title;
              $object = (string) $object;
              $desc = (string) $desc;

              if (strlen($url) < 3 || strlen($title) < 3 || strlen($object) < 3) {
                  return;
              }

              // Bound parameters: the values come from scraped pages and must
              // never be concatenated into the SQL text.
              $stmt = mysqli_prepare(self::db(), "INSERT INTO data VALUES (?, ?, ?, ?, 1)");
              if (!$stmt) {
                  return;
              }
              mysqli_stmt_bind_param($stmt, 'ssss', $url, $title, $object, $desc);
              mysqli_stmt_execute($stmt);
              mysqli_stmt_close($stmt);
          }

          /**
           * Returns all distinct rows whose title contains $title.
           *
           * The term is stripped of tags, lower-cased and trimmed before it is used
           * in a LIKE '%term%' comparison.
           *
           * @param string $title search term
           * @return array list of rows (numeric and associative keys); empty if
           *               nothing matches
           */
          public static function search_title_db($title)
          {
              $title = trim(strtolower(strip_tags($title)));

              return self::select_rows(
                  "SELECT DISTINCT * FROM data WHERE title LIKE ?",
                  '%' . $title . '%'
              );
          }

          /**
           * Returns every distinct title stored in the table `data`.
           *
           * @return array list of rows (numeric and associative keys); empty if
           *               the table is empty
           */
          public static function titleonly_db()
          {
              return self::select_rows("SELECT DISTINCT title FROM data");
          }

          /**
           * Scrapes onlinestreams.org.
           *
           * Posts the search term, then requests the player page of every hit.
           *
           * @param string $search search term
           * @return array list of array(0 => title, 1 => player markup, 2 => info)
           */
          public static function onlinestreams_org($search)
          {
              $ref = "http://www.onlinestreams.org/index.php";
              $listing = self::curl_post(
                  "http://www.onlinestreams.org/ajax/suche.php",
                  "suche=" . urlencode($search),
                  $ref
              );
              if (!is_string($listing)) {
                  return array();
              }

              preg_match_all("#playMovie\('filme',(.*?)\)#ims", $listing, $match);
              preg_match_all("#<tr><td>(.*?)</td></tr></table>#ims", $listing, $titles);

              $ret = array();
              foreach ($match[1] as $i => $args) {
                  // The id is everything from the first comma on, or the whole
                  // capture when there is no comma (unchanged from before).
                  $id = substr($args, (int) strpos($args, ","));

                  $page = self::curl_post(
                      "http://www.onlinestreams.org/ajax/movie_play.php",
                      "id=" . $id,
                      $ref
                  );
                  if (!is_string($page)) {
                      continue;
                  }

                  preg_match_all("#<div (.*?)left: -40px\">#ims", $page, $div);
                  if (count($div[1]) == 0) {
                      continue;
                  }

                  // NOTE(review): when a movie has several player blocks only the
                  // last one is kept, which is what the previous loop ended up with.
                  $block = end($div[1]);
                  $block = substr($block, (int) strpos($block, "<param name="));
                  $endp = (int) strpos($block, "</object>") + 9;
                  $purl = substr($block, 0, $endp);
                  $info = strip_tags((string) substr($block, $endp));

                  // Fixed-length hoster boilerplate in front of the description.
                  if (strpos($info, "Megavideo") !== false) {
                      $info = (string) substr($info, 236);
                  }
                  if (strpos($info, "Veoh") !== false) {
                      $info = (string) substr($info, 213);
                  }
                  $info = str_replace(self::$categories, "", $info);

                  $ret[] = array(
                      isset($titles[1][$i]) ? $titles[1][$i] : '',
                      $purl,
                      $info,
                  );
              }

              return $ret;
          }

          /**
           * Scrapes onlinemoviezzz.blogspot.com.
           *
           * @param string $search search term
           * @return array list of array(0 => title, 1 => player markup, 2 => info)
           */
          public static function onlinemoviezzz_blogspot_com($search)
          {
              $page = self::curl_get(
                  "http://onlinemoviezzz.blogspot.com/search?q=" . urlencode($search),
                  "http://onlinemoviezzz.blogspot.com"
              );
              if (!is_string($page)) {
                  return array();
              }

              preg_match_all("#Permanent Link to (.*?)'#ims", $page, $titles);
              preg_match_all("#<embed(.*?)/>#ims", $page, $embeds);

              $ret = array();
              foreach ($embeds[1] as $i => $attrs) {
                  $embed = "<embed" . $attrs;
                  // Cut everything after the last ">".
                  $embed = substr($embed, 0, (int) strrpos($embed, ">") + 1);

                  $title = isset($titles[1][$i]) ? $titles[1][$i] : '';
                  // Strip the brackets and the hoster name in any letter case.
                  $title = str_ireplace(array("(", ")", "veoh", "megavideo"), "", $title);

                  $ret[$i] = array($title, $embed, '');
              }

              preg_match_all("#<div style=\"text-align: center; (.*?)</div>#ims", $page, $descs);
              if (count($descs[1]) != count($embeds[1])) {
                  // Different page layout: fall back to the "Beschreibung:" marker.
                  preg_match_all("#Beschreibung:(.*?)</div>#ims", $page, $descs);
              }

              foreach ($descs[1] as $i => $raw) {
                  if (!isset($ret[$i])) {
                      // A description without a player is of no use.
                      break;
                  }
                  $text = preg_replace('#<center>(.*?)<\/center>#ims', '', $raw);
                  $text = str_replace("color: rgb(0, 0, 0);\">", "", $text);
                  $ret[$i][2] = trim(strip_tags($text));
              }

              return $ret;
          }

          /**
           * Extracts the first src="..." URL from player markup.
           *
           * The pattern expects a space after the closing quote.
           *
           * @param string $obj player markup
           * @return string the URL, or '' when the markup has no matching src attribute
           */
          public static function extract_link($obj)
          {
              if (preg_match("#src=\"(.*?)\" #ims", (string) $obj, $erg)) {
                  return $erg[1];
              }

              return '';
          }

          /**
           * Fetches $url with a GET request.
           *
           * @param string $url target URL
           * @param string $ref referer header
           * @return string|false response body, or false when the request fails
           */
          public static function curl_get($url, $ref = "")
          {
              return self::curl_request($url, $ref, array());
          }

          /**
           * Sends $params to $url with a POST request, using cookiejar.txt as
           * cookie store.
           *
           * The jar file is no longer truncated before the request, so cookies
           * from earlier requests are actually sent.
           *
           * @param string $url    target URL
           * @param string $params URL-encoded request body
           * @param string $ref    referer header
           * @return string|false response body, or false when the request fails
           */
          public static function curl_post($url, $params, $ref = "")
          {
              return self::curl_request($url, $ref, array(
                  CURLOPT_POST => true,
                  CURLOPT_POSTFIELDS => $params,
                  CURLOPT_COOKIEJAR => 'cookiejar.txt',
                  CURLOPT_COOKIEFILE => 'cookiejar.txt',
              ));
          }

          /**
           * Performs one cURL request with the options shared by GET and POST.
           *
           * @param string $url   target URL
           * @param string $ref   referer header
           * @param array  $extra additional CURLOPT_* options
           * @return string|false response body, or false when the request fails
           */
          private static function curl_request($url, $ref, $extra)
          {
              $ch = curl_init();
              curl_setopt_array($ch, array(
                  CURLOPT_URL => $url,
                  CURLOPT_REFERER => $ref,
                  CURLOPT_USERAGENT => self::USER_AGENT,
                  CURLOPT_RETURNTRANSFER => true,
                  CURLOPT_FOLLOWLOCATION => true,
              ) + $extra);
              $output = curl_exec($ch);
              curl_close($ch);

              return $output;
          }

          /**
           * Returns the shared database connection, opening it on first use.
           *
           * Terminates the script with the same messages as before when the
           * server or the database cannot be reached.
           *
           * @return mysqli open connection with `streamsearcher` selected
           */
          private static function db()
          {
              if (self::$db === null) {
                  // Report errors through return values so the die() messages
                  // below keep working on PHP versions where mysqli throws.
                  mysqli_report(MYSQLI_REPORT_OFF);

                  $link = mysqli_connect('localhost', '', '');
                  if (!$link) {
                      die("Could not connect to MYSQL host");
                  }
                  if (!mysqli_select_db($link, 'streamsearcher')) {
                      die("Couldnt connect to databank.");
                  }
                  self::$db = $link;
              }

              return self::$db;
          }

          /**
           * Runs a SELECT with at most one bound string parameter and returns
           * all rows.
           *
           * @param string      $sql   statement, with one "?" when $param is given
           * @param string|null $param value bound to the placeholder
           * @return array list of rows with numeric and associative keys
           */
          private static function select_rows($sql, $param = null)
          {
              $link = self::db();

              $stmt = mysqli_prepare($link, $sql);
              if (!$stmt) {
                  die("Couldnt send data to db: " . mysqli_error($link));
              }
              if ($param !== null) {
                  mysqli_stmt_bind_param($stmt, 's', $param);
              }
              if (!mysqli_stmt_execute($stmt)) {
                  die("Couldnt send data to db: " . mysqli_stmt_error($stmt));
              }

              $result = mysqli_stmt_get_result($stmt);
              if (!$result) {
                  die("Couldnt send data to db: " . mysqli_stmt_error($stmt));
              }

              $rows = array();
              while ($row = mysqli_fetch_array($result)) {
                  $rows[] = $row;
              }
              mysqli_stmt_close($stmt);

              return $rows;
          }
      }
      217. ?>
      Alles anzeigen
    • Wo ist denn konkret das Problem? Jede Seite hat ja ihr eigenes Regelwerk, um die Items aus dem Quelltext zu extrahieren.
      Das musst du für deine neuen Seiten anlegen.

      Generell würde ich das ganze Klassenkonzept überdenken. Dein Konstrukt ist nicht erweiterbar.
      Am besten definierst du ein Interface mit den Standardmethoden eines Crawlers und implementierst es dann.

      Quellcode

      /** Registered crawlers, in the order they were added. */
      private $crawler = array();

      /**
       * Registers one more crawler.
       *
       * @param Crawler $c crawler to query in scrape_sites()
       * @return void
       */
      public function addCrawler(Crawler $c)
      {
          $this->crawler[] = $c;
      }

      /**
       * Runs every registered crawler.
       *
       * @return array one entry per crawler, holding whatever its crawl() returned
       */
      public function scrape_sites()
      {
          $return = array();
          foreach ($this->crawler as $c) {
              $return[] = $c->crawl();
          }

          return $return;
      }
      Alles anzeigen


      Des Weiteren solltest du die Methoden nicht statisch nutzen.