array('^G' => 'Garçon', '^F' => 'Fille'), 'partie' => array( 'cheveu' => 'cheveux', '(ventr|abdomen)' => 'ventre', '^G.*(torce|torse)' => 'torse', '^G.*(penis|bite|sexe|queu)' => 'pénis', '^F.*(sein|nichon)' => 'seins', '^F.*(chatte|vulve|sexe|vagin)' => 'vulve', 'poitrin' => 'poitrine', '(anch|tail)' => 'hanches', '(paume|main)' => 'mains', '(levre|leve)' => 'lèvres', '(cou|c0u)' => 'cou', '(do$|do |dos)' => 'dos', 'fess' => 'fesses', 'visag' => 'visage', 'oreil' => 'oreilles', 'joue' => 'joues', 'coude', 'bra' => 'bras', 'epaul' => 'épaules', 'cuiss' => 'cuisses', 'nuqu' => 'nuque', 'bouch' => 'bouche', 'taill' => 'taille', 'jamb' => 'jambes', 'bassin', 'genou' => 'genoux', 'nombril', 'chevil' => 'chevilles', ), );

/**
 * Normalise a raw comment before it is matched against the criteria patterns.
 *
 * Trims the text, decodes HTML entities, replaces bytes 0x82-0x9F with
 * printable substitutes, turns typographic apostrophes into "'", strips
 * accents and expands the AE/OE ligatures.
 *
 * NOTE(review): the three-argument strtr() calls substitute byte by byte, so
 * the accent table only behaves as intended when this file and the input are
 * in a single-byte encoding (presumably ISO-8859-1/15) -- confirm before
 * saving this file as UTF-8.
 *
 * @param string $str Raw comment text.
 * @return string Normalised comment.
 */
function clean_comment($str)
{
    $str = trim($str);

    // Numeric/named apostrophe entities are decoded by hand: before PHP 8.1
    // html_entity_decode() defaults to ENT_COMPAT and leaves them untouched.
    // NOTE(review): the original search literal was lost (it had been decoded
    // to a bare quote); presumably it was '&#039;' -- confirm.
    $str = str_replace(array('&#039;', '&#39;', '&apos;'), "'", $str);
    $str = html_entity_decode($str);

    // Bytes 0x82-0x9F mapped one-to-one onto printable replacements.
    $str = strtr($str,
        "\x82\x83\x84\x85\x86\x87\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f",
        "'f\".**^\xa6<\xbc\xb4''\"\"---~ \xa8>\xbd\xb8\xbe");
    $str = strtr($str, array('’' => "'"));

    // Accent stripping, so the ASCII patterns of the criteria can match.
    $str = strtr($str,
        "ÀÁÂÃÄÅàáâãäåÇçÒÓÔÕÖØòóôõöøÈÉÊËèéêëÌÍÎÏìíîïÙÚÛÜùúûü¾ÝÿýÑñ",
        "AAAAAAaaaaaaCcOOOOOOooooooEEEEeeeeIIIIiiiiUUUUuuuuYYyyNn");
    $str = str_replace('Æ', 'AE', $str);
    $str = str_replace('æ', 'ae', $str);
    $str = str_replace('¼', 'OE', $str);
    $str = str_replace('½', 'oe', $str);

    // Disabled slug-style clean-up, kept for reference:
    //$str = preg_replace('/[^A-Za-z0-9_\s\'\:\/\[\]-]/', '', $str);
    //$str = preg_replace('/[\s\'\:\/\[\]-]+/',' ',trim($str));
    //$res = str_replace(' ','-',$str);

    return $str;
}

// Walk through every comment page of the article, counting criteria matches
// ($scores) and word occurrences ($words). $id_article, $pseudo and $criteres
// are read here and must have been set earlier in the file.
$current = 1;
$scores = array();
$words = array();
$total = 0;

while ($current) {
    $page = $id_article.'_comment_'.$current.'.html';
    $url = 'http://'.$pseudo.'.skyrock.com/'.$page;

    // Pages are cached under /tmp so that reruns do not download them again.
    // NOTE(review): the results of copy() and file_get_contents() are not
    // checked; after a failed download $content is false.
    if (!file_exists('/tmp/'.$page)) {
        copy($url, '/tmp/'.$page);
    }
    $content = file_get_contents('/tmp/'.$page);

    if (!isset($nb_pages)) {
        // NOTE(review): this pattern is empty in the current copy of the file
        // (its markup appears to have been stripped) and has no capture group;
        // restore the original expression from version control.
        if (!preg_match('!!', $content, $match))
            die("Impossible de trouver le nombre de pages");
        $nb_pages = isset($match[1]) ? (int)$match[1] : 0;
    }

    // NOTE(review): the markup surrounding the capture group looks stripped
    // here as well; the literal is kept exactly as found.
    preg_match_all('!

([^<]+)

!', $content, $match, PREG_SET_ORDER);

    // Iterate by value: the rows are never modified, and a by-reference loop
    // variable would stay bound to the last element after the loop.
    foreach ($match as $row) {
        $comment = clean_comment($row[1]);
        $genre = 'unknow';
        $total++;

        foreach ($criteres as $name => $crits) {
            foreach ($crits as $check => $crit_name) {
                // Entries listed without a key use their label as the pattern.
                if (is_int($check))
                    $check = $crit_name;

                // NOTE(review): PCRE delimiters are single bytes, so '§' only
                // works while this file is stored in a single-byte encoding.
                if (!preg_match('§'.$check.'§i', $comment))
                    continue;

                if ($name == 'genre')
                    $genre = $crit_name;

                if (!isset($scores[$name][$crit_name]))
                    $scores[$name][$crit_name] = 0;
                $scores[$name][$crit_name] += 1;

                // Non-gender criteria are also tallied per gender seen so far
                // for this comment ('unknow' when none matched).
                if ($name != 'genre') {
                    $bucket = 'g_'.$genre;
                    if (!isset($scores[$bucket][$crit_name]))
                        $scores[$bucket][$crit_name] = 0;
                    $scores[$bucket][$crit_name] += 1;
                }
            }
        }

        // Word frequency: keep only letters, digits and single spaces.
        $comment = preg_replace('![^a-z0-9 ]+!i', '', $comment);
        $comment = preg_replace('![ ]+!', ' ', $comment);
        $c_words = explode(' ', strtolower($comment));
        foreach ($c_words as $word) {
            if (!isset($words[$word]))
                $words[$word] = 0;
            $words[$word] += 1;
        }
    }

    // Safety stop: dump the scores and give up after 650 pages.
    if ($current > 650) {
        echo '
';
        print_r($scores);
        break;
    }

    $current++;

    if ($current > $nb_pages)
        $current = false;
}

// Word frequencies, most frequent first.
echo '
';
arsort($words);
print_r($words);

// Raw counters.
echo '
';
print_r($scores);

// Per-part summary: share of all comments, then share among boys and girls.
// NOTE(review): the markup that used to wrap these cells appears to have been
// stripped from this copy of the file; only the literal text still present is
// emitted here -- restore the original tags from version control if needed.
$parties = isset($scores['partie']) ? $scores['partie'] : array();
ksort($parties);
$nb_garcons = isset($scores['genre']['Garçon']) ? $scores['genre']['Garçon'] : 0;
$nb_filles = isset($scores['genre']['Fille']) ? $scores['genre']['Fille'] : 0;

echo '
Partie Total Garçons Filles
';
foreach ($parties as $name => $nb) {
    // A part may never have been counted for one of the genders.
    $nb_g = isset($scores['g_Garçon'][$name]) ? $scores['g_Garçon'][$name] : 0;
    $nb_f = isset($scores['g_Fille'][$name]) ? $scores['g_Fille'][$name] : 0;

    echo $name.' ';
    // Every ratio is guarded so that an empty group prints 0 instead of
    // dividing by zero.
    echo ($total ? round(($nb / $total) * 100, 1) : 0);
    echo " ";
    echo ($nb_garcons ? round(($nb_g / $nb_garcons) * 100, 1) : 0);
    echo " ";
    echo ($nb_filles ? round(($nb_f / $nb_filles) * 100, 1) : 0);
    echo "
";
}
echo '-- Nombre de sondés '.$total.' '.$nb_garcons.' '.$nb_filles.'
';
?>