Algorithm to visualise data (multidimensional scaling?) in PHP project

477 Views Asked by At

I have small matrix of +-100 points/values, results of test and their distances (0-10, 10 is the closest) to each other: http://vis.arcs.cz.

I would like to visualize it in 2D to quickly find a groups of close values. So I need to process this matrix and get the coordinates of points in 2D.

The way is propably multidimensional scaling but I wasnt able to find an algorithm, library or extension nor use the math formulas into PHP code.

Im googling last two days and the closest results of it is http://www.php.net/manual/en/lapack.leastsquaresbysvd.php - ? (cant share more links as novice)

I'll be grateful for any solution applicable in php project (compiled MathLab code in C++...).

1

There are 1 best solutions below

0
On
index.php:

<?php
require("data.php");
require("Mds.php");
$mds = new Mds();
$mds->prepareData($data);
//cislo udava pocet iteraci algoritmu,
//idealne by melo mit hodnotu pocetUzlu na druhou, prakticky muze byt o dost mensi, 5 az 10 krat pocet uzlu.
$mds->scale(100);
//mira splneni podminek - vraci cislo < mensi nez 1. Cim blizsi k 1, tim lepe vyhovuji vzdalenosti
//z vypocteneho rozlozeni zadanym vzdalenostem.
//v nemetrickych datech ovsem dochazi rychle k poklesu az do zapornych hodnot, protoze ve 2D neexistuje
//pro nektere body zadne spravne misto.
echo $mds->getRSquared();
?>
<!DOCTYPE HTML>
<html>
<head>
<script type="text/javascript" src="http://canvasjs.com/assets/script/canvasjs.min.js"></script>
<script type="text/javascript">
window.onload = function () {
var chart = new CanvasJS.Chart("chartContainer",
{
title:{
text: "MDS",
fontFamily: "arial black",
fontColor: "black"
},
axisX: {
title:"",
titleFontFamily: "arial"

},
axisY:{
title: "",
titleFontFamily: "arial",
titleFontSize: 12
},

data: [
{
type: "scatter",
toolTipContent: "<span style='"'color: {color};'"'><strong>{name}</strong></span> x: {y}, y: {y}",
dataPoints:

<?php
echo $mds->printCoords('{x: %X, y: %Y, name: "%LABEL"}');
?>
}]
});

chart.render();
}
</script>

</head>
<body>
<div id="chartContainer" style="height: 600px; width: 600px;">
</div>

<a href="mds.zip">mds.zip</a>
</body>
</html>


data.php:



$data = array( 
array ("Jablko", "Voda", 2),
array("Jablko", "Ohen", 7),
array("Jablko", "Cervena", 3),
array("Jablko", "Zelena", 2),

array("Voda", "Ohen", 8),
array("Voda", "Cervena", 8),
array("Voda", "Zelena", 2),

array("Ohen", "Cervena", 1),
array("Ohen", "Zelena", 5),

array("Cervena", "Zelena", 3)
);


Mds.php:

<?php

class Mds {
//node - pole tvaru [[x, y, label], ...]
public $nodes = array();

//tvaru [source][target]=distance
public $givenDistances = array();
public $currentDistances = array();


public function prepareData($data) {
$nodesMap = array();
$xxxx = 10000;
$xxx= 1000;

// $xxxx = 5000;
// $xxx = 600;

foreach($data as $link) {
$source = $link[0];
$target = $link[1];
if(!isset($nodesMap[$source])) {
$nodesMap[$source] = true;
$this->nodes[]=array((float) rand(1,$xxxx) / $xxx, (float) rand(1,$xxxx) / $xxx, $source);
}
if(!isset($nodesMap[$target])) {
$nodesMap[$target] = true;
$this->nodes[]= array((float) rand(1,$xxxx) / $xxx, (float) rand(1,$xxxx) / $xxx, $target);
}
}
//vytvori matici pro ulozeni vzdalenosti
foreach($this->nodes as $node) {
$this->givenDistances[$node[2]] = array();
$this->currentDistances[$node[2]] = array();
}
foreach($data as $link) {
$source = $link[0];
$target = $link[1];
$distance = $link[2];
$this->givenDistances[$source][$target] = $distance;
$this->givenDistances[$target][$source] = $distance;
}
}

protected function countCurrentDistances() {
$mean = 0;
$i = 0;

foreach($this->nodes as $nodeA) {
foreach($this->nodes as $nodeB) {
$dist = sqrt(($nodeB[0]-$nodeA[0])*($nodeB[0]-$nodeA[0]) + ($nodeB[1]-$nodeA[1])*($nodeB[1]-$nodeA[1]));
$this->currentDistances[$nodeA[2]][$nodeB[2]] = $dist;

if($nodeA[2]!==$nodeB[2]) {
$mean += $this->givenDistances[$nodeA[2]][$nodeB[2]];
}
}
}
// echo "<pre>";
// var_dump($this->currentDistances);
// echo "</pre>";
$check = array();

$nodesCount = count($this->nodes);
$this->mean = $mean/($nodesCount*$nodesCount);
}

public function getRSquared() {
$this->countCurrentDistances();

$sum = 0;
$SStot = 0;
$SSres = 0;
foreach($this->nodes as $nodeA) {
foreach($this->nodes as $nodeB) {
$aLab = $nodeA[2];
$bLab = $nodeB[2];
if($aLab===$bLab) {
continue;
}

$given = $this->givenDistances[$aLab][$bLab];
$computed = $this->currentDistances[$aLab][$bLab];

$SSres+=(($given-$computed)*($given-$computed));
$SStot+= ($given-$this->mean)*($given-$this->mean);
}
}

return 1 - ($SSres/$SStot);
}

protected function iterate($inverseCoefStrenth=1) {
for($i=0; $i<count($this->nodes); $i++) {
$nodeA = $this->nodes[$i];
$move = array(0,0);
$moves = 0;

for($j=0; $j<count($this->nodes); $j++) {
if($j===$i) {
continue;
}

$nodeB = $this->nodes[$j];
$dist = $this->givenDistances[$nodeA[2]][$nodeB[2]];

$AB = array($nodeB[0]-$nodeA[0], $nodeB[1]-$nodeA[1]);
$lAB = sqrt(($AB[0]*$AB[0])+($AB[1]*$AB[1]));

$coef = ($lAB - $dist)/$lAB;

$AA1 = array($AB[0]*$coef, $AB[1]*$coef);

$moves++;
$move[0]+=$AA1[0];
$move[1]+=$AA1[1];
}

if($moves>0) {
$resultingMoveX = $move[0]/($moves*$inverseCoefStrenth);
$resultingMoveY = $move[1]/($moves*$inverseCoefStrenth);
$this->nodes[$i][0]+=$resultingMoveX;
$this->nodes[$i][1]+=$resultingMoveY;
}
}
}


public function scale($iterations, $coefIncrease=0) {
$basicCoef=1;
for($i=0; $i<$iterations; $i++) {
$this->iterate($basicCoef);
//echo $this->getRSquared();
//echo " | ";

$basicCoef+=$coefIncrease;
}
}

public function coordsAsString() {
$coords = array();
foreach($this->nodes as $node) {
$coords[]="[{$node[0]}, {$node[1]}]";
}

$res = join(", ",$coords);
return "[$res]";
}

public function printCoords($pattern='[%X,%Y,"%LABEL"]') {
$coords = array();
foreach($this->nodes as $node) {
$x = $node[0];
$y = $node[1];
$label = $node[2];
$res = str_replace('%X', $x, $pattern);
$res = str_replace('%Y', $y, $res);
$res = str_replace('%LABEL', $label, $res);
$coords[]=$res;
}

$res = join(", ",$coords);
return "[$res]";
}

public function pointsCoords() {
$coords = array();
foreach($this->nodes as $node) {
$res['x'] = round($node[0]*100);
$res['y'] = round($node[1]*100);
$res['label'] = $node[2];
$coords[] = $res;
}
return $coords;
}

}