NOESIS vs. NetworkX

Simple performance test using a Wikipedia GML network (available at http://spark-public.s3.amazonaws.com/sna/other/wikipedia.gml):


NETWORK STATISTICS
- Nodes: 27475
- Links: 85729
Degree distributions
- Out-degrees: [n=27475 min=0.0 max=565.0 avg=3.1202547770700635 dev=9.038219683086334]
- In-degrees:  [n=27475 min=0.0 max=367.0 avg=3.1202547770700635 dev=8.99990229087909]
Node of maximum out-degree: 565.0 out-links
- id: 8436
- wikiid: 1807178
- label: List of mathematics articles (S)
Node of maximum in-degree: 367.0 in-links
- id: 10807
- wikiid: 7250299
- label: Geometry
Betweenness
[n=27475 min=2.0 max=2.1696583120297905E7 avg=79690.76047315744 dev=404883.40160651755]
Node of maximum betweenness: 2.1696583120297905E7
- id: 12533
- wikiid: 5176
- label: Calculus

Time: 77067 ms

Same experiment running NetworkX...

40 minutes !!!


NetworkX source code

# see http://networkx.lanl.gov/ for a tutorial on NetworkX

import networkx as nx

import sys

from plfit import *

# Installation: pyparsing @ http://sourceforge.net/projects/pyparsing/files/pyparsing/pyparsing-1.5.6/pyparsing-1.5.6.win32-py2.7.exe/download


# your GML file
filename = 'wikipedia.gml'

# read in the graph using networkX
G = nx.read_gml(filename,relabel=True)

# Graph size

num_nodes = G.number_of_nodes()
print 'number of nodes: ' + str(num_nodes)

num_edges = G.number_of_edges()
print 'number of edges: ' + str(num_edges)
sys.stdout.flush()

# Degree distribution

degrees = list(G.degree().values())
dmax = max(degrees)
dmin = min(degrees)

print 'Degree distribution: [' + str(dmin) + ',' + str(dmax) + ']'
sys.stdout.flush()


# Power-law

# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells 
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at 
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.r
# You will also need the VGAM R package which you can download via the installer

[alpha, xmin, L]= plfit(degrees)

print 'Power-law degree distribution: ' + str(alpha)
print ' - xmin: ', xmin
print ' - L: ', L
sys.stdout.flush()

# In-degree
# MAX: Geometry

indegree = nx.in_degree_centrality(G)

maxkey,maxvalue= max(indegree.iteritems(), key=lambda x:x[1])
print 'Max in-degree: ' + str(maxkey) + ' ' + str(maxvalue)
sys.stdout.flush()

# Out-degree
# MAX: List of mathematics articles

outdegree = nx.out_degree_centrality(G)

inverse = [(value, key) for key, value in outdegree.items()]
print 'Max out-degree: ', max(inverse)[1], max(inverse)[0]
sys.stdout.flush()

# Betweenness centrality
# MAX = "Calculus"
# WARNING! Slow... 40 minutes for a 27475-node network with 85729 links !!!

between = nx.betweenness_centrality(G)

maxkey,maxvalue= max(between.iteritems(), key=lambda x:x[1])
print 'Max betweenness: ' + str(maxkey) + ' ' + str(maxvalue)
print ' - In-degree: ', indegree[maxkey]
print ' - Out-degree: ', outdegree[maxkey]
sys.stdout.flush()


NOESIS source code

package noesis.ui.console;

import java.io.*;

import ikor.util.Benchmark;

import noesis.Network;
import noesis.algorithms.traversal.ConnectedComponents;
import noesis.Attribute;
import noesis.AttributeNetwork;
import noesis.analysis.NodeScore;
import noesis.analysis.structure.Betweenness;
import noesis.analysis.structure.InDegree;
import noesis.analysis.structure.OutDegree;

/**
 * Network statistics.
 * 
 * @author Fernando Berzal
 */

public class NetworkStats 
{

  public static void main(String[] args)
    throws IOException
  {
    
    if (args.length==0) {
      
      System.err.println("NOESIS Network Statistics:");
      System.err.println();
      System.err.println("  java noesis.ui.console.NetworkStats <file>");
      
    } else {
      
      Benchmark  crono = new Benchmark();
      
      crono.start();
      
      Network net = NetworkUtilities.read(args[0]);
      
      NetworkUtilities.printNetworkInformation(net);
      
      // Degree distributions
      
      OutDegree outDegrees = new OutDegree(net);
      InDegree  inDegrees = new InDegree(net);
      
      outDegrees.compute();
      inDegrees.compute();
      
      System.out.println("Degree distributions");
      System.out.println("- Out-degrees: "+outDegrees.getResult());
      System.out.println("- In-degrees:  "+inDegrees.getResult());
      
      if (net instanceof AttributeNetwork) {
                
        System.out.println("Node of maximum out-degree:");
        printNode ( (AttributeNetwork) net, outDegrees.getResult().maxIndex());        

        System.out.println("Node of maximum in-degree:");
        printNode ( (AttributeNetwork) net, inDegrees.getResult().maxIndex());
      }
            
      // Betweenness 

      Betweenness betweenness = new Betweenness(net);
      betweenness.compute();
      System.out.println("Betweenness");
      System.out.println(betweenness.getResult());

      if (net instanceof AttributeNetwork) {
        System.out.println("Node of maximum betweenness:");
        printNode ( (AttributeNetwork) net, betweenness.getResult().maxIndex());        
      }

      crono.stop();
      
      System.out.println();
      System.out.println("Time: "+crono);
    }

  }


  public static void printNode (AttributeNetwork net, int index)
  {
    if (index<net.size()) {
      //System.out.println("- index: "+index);
      System.out.println("- out-degree: "+net.outDegree(index)+" out-links");
      System.out.println("- in-degree: "+net.inDegree(index)+" in-links");

      for (int i=0; i<net.getNodeAttributeCount(); i++) {
        Attribute attribute = net.getNodeAttribute(i);
        System.out.println("- "+attribute.getID()+": "+attribute.get(index));
      }
    }    
  }
}