px

PC-Axis file parsing in JavaScript

Px.js

Px.js is a JavaScript library for extracting and manipulating data stored in PC-Axis files. It is intended as a generic solution which can handle any well-formed PC-Axis file.

Px.js is primarily intended for use in a web browser but it can also be used as a Node.js module.

PC-Axis is a file format used for dissemination of statistical information. The format is used by a large number of national statistical organisations to disseminate official statistics. For general information on PC-Axis refer to the PC-Axis web site and for information on the file format specifically, see the PC-Axis file format specification.

Px.js is dependent on the Underscore JavaScript utility library.

Download the minified production version or the development version.

Include Underscore and Px.js in your HTML:

<script type="text/javascript" src="path/to/underscore.js"></script>
<script type="text/javascript" src="path/to/px.js"></script>

then in your JavaScript:

For remote PC-Axis files:

Pass the Px constructor the responseText from an XMLHttpRequest, for example:

var xhr = new XMLHttpRequest();
xhr.onreadystatechange = function() {
    if (xhr.readyState === 4 && xhr.status === 200) {
        var px = new Px(xhr.responseText);
    }
};
 
xhr.open('GET', 'path/to/remote/PC-Axis/file');
xhr.send();

For local PC-Axis files:

Use the FileReader API to pass the file to the Px constructor. For example, assuming a file input tag with the id 'pxfile' in your HTML...

<input type="file" id="pxfile" />

...construct a new Px object in a callback triggered when a new file is selected:

document.getElementById('pxfile').onchange = handlePxfile;
 
var px = {};
function handlePxfile() {
    var reader = new FileReader();
 
    reader.onload = function() {
        return px = new Px(reader.result);
    };
    reader.readAsText(this.files[0]);
}

Install the module with npm:

npm install px

then, in your code:

var Px = require('px'),
    fs = require('fs');
    
fs.readFile('path/to/PC-Axis/file', 'utf8', function(err, data) {
    px = new Px(data);
});
// Constructor 
var px = new Px(pxString);
 
// return values for passed keyword 
var keyword = px.keyword('KEYWORD');
 
// return array containing all keywords 
var keys = px.keywords();
 
// return array containing all variables (STUBs & HEADINGs) 
var vars = px.variables();
 
// return variable at index 0 in variables array 
var variable = px.variable(0);
 
// return index of Region variable in variables array  
var variable = px.variable('Region');
 
// return array of values for passed variable 
// (can take array index or variable name) 
var values = px.values(variable);
 
// return array of value codes for passed variable 
// (can take array index or variable name) 
var codes = px.codes(variable);
    
// return the data value for a passed array of variable values 
var datapoint = px.datum([0,0,0,0]);
 
// return a column of data 
var column = px.dataCol(['*',0,0,0]);
 
// return an associative array of data in the form: {valueName: data} 
var column = px.dataDict(['*',0,0,0]);
 
// Return an array of data objects, one for each datum 
var entries = px.entries();
 
// Remove values and associated data from Px object 
px.truncate([[2,3,4,5],['*'],['*'],['*']]);

A new PC-Axis object is constructed by passing a string containing a PC-Axis file's contents to the Px constructor. This will usually be done in the callback of a FileReader.readAsText() call (using the reader's result) or an XMLHttpRequest call (using its responseText), as both of these provide a string containing the target file's contents.

var pxString; // String containing PC-Axis file's contents 
var px = new Px(pxString);

The Px constructor parses the PC-Axis file's data and metadata into two attributes, data and metadata, and returns an object equipped with a number of methods to access and manipulate its contents.

Px attributes are not intended to be accessed directly. Data and metadata are generally accessed more easily and more consistently via the object's methods.

metadata

The metadata attribute is an object containing all of the PC-Axis file's metadata. Each of the metadata object's keys is a metadata keyword from the original PC-Axis file, each of its values is an object. Where a keyword in the original PC-Axis file has a single string value (meaning that the value applies to the entire dataset - e.g. the 'TITLE' keyword), then that keyword's value object contains a single key, 'TABLE', the value of which is the string to which that keyword pointed in the original PC-Axis file.

// Return metadata object 
var meta = px.metadata;
 
// Return dataset's TITLE value as string 
var title = px.metadata.TITLE.TABLE;
 
// Return object with variables as keys and arrays of codes as values 
var codes = px.metadata.CODES;
 
// Return array of codes for Region variable 
var regCodes = px.metadata.CODES.Region;

data

The data attribute is an array containing all of the values following the DATA keyword in the original PC-Axis file. The data are stored as strings. Missing or obfuscated data values (usually encoded as a series of dots ("..") or a dash ("-") in PC-Axis files) are stored unchanged in the data array.

// Return array of data 
var data = px.data;

keyword(String)

The keyword method returns the value of the passed keyword. If the keyword holds a value which refers to the entire table (such as the 'TITLE' keyword), then that value is returned as a string. If the keyword passed to the method has different values for each variable (for example, the 'VALUES' and 'CODES' keywords will have a different list of values for each variable), then a reference to the object holding the entire set of values is returned by the method.

// Return the value of the title keyword as a string 
var title = px.keyword('TITLE');
 
// Return object with variables as keys and arrays of codes as values 
var codes = px.keyword('CODES');

keywords()

The keywords method returns an array containing all of the metadata keywords associated with the PC-Axis dataset represented by the object.

// Return an array of keywords 
var metaKeys = px.keywords();

title()

The title method is a convenience method which returns the TITLE attribute of the dataset. It is equivalent to metadata.TITLE.TABLE.

// Return TITLE of dataset 
var title = px.title();

variables()

The variables method returns an array containing the names of all of the variables present in the current PC-Axis file. The variables in the returned array are ordered as they are in the PC-Axis file; first the STUB variables, followed by the HEADING variables.

// Return an array of variable names 
var variables = px.variables();

variable(String or Array-Index)

When passed an array index the variable method returns the variable name at that index in an array composed of [STUB variables, HEADING variables] - i.e. the array returned by the variables method.

When passed a string (containing a variable name) this method returns the index in the variables array at which the named variable occurs.

// Return the name of the variable at position 0 in the variables array 
var varName = px.variable(0);
 
// Return the position (array index) of the 'Region' variable in the variables array 
var idx = px.variable('Region');

valCounts()

Returns an array of value counts. Each element in the array is the number of possible values in the current PC-Axis dataset for the variable with the same index (in the variables array) as the element.

// Return array of value counts 
var counts = px.valCounts();

values(String or Array-Index)

When passed the name of a variable or the index of a variable in the variables array the values method returns an array containing the names of all possible values for that variable.

The order of the values in the array returned by this method matches the order in the original file.

// Return an array of all possible values (by name) for the variable at position 0 in the variables array 
var vals = px.values(0);
 
// Return an array of possible values (by name) for the 'Region' variable 
var vals = px.values('Region');

codes(String or Array-Index)

When passed the name of a variable or the index of a variable in the variables array the codes method returns an array containing the codes for all possible values for that variable.

The order of the value codes in the array returned by this method matches the order in the original file.

// Return an array of all possible values (by code) for the variable at position 0 in the variables array 
var codes = px.codes(0);
 
// Return an array of possible values (by code) for the 'Region' variable 
var codes = px.codes('Region');

value(String, String)

Returns the value name corresponding to the value code and variable name passed.

// Return the value for the 'Region' variable, for which the code is '01' 
var value = px.value('01', 'Region');

code(String, String)

Returns the code corresponding to the value name and variable name passed.

// Return the code for the 'Region' variable named 'State' 
var code = px.code('State', 'Region');

datum(Array-of-Array-Indices)

The datum method takes an array of value indices and returns the data value corresponding to the particular combination of values represented by those indices. The number of elements in the passed array must be equal to the number of variables in the current PC-Axis dataset. Each element must be a zero-based index: a non-negative integer less than the number of possible values for the variable it represents.

For example, consider a dataset containing two variables, each of which has two possible values:

// Two variables 
px.variables();    // ['Sex', 'Year'] 
 
// Each variable has two possible values 
px.values('Sex');  // ['Male', 'Female'] 
px.values('Year'); // ['2011', '2012'] 
 
px.datum([0,0]);  // Data value for males in 2011 
px.datum([0,1]);  // Data value for males in 2012 
px.datum([1,0]);  // Data value for females in 2011 
px.datum([1,1]);  // Data value for females in 2012 

dataCol(Array-of-Array-Indices)

The dataCol method is similar to the datum method except that one of the elements in the passed array is replaced with a '*' character and rather than returning a single datum it returns an array of data containing a datum for each possible value for the variable represented by the '*'.

For example, consider a dataset containing two variables, each of which has two possible values:

// Two variables  
px.variables();       // ['Sex', 'Year']  
 
// Each variable has two possible values  
px.values('Sex');     // ['Male', 'Female']  
px.values('Year');    // ['2011', '2012']  
 
px.dataCol(['*',0]);  // [Data value for males in 2011, Data value for females in 2011]  
px.dataCol(['*',1]);  // [Data value for males in 2012, Data value for females in 2012] 
px.dataCol([0,'*']);  // [Data value for males in 2011, Data value for males in 2012] 
px.dataCol([1,'*']);  // [Data value for females in 2011, Data value for females in 2012] 

dataDict(Array-of-Array-Indices)

The dataDict method takes an array of value indices, one of which is replaced with a '*', and returns an object whose keys are all of the possible values for the variable represented by the '*' and whose values are the data values associated with each key and the particular combination of other value indices in the passed array.

For example, consider a dataset containing two variables, each of which has two possible values:

// Two variables  
px.variables();       // ['Sex', 'Year']  
 
// Each variable has two possible values  
px.values('Sex');     // ['Male', 'Female']  
px.values('Year');    // ['2011', '2012']  
 
px.dataDict(['*',0]); // { 'Male': Data value for males in 2011,  
                      //   'Female': Data value for females in 2011 } 
                       
px.dataDict(['*',1]); // { 'Male': Data value for males in 2012,  
                      //   'Female': Data value for females in 2012 } 
 
px.dataDict([0,'*']); // { '2011': Data value for males in 2011,  
                      //   '2012': Data value for males in 2012 } 
 
px.dataDict([1,'*']); // { '2011': Data value for females in 2011,  
                      //   '2012': Data value for females in 2012 } 

entries()

The entries method takes no arguments and returns an array of objects containing one object for each datum in the dataset. Each object contains a key for each variable in the dataset, as well as a value for that key, and an additional key, "num", the value of which is the data value associated with that particular combination of variable values.

For example, consider a dataset containing two variables, each of which has two possible values:

// Two variables  
px.variables();       // ['Sex', 'Year']  
 
// Each variable has two possible values  
px.values('Sex');     // ['Male', 'Female']  
px.values('Year');    // ['2011', '2012']  
 
px.entries(); // [  
              //   { 'Sex': 'Male', 'Year': '2011', 'num': Data value for males in 2011 }, 
              //   { 'Sex': 'Male', 'Year': '2012', 'num': Data value for males in 2012 }, 
              //   { 'Sex': 'Female', 'Year': '2011', 'num': Data value for females in 2011 }, 
              //   { 'Sex': 'Female', 'Year': '2012', 'num': Data value for females in 2012 } 
              // ] 

truncate(Array-of-Arrays-of-Array-Indices)

The truncate method removes values and associated data from the Px object. The method takes an array of arrays. Each nested array consists of a list of indices of the values to be kept for the variable represented by that array. A '*' in any variable's array indicates that all of that variable's values should be retained. This method alters the current Px object and returns nothing. Its intended use is to allow a very large dataset, only some of which is required, to be reduced to a more manageable size.

For example, consider a dataset containing three variables, each of which has three possible values:

// Three variables  
px.variables();         // ['Sex', 'Year', 'Age Group']  
 
// Each variable has three possible values  
px.values('Sex');       // ['Male', 'Female', 'Both Sexes']  
px.values('Year');      // ['2010', '2011', '2012'] 
px.values('Age Group'); // ['<16', '17-64', '65+'] 
 
// Retain 'Sex': 'Male', 'Female'; 'Year': '2012'; All Age Groups 
px.truncate([[0,1],[2],['*']]);
 
// OR 
 
// Retain 'Sex': 'Both Sexes'; All Years; 'Age Group': '<16', '65+' 
px.truncate([[2],['*'],[0,2]]);
 

Copyright (c) 2012 Fiachra O'Donoghue
Licensed under the MIT license.