Friday, June 7, 2013

MATLAB FixStyle tool to fix the formatting of code

Below is a script that I wrote to clean up the style of a MATLAB file.  It inserts whitespace around various operators and structures such as brackets.  This is an essential tool for refactoring old MATLAB code.



function FixStyle(inFileName, replaceOldWithNew )
    %% This is a collection of rules to re-style the code in an m file.
    % This was developed for refactoring legacy code that I have inherited.
    %
    % The input file is read line by line, every line is passed through
    % PrettyFormat, and the result is written to <name>.cleaned.m in the
    % same folder as the input file.
    %
    % inputs
    %  - inFileName - The file to be restyled. Must end in .m, otherwise
    %    the output name could not be told apart from the input name.
    %  - replaceOldWithNew - optional, default false. Logical true/false,
    %    or the text 'true'/'false'. If true then rename the inFile to
    %    inFile.old.m and replace it with the restyled file.
    %
    % errors
    %  - raised when no file name is given, when the name does not end in
    %    .m, or when either file cannot be opened. Errors raised while
    %    formatting are rethrown after both files have been closed.
    %
    % (c) Duncan Blair 2013.

    if nargin < 1 || isempty(inFileName)
        error('No file name provided. Exiting...');
    end

    % The flag is documented as true/false, so honour a logical (or
    % numeric 1) as well as the text form that was originally required.
    if nargin < 2
        replaceOldWithNew = false;
    end
    doReplace = isequal(replaceOldWithNew, true) || ...
        (ischar(replaceOldWithNew) && strcmpi(replaceOldWithNew, 'true'));

    % Derive the output name from the extension only, so that a '.m'
    % elsewhere in the path (e.g. 'my.model.m') is left untouched.
    [folder, baseName, ext] = fileparts(inFileName);
    if ~strcmpi(ext, '.m')
        % Without this check the output name would equal the input name
        % and opening it for writing would truncate the input file.
        error('FixStyle:notAnMFile', ...
            'Expected a file name ending in .m but got "%s".', inFileName);
    end
    outFileName = fullfile(folder, [baseName '.cleaned' ext]);

    % open the input first so that a missing input does not leave an
    % empty output file behind
    [inFileID, openMsg] = fopen(inFileName, 'r');
    if inFileID < 0
        error('FixStyle:cannotRead', ...
            'Could not open "%s" for reading: %s', inFileName, openMsg);
    end

    [outFileID, openMsg] = fopen(outFileName, 'w');
    if outFileID < 0
        fclose(inFileID);
        error('FixStyle:cannotWrite', ...
            'Could not open "%s" for writing: %s', outFileName, openMsg);
    end

    try
        %process the file line by line; fgets keeps the line terminator
        %and returns -1 (not a char) at end of file
        textLine = fgets(inFileID);
        while ischar(textLine)

            % reformat the line and write it to the new file as text
            fwrite(outFileID, PrettyFormat(textLine), 'char');

            textLine = fgets(inFileID);
        end %while

    catch err

        fclose(inFileID); %close the in file
        fclose(outFileID);%close the out file
        rethrow(err);
    end

    % both files must be closed before they can be renamed
    fclose(inFileID); %close the in file
    fclose(outFileID);%close the out file

    %change the filenames after success
    if doReplace
        oldFileName = fullfile(folder, [baseName '.old' ext]);
        movefile(inFileName, oldFileName);
        movefile(outFileName, inFileName);
    end

end

%% Apply the formatting rules to a line of text
function cleanLine = PrettyFormat(dirtyLine)
    % Apply the whitespace rules to one line of text and return the result.
    %
    % input
    %  - dirtyLine - one line of source text (a trailing line terminator,
    %    if present, is carried through with the rest of the line)
    % output
    %  - cleanLine - the restyled code followed by the untouched comment
    %
    % The line is split at the first '%' by SplitCommentLine; only the part
    % before it is restyled. The rules are plain text substitutions that
    % run over the whole code part, including any text inside quotes.
    % Each operator argument is a regular-expression fragment, so regex
    % metacharacters must be escaped here by the caller.
    %
    % The order of the calls matters: the two-character relational and
    % logical operators are spaced before their one-character relatives,
    % and Fix1CharOperator relies on that spacing (plus its own guards) to
    % leave them intact.

    [code, comment] = SplitCommentLine(dirtyLine);

    %early exit for comment-only lines, including indented ones, and for
    %blank lines: there is no code to restyle
    if isempty(strtrim(code))
        cleanLine = [code comment];
        return;
    end

    %Relational Operators
    code = Fix2CharOperator(code, '<=');
    code = Fix2CharOperator(code, '>=');
    code = Fix2CharOperator(code, '==');
    code = Fix2CharOperator(code, '~=');
    code = Fix1CharOperator(code, '>');
    code = Fix1CharOperator(code, '<');

    %short circuit logical operators
    code = Fix2CharOperator(code, '\|\|');
    code = Fix2CharOperator(code, '&&');

    %Element wise logical operators
    code = Fix1CharOperator(code, '&');
    code = Fix1CharOperator(code, '\|');
    code = Fix1CharOperator(code, '~'); %may require special handling

    %arithmetic operators
    % '+' and '*' are regex quantifiers, so they are escaped like the
    % other metacharacters passed below
    code = Fix1CharOperator(code, '\+');
    code = Fix1CharOperator(code, '-');
    code = Fix1CharOperator(code, '/');
    code = Fix2CharOperator(code, '\./');
    code = Fix1CharOperator(code, '\*');
    code = Fix2CharOperator(code, '\.\*');

    code = Fix1CharOperator(code, '\\');
    code = Fix2CharOperator(code, '\.\\');

    code = Fix1CharOperator(code, '\^');
    code = Fix2CharOperator(code, '\.\^');

    % The transpose operators ' and .' are deliberately not handled:
    % spacing them messes up string literals.

    %assignment operator
    code = Fix1CharOperator(code, '\=');

    %initialise an array
    code = Fix2CharOperator(code, '\[\]');

    %Brackets
    % []
    code = FixBrackets(code, '\[', '\]');
    %{}
    code = FixBrackets(code, '\{', '\}');
    %()
    code = FixBrackets(code, '\(', '\)');

    %string literals
    code = FixInvertedCommaPairs(code);

    code = FixComma(code);

    %Fix any leading whitespace before semi-colons
    %these may have been introduced by earlier operations
    code = FixSemiColon(code);

    %append any trailing comment
    cleanLine = [code comment];
end

%% Rules for String Literals
function cleanLine = FixInvertedCommaPairs( dirtyLine )
    % Separate single-quoted text from the characters touching it.
    %
    % input
    %  - dirtyLine - text to process
    % output
    %  - cleanLine - text with the two substitutions below applied in order

    % a quote, any run of non-quote characters, and another quote
    quoted = '\''[^\'']*\''';

    % Pass 1: a non-space character directly in front of the quoted text
    % gets a space after it. The pattern also requires (and consumes) one
    % space following the quoted text; that space is not written back.
    spacedBefore = regexprep(dirtyLine, ['(\S)(' quoted ') '], '$1 $2');

    % Pass 2: quoted text that follows whitespace and is directly followed
    % by a non-space character gets a space inserted between the two.
    cleanLine = regexprep(spacedBefore, ['(\s' quoted ')(\S)'], '$1 $2');

end

%% Rules for commas
function cleanLine = FixComma( dirtyLine )
    % Normalise the whitespace around commas.
    %
    % input
    %  - dirtyLine - text to process
    % output
    %  - cleanLine - text in which a comma directly followed by a
    %    non-space character is followed by one space, and any
    %    whitespace in front of a comma is removed

    % first make sure something separates the comma from what follows ...
    withTrailingSpace = regexprep(dirtyLine, '(\,)(\S)', '$1 $2');

    % ... then pull the comma up against whatever precedes it
    cleanLine = regexprep(withTrailingSpace, '\s*(\,)', '$1');

end

%% Rules for Semi-Colons
function cleanLine = FixSemiColon( dirtyLine )
    % Normalise the whitespace around semi-colons.
    %
    % input
    %  - dirtyLine - text to process
    % output
    %  - cleanLine - text in which a semi-colon directly followed by a
    %    non-space character is followed by one space, and any
    %    whitespace in front of a semi-colon is removed

    % first make sure something separates the semi-colon from what follows
    withTrailingSpace = regexprep(dirtyLine, '(\;)(\S)', '$1 $2');

    % then pull the semi-colon up against whatever precedes it
    cleanLine = regexprep(withTrailingSpace, '\s*(\;)', '$1');

end

%% Rules for single character operators
function cleanLine = Fix1CharOperator( dirtyLine, op )
    % Put a space on each side of a single-character operator.
    %
    % inputs
    %  - dirtyLine - text to process
    %  - op - regular-expression fragment that matches the operator
    %    character; it is spliced into the patterns as-is, so the caller
    %    must escape regex metacharacters (e.g. '\^', '\|')
    % output
    %  - cleanLine - text with the spacing applied
    %
    % Two guarded passes are used. An earlier combined pass was removed:
    % its pattern was missing a closing ']', and a literal repair would
    % have split './', '.*', '.^' and '.\' because it had none of the
    % guards below. The two passes already turn 'a+b' into 'a + b'.
    %
    % The rules are purely textual: a unary sign, or the sign inside a
    % literal such as 1e-5, is spaced like a binary operator.

    %add leading whitespace
    % skipped when the operator follows itself, whitespace, or one of
    % . < > ~  so that operators such as ./ <= >= ~= stay in one piece
    patten = ['([^' op '\s\.<>~])(' op '{1})'];
    replaceWith = '$1 $2';
    cleanLine = regexprep(dirtyLine, patten, replaceWith);

    %add trailing whitespace
    % skipped when the operator is followed by itself, whitespace, or one
    % of = * ^ / .  so that operators such as <= == .* stay in one piece
    patten = ['(' op '{1})([^' op '\s\=\*\^/\.])'];
    replaceWith = '$1 $2';
    cleanLine = regexprep(cleanLine, patten, replaceWith);
end

%% Rules for two-character operators
function cleanLine = Fix2CharOperator( dirtyLine, op )
    % Put a space on each side of a two-character operator.
    %
    % inputs
    %  - dirtyLine - text to process
    %  - op - regular-expression fragment matching the operator; it is
    %    spliced into the patterns as-is
    % output
    %  - cleanLine - text after the three substitutions below have been
    %    applied one after the other

    % each row: pattern, replacement
    passes = { ...
        ['(\S)(' op ')(\S)'], '$1 $2 $3'; ... % touching on both sides
        ['(\S)(' op ')'],     '$1 $2';    ... % touching on the left
        ['(\s)(' op ')(\S)'], '$1$2 $3'   ... % touching on the right only
        };

    cleanLine = dirtyLine;
    for passIdx = 1:size(passes, 1)
        cleanLine = regexprep(cleanLine, passes{passIdx, 1}, passes{passIdx, 2});
    end

end

%% Rules for brackets
function cleanLine = FixBrackets( dirtyLine, leftBracket, rightBracket )
    % Pad the inside of a bracket pair with spaces.
    %
    % inputs
    %  - dirtyLine - text to process
    %  - leftBracket - regular-expression fragment for the opening bracket
    %  - rightBracket - regular-expression fragment for the closing bracket
    % output
    %  - cleanLine - text with a space inserted after every opening
    %    bracket that is directly followed by a non-space character, and
    %    before every closing bracket that is directly preceded by one

    % the lookaheads test the neighbouring character without consuming it
    openThenText = ['(' leftBracket ')(?=\S)'];
    textThenClose = ['(\S)(?=' rightBracket ')'];

    paddedLeft = regexprep(dirtyLine, openThenText, '$1 ');
    cleanLine = regexprep(paddedLeft, textThenClose, '$1 ');

end

%% Divide lines into code and trailing comments
function [code, comment] = SplitCommentLine(line)
    % Split a line at its first '%' character.
    %
    % input
    %  - line - one line of text
    % outputs
    %  - code - everything before the first '%' (the whole line when
    %    there is no '%')
    %  - comment - the first '%' and everything after it ('' when there
    %    is no '%')
    %
    % The first '%' is used wherever it occurs; quoted text is not
    % examined.

    firstPercent = find(line == '%', 1, 'first');

    if isempty(firstPercent)
        code = line;
        comment = '';
        return;
    end

    code = line(1:firstPercent - 1);
    comment = line(firstPercent:end);
end