Dariusz on Software

Methods and Tools

About This Site

Software development stuff

Archive

Static verificaiton tool for web2py templates
Sun, 15 Jan 2012 23:09:24 +0000

Web2Py is a full-stack Python web framework that can be compared to Django, but is easier to learn due to convention-over-explicit-statement preference. In this article I'll check how static verification techniques developed by me for many different environments (JSP, Django templates, TAL, ...) can be applied for web2py environment.

Static verification means locating simple bugs without running application thus very high (>95%) testing coverage (and high related cost) is not required. Instead with trying to cover by tests every possible screen/workflow/code line/... we can scan all codebase and search for some constraints. Most of them (based on my experience) are static - do not depend on runtime data thus can be effectively checked without running an application.

Here's short example of web2py template language:

<a href="{{=URL('books','show')}}">...</a>

As you can see web2py will substitute {{=<python-expression>}}s by evaluated result. In this example URL points to existing module controllers/books.py and function inside this module named 'show'. I assume you see the problem here: one can select undefined module / function and it will result in a runtime error.

First example is purely static: refence (URL('books','show')) will not change during runtime, neither the source code. Then our static checker might be applied succesfully: check if all URL's in all *.html files have proper functions defined in source code.

Technical solution can be composed to the following steps:

  • locating all resources to check: scanning given directory in filesystem tree
  • locating all interesting fragments in HTML file: I used regexp with arguments to easily extract interesting data
  • locating functions in *.py code: because controllers are not plain python modules (expects some data in global namespace) I decided just to scan them textually

Another check that can be done is references inside HTML files (to CSS resources, JS files, ...). This also can be automated:

<script type="text/javascript" src="svgcanvas.min.js"></script>

Source code refactorings might break your links/references and static scan might ensure you are not breaking application by refactoring.

Complete source code for URL() / SRC= / HREF= checker:

import sys

sys.path.append("web2py/applications")
sys.path.append("web2py")

import os
import copy
import re
import string
from gluon.globals import *
from gluon.http import *
from gluon.html import *

RE_SRC = re.compile(r'src *= *"([^"]*)"')
RE_HREF = re.compile(r'href *= *"([^"]*)"')
RE_URL = re.compile(r'{{=URL\(\'([^\']*)\'')
RE_URL2 = re.compile(r'{{=URL\(\'([^\']*)\' *, *\'([^\']*)\'')

FILENAME= None
FNR = 0

request = Request()

def report_error(s):

    print "%s:%d: %s" % (FILENAME, FNR, s)

def check_src_exists(arg):

    if arg[0] == "{":
        # variable value
        return

    elif arg[0].find("{{"):
        # skip this URL
        pass

    elif arg[0] == "/":
        # absolute file
        fullPath = "web2py/applications" + "/" + arg
        if not os.path.exists(fullPath):
            report_error("file %s doesn't exists" % fullPath)

    else:
        # relative file
        fullPath = os.path.dirname(FILENAME) + "/" + arg
        if not os.path.exists(fullPath):
            report_error("file %s doesn't exists" % fullPath)

def check_href_exists(arg):

    #print arg
    if arg.startswith("{{="):
        pass
    elif arg.find("{{") > 0:
        pass
    elif arg.startswith("/"):
        if not os.path.exists("web2py/applications" + arg):
            report_error("absolute file %s doesn't exists" % arg)
    elif arg.startswith("http://"):
        # external link, do not check
        pass
    elif arg.startswith("https://"):
        # external link, do not check
        pass
    elif arg.startswith("mailto:"):
        # external link, do not check
        pass
    elif arg.startswith("javascript:"):
        # external link, do not check
        pass
    elif arg.find("#") == 0:
        # anchor, skip
        pass
    else:
        fullPath = os.path.dirname(FILENAME) + "/" + arg
        if not os.path.exists(fullPath):
            report_error("relative file %s doesn't exists" % fullPath)

def templatePathToPythonPath(templatePath):

    return string.join(templatePath.replace("/views/", "/controllers/").split("/")[:-1], "/") + ".py"

def eq(got, expected):
    if got != expected:
        print "got:'%s' != expected:'%s'" % (got, expected)
        return False
    return True

assert eq(templatePathToPythonPath(
    "web2py/applications/ad/views/default/details.html"),
    "web2py/applications/ad/controllers/default.py")
assert eq(templatePathToPythonPath(
    "web2py/applications/ad/views/default/index.html"),
    "web2py/applications/ad/controllers/default.py")
assert eq(templatePathToPythonPath(
    "web2py/applications/examples/views/ajax_examples/index.html"),
    "web2py/applications/examples/controllers/ajax_examples.py")

name_to_contents = {}
def get_file_contents(fileName):

    global name_to_contents
    if not name_to_contents.has_key(fileName):
        if os.path.exists(fileName):
            f = file(fileName)
            name_to_contents[fileName] = f.read()
            f.close()
        else:
            report_error("Cannot load %s" % fileName)
            name_to_contents[fileName] = ""
    return name_to_contents[fileName]

def check_url_exists(url):

    if FILENAME.find("appadmin.html") > 0:
        return

    if url.find(".") > 0:
        functionName = url.split(".")[-1]
    else:
        functionName = url

    # print "check_url_exists(%s) moduleName=%s" % (url, moduleName)
    pythonFilePath = templatePathToPythonPath(FILENAME)

    if get_file_contents(pythonFilePath).find("def " + functionName + "()") < 0:
        report_error("cannot find %s in %s" % (functionName, pythonFilePath))

def check_url2_exists(moduleName, functionName):

    if moduleName == "static":
        return

    # print "check_url_exists(%s) moduleName=%s" % (url, moduleName)
    pythonFilePath = string.join(FILENAME.replace("/views/", "/controllers/").split("/")[:-1], "/") + "/" + moduleName + ".py"

    if get_file_contents(pythonFilePath).find("def " + functionName + "()") < 0:
        report_error("cannot find %s in %s" % (functionName, pythonFilePath))

def scan_file(path):

    global FILENAME
    global FNR

    FILENAME = path
    FNR = 0

    f = file(path)

    while 1:
        FNR += 1
        line = f.readline()
        if not line:
            break

        m = RE_SRC.search(line)
        if m:
            check_src_exists(m.group(1))

        m = RE_HREF.search(line)
        if m:
            check_href_exists(m.group(1))

        m = RE_URL2.search(line)
        if m:
            check_url2_exists(m.group(1), m.group(2))
        else:
            m = RE_URL.search(line)
            if m:
                check_url_exists(m.group(1))

    f.close()

def test_html(directory):

    for a in os.listdir(directory):
        if a == "epydoc":
            continue
        p = directory + "/" + a
        if os.path.isdir(p):
            test_html(p)
        if a.endswith(".html"):
            scan_file(p)

test_html("web2py/applications")

Tags

Created by Chronicle v3.5