Created
January 24, 2012 19:02
-
-
Save quandyfactory/1671909 to your computer and use it in GitHub Desktop.
Returns total, nonblank and net lines of code for a Python script or a folder of python scripts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
""" | |
Calculates total, nonblank and net lines of code for Python scripts. | |
""" | |
import os | |
import re | |
def get_line_count(blob):
    """Return the number of lines in the text ``blob``.

    Lines are delimited by '\\n'. An empty string has zero lines, and a
    trailing newline terminates the last line instead of starting a new,
    empty one, so 'a\\nb' and 'a\\nb\\n' both count as two lines.
    """
    count = blob.count('\n')
    # A final line without a terminating newline is still a line.
    if blob and not blob.endswith('\n'):
        count += 1
    return count
def strip_docstring(blob):
    """Remove triple-double-quoted docstrings from the code in ``blob``.

    A docstring is recognised only when its opening quotes are the first
    non-whitespace characters on a line that follows a newline; the
    preceding newline and indentation are removed together with it.
    The body may span several lines and may contain single or paired
    double-quote characters.

    Returns the text with every such docstring removed.
    """
    # The body is any run of non-quote characters, or quotes that do not
    # begin a closing triple quote. A character class such as [^"""] is
    # the same as [^"] and would reject any docstring containing a quote.
    pattern = re.compile(r'\n\s*"""(?:[^"]|"(?!""))*"""')
    while True:
        # Repeat until stable: removing one docstring can expose another.
        blob, removed = pattern.subn('', blob)
        if not removed:
            return blob
def strip_blanklines(blob):
    """Return ``blob`` with empty and whitespace-only lines dropped."""
    kept = []
    for row in blob.split('\n'):
        # A row that strips down to nothing carries no content.
        if row.strip():
            kept.append(row)
    return '\n'.join(kept)
def strip_comments(blob, delim='#'):
    """Remove whole-line comments from the code in ``blob``.

    A line is dropped when its first non-whitespace characters are
    ``delim``, which may be one or more characters long. Blank lines are
    kept, as are trailing comments that follow code on the same line.
    An empty ``delim`` matches nothing, so the text is returned unchanged.
    """
    if not delim:
        # startswith('') is always true; treat "no delimiter" as "keep all".
        return blob
    lines = blob.split('\n')
    return '\n'.join(
        line for line in lines if not line.strip().startswith(delim)
    )
def loc(blob, delim='#'):
    """Count the lines of ``blob`` at three levels of filtering.

    Returns a dict with the keys 'total' (all lines), 'nonblank' (after
    blank lines are removed) and 'net' (after docstrings and comment
    lines starting with ``delim`` are removed as well).
    """
    counts = {'total': get_line_count(blob)}
    compact = strip_blanklines(blob)
    counts['nonblank'] = get_line_count(compact)
    # Docstrings are removed before comments, on the blank-free text.
    code_only = strip_comments(strip_docstring(compact), delim)
    counts['net'] = get_line_count(code_only)
    return counts
def get_folder_total(path):
    """Sum the line counts of every Python file directly inside ``path``.

    Only regular files whose names end in '.py' are read; subdirectories
    are not descended into, and a directory that happens to be named
    like a Python file is skipped rather than opened.

    Returns a dict with the keys 'total', 'nonblank' and 'net', each the
    sum of the corresponding value from loc() over the files found.
    """
    total = {'net': 0, 'total': 0, 'nonblank': 0}
    for filename in os.listdir(path):
        fullpath = os.path.join(path, filename)
        if not filename.endswith('.py') or not os.path.isfile(fullpath):
            continue
        with open(fullpath, 'r') as thisfile:
            blob = thisfile.read()
        for k, v in loc(blob).items():
            total[k] += v
    return total
if __name__ == '__main__':
    # Command-line entry point: report the line counts for a single file.
    import sys

    args = sys.argv
    rules = """
Command line arguments:
-f - File to be tested (required). Filename if in current directory, or else full path.
-c - Character(s) used to delimit a comment (optional - default is #).
"""
    if len(args) == 1:
        sys.exit(rules)

    # Collect "-x value" pairs, keyed by the single letter after the dash.
    # Working by position (not args.index) lets a repeated flag take its
    # own following value; a flag with no value after it is ignored.
    argdict = {}
    for pos, arg in enumerate(args):
        if len(arg) > 1 and arg[0] == '-' and pos + 1 < len(args):
            argdict[arg[1]] = args[pos + 1]

    if 'f' not in argdict:
        sys.exit('Error: no filename (-f) in command line arguments.\n\n%s' % (rules))
    if 'c' not in argdict:
        argdict['c'] = '#'  # default comment delimiter

    try:
        with open(argdict['f'], 'r') as thisfile:
            blob = thisfile.read()
    except (IOError, OSError):
        # Report an unreadable path cleanly instead of with a traceback.
        sys.exit("File %s does not exist or cannot be opened." % (argdict['f']))
    if not blob:
        sys.exit("File %s is empty." % (argdict['f']))

    results = loc(blob, argdict['c'])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("---------------------------------------------")
    print("Results for %s" % (argdict['f']))
    for k, v in results.items():
        print('%s: %s' % (k, v))
    print("---------------------------------------------")
    sys.exit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I don't think line 18 does what you expect it to. Repeating characters in a character set has no additional effect, so the character set `[^"""]` has the same meaning as `[^"]`. Therefore, any single instance of a `"` will cause the match to fail, and indeed, if you test it, that pattern fails to match a string like `"""foo "bar" baz"""`, which is a valid triple-quoted string.
What you want for this situation is a "zero-width assertion". A regexp like `r'([^"]|"(?!""))*'` will match an arbitrarily long sequence of characters that aren't quotes, or characters that are, provided that they aren't followed by two more quotes. Add the triple quotes on either side, and that should catch docstrings that have double-quote characters embedded in them.