I went further and wrote a Python script that displays the number of lines of code added / changed by the user and the average number of lines per change.
Tested on Windows using Python 2.7.2. You can run it from the command line; it assumes that p4 is in your path.
Usage: codestats.py -u [username]
It also works with git: codestats.py -u [authorname] -g.
It applies some blacklisting to filter out additions that are not really your own code (for example, a change in which you just added a library), and it also ignores certain types of files (for example, .html files, etc.). Otherwise, it works very well.
Hope this helps!
########################################################################
# Script that computes the lines of code stats for a perforce/git user.
#
# Usage:
#   codestats.py -u <username>        # perforce (default)
#   codestats.py -u <authorname> -g   # git
#   codestats.py -c <changelist>      # a single perforce change
#
# Runs under both Python 2.7 and Python 3.
########################################################################
import argparse
import logging
import re
import subprocess
import sys

# (dest, short switch, long switch, help text, number of values taken)
VALID_ARGUMENTS = [
    ("user", "-u", "--user",
     "Run lines of code computation for the specified user.", 1),
    ("change", "-c", "--change",
     "Just display lines of code in the passed in change "
     "(useful for debugging).", 1),
    ("git", "-g", "--git",
     "Use git rather than perforce (which is the default versioning "
     "system queried).", 0),
]

# File extensions that are never counted as hand-written code.
BLACKLISTED_FILE_TYPES = ('html', 'css', 'twb', 'twbx', 'tbm', 'xml')

# An added file longer than this is treated as a copy of existing code.
MAX_LINES_IN_ADDED_FILE = 3000
# A change adding more lines than this is reported as an outlier.
OUTLIER_LINES_ADDED = 5000
# An add-only change touching more files than this is a bulk addition.
LARGE_ADD_FILE_COUNT = 70

# Lines of the "Affected files ..." section of `p4 describe`.
file_edited_regex = re.compile(r'^\.\.\. .*?#\d+ edit\s*$')
file_deleted_regex = re.compile(r'^\.\.\. .*?#\d+ delete\s*$')
file_integrated_regex = re.compile(r'^\.\.\. .*?#\d+ integrate\s*$')
file_added_regex = re.compile(r'^\.\.\. (.*?#\d+) add\s*$')
affected_files_regex = re.compile(r'^Affected files \.\.\.')

# Lines of the "Differences ..." section of `p4 describe -ds`.
differences_regex = re.compile(r'^Differences \.\.\..*$')
line_added_regex = re.compile(r'^add \d+ chunks (\d+) lines.*$')
line_removed_regex = re.compile(r'^deleted \d+ chunks (\d+) lines.*$')
line_changed_regex = re.compile(
    r'^changed \d+ chunks (\d+) / (\d+) lines.*$')
# Accepts the full per-file header, e.g.
# "==== //depot/src/a.py#3 (text) ====".
file_diff_regex = re.compile(r'^==== (//depot.*#\d+)\s+\S.*$')

head_type_regex = re.compile(r'^\.\.\. headType (\S+)\s*$')
blacklisted_path_regex = re.compile(
    r'^//depot.*\.(?:{0})#\d+$'.format('|'.join(BLACKLISTED_FILE_TYPES)))
change_header_regex = re.compile(r'^Change (\d+)\s*.*?\s*(\S+)@.*$')

# Changes that seem as if they weren't hand coded and merit inspection.
# Each entry is [changelist, lines_added].
outliers = []


class PrintHelpOnErrorArgumentParser(argparse.ArgumentParser):
    """Argument parser that prints the full help text on a usage error."""

    def error(self, message):
        """Log the error, print the help text and exit with status 2."""
        logging.error("error: {0}\n\n".format(message))
        self.print_help()
        sys.exit(2)


def _run_raw(command):
    """Run *command* (an argv list) and return its raw standard output.

    Exits with status 1 when the executable cannot be started (for
    example when p4 or git is not on the path).
    """
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
    except OSError as error:
        logging.error("could not run {0}: {1}".format(command[0], error))
        sys.exit(1)
    return process.communicate()[0]


def _run_lines(command):
    """Run *command* and return its standard output as a list of text lines."""
    output = _run_raw(command)
    if not isinstance(output, str):
        # Python 3 hands back bytes; never fail on an odd description.
        output = output.decode('utf-8', 'replace')
    return output.replace('\r\n', '\n').split('\n')


def _average(total, count):
    """Return total / count as a float, or 0.0 when count is zero."""
    if not count:
        return 0.0
    return total * 1.0 / count


def is_code_file(depot_path):
    """Return True if *depot_path* (with its #rev suffix) counts as code.

    A file counts as code when its extension is not blacklisted and
    `p4 fstat` reports a head type containing 'text'.
    """
    # Check the blacklist first: it decides the result on its own and
    # saves a round trip to the perforce server.
    if blacklisted_path_regex.match(depot_path):
        return False
    text_file = False
    for line in _run_lines(['p4', 'fstat', depot_path]):
        head_type_line = head_type_regex.match(line)
        if head_type_line:
            text_file = 'text' in head_type_line.group(1)
    return text_file


def parse_args(argv=None):
    """Parse the command line described by VALID_ARGUMENTS.

    argv -- list of arguments to parse; defaults to sys.argv[1:].

    Returns the argparse namespace with 'user' and 'change' as plain
    strings (or None) and 'git' as a boolean. Exits with status 2 when
    neither a user nor a change was given, or when git mode lacks a user.
    """
    parser = PrintHelpOnErrorArgumentParser()
    for arg_name, short_switch, long_switch, help_text, num_args \
            in VALID_ARGUMENTS:
        if num_args != 0:
            # No nargs: the value is stored as a string, not a list.
            parser.add_argument(short_switch, long_switch, type=str,
                                help=help_text, dest=arg_name)
        else:
            parser.add_argument(short_switch, long_switch,
                                action="store_true", help=help_text,
                                dest=arg_name)
    args = parser.parse_args(argv)
    if args.git and args.user is None:
        parser.error("-g/--git requires -u/--user")
    if args.user is None and args.change is None:
        parser.error("one of -u/--user or -c/--change is required")
    return args


def num_lines_in_file(depot_path):
    """Return the number of lines in the depot file *depot_path*."""
    # -q suppresses the one-line header p4 print normally emits.
    # The content is counted as bytes so that any encoding works.
    return len(_run_raw(['p4', 'print', '-q', depot_path]).splitlines())


def parse_change(changelist):
    """Compute the line and file statistics of one perforce change.

    Returns [files_added, files_edited, lines_added, lines_deleted,
    lines_changed_added, lines_changed_deleted].
    """
    parsing_differences = False
    parsing_affected_files = False
    skip_file = False
    num_lines_added = 0
    num_lines_deleted = 0
    num_lines_changed_added = 0
    num_lines_changed_deleted = 0
    num_files_added = 0
    num_files_edited = 0
    for line in _run_lines(['p4', 'describe', '-ds', changelist]):
        if differences_regex.match(line):
            parsing_differences = True
        elif affected_files_regex.match(line):
            parsing_affected_files = True
        elif parsing_differences:
            file_match = file_diff_regex.match(line)
            if file_match:
                # A new per-file section starts; decide whether the diff
                # summary lines that follow are to be counted.
                skip_file = not is_code_file(file_match.group(1))
            elif not skip_file:
                added = line_added_regex.match(line)
                removed = line_removed_regex.match(line)
                changed = line_changed_regex.match(line)
                if added:
                    num_lines_added += int(added.group(1))
                elif removed:
                    num_lines_deleted += int(removed.group(1))
                elif changed:
                    # "changed N chunks <old> / <new> lines"
                    num_lines_changed_added += int(changed.group(2))
                    num_lines_changed_deleted += int(changed.group(1))
        elif parsing_affected_files:
            file_added_match = file_added_regex.match(line)
            if file_added_match:
                depot_path = file_added_match.group(1)
                if is_code_file(depot_path):
                    lines_in_file = num_lines_in_file(depot_path)
                    if lines_in_file > MAX_LINES_IN_ADDED_FILE:
                        # Anomaly - probably a copy of existing code -
                        # discard this
                        lines_in_file = 0
                    num_lines_added += lines_in_file
                num_files_added += 1
            elif file_edited_regex.match(line):
                num_files_edited += 1
    return [num_files_added, num_files_edited, num_lines_added,
            num_lines_deleted, num_lines_changed_added,
            num_lines_changed_deleted]


def contains_integrates(changelist):
    """Return True if the change has at least one integrated file."""
    parsing_affected_files = False
    for line in _run_lines(['p4', 'describe', '-s', changelist]):
        if affected_files_regex.match(line):
            parsing_affected_files = True
        elif parsing_affected_files and file_integrated_regex.match(line):
            return True
    return False


#################################################
# Note: Keep this function in sync with
# generate_line.
#################################################
def generate_output_specifier(output_headers):
    """Return a str.format template with one column per header.

    Every column is as wide as its header; an empty header list gives ''.
    """
    if not output_headers:
        return ''
    columns = ['| {{:{0}}}'.format(len(header)) for header in output_headers]
    return ''.join(columns) + ' |'


#################################################
# Note: Keep this function in sync with
# generate_output_specifier.
#################################################
def generate_line(output_headers):
    """Return a dashed rule exactly as wide as a formatted table row."""
    if not output_headers:
        return ''
    # Two dashes for each leading '| ', two more for the closing ' |'.
    return ''.join('-' * (len(header) + 2)
                   for header in output_headers) + '--'


# Returns true if a change is a bulk addition, a private change or a
# third-party change
def is_black_listed_change(user, changelist):
    """Return True if the change must be left out of the statistics.

    A change is blacklisted when it touches //depot/private or
    //depot/third-party, or when it consists only of adds and adds more
    than LARGE_ADD_FILE_COUNT files. The private flag used to be
    computed and then ignored; it is honoured now, as the comment on
    this function always stated.

    user -- unused; kept so existing callers keep working.
    """
    all_adds = True
    num_adds = 0
    for line in _run_lines(['p4', 'describe', '-s', changelist]):
        if ('... //depot/private' in line
                or '... //depot/third-party' in line):
            return True
        if file_edited_regex.match(line) or file_deleted_regex.match(line):
            all_adds = False
        elif file_added_regex.match(line):
            num_adds += 1
    return all_adds and num_adds > LARGE_ADD_FILE_COUNT


def get_user_and_change_header_for_change(changelist):
    """Return [user, header_line] of a change, or [None, None] if unknown."""
    for line in _run_lines(['p4', 'describe', '-s', changelist]):
        change_header_match = change_header_regex.match(line)
        if change_header_match:
            return [change_header_match.group(2), line]
    return [None, None]


def _print_git_stats(author):
    """Print commit count, net lines and lines per commit for *author*."""
    numstat_regex = re.compile(r'^(\d+)\s+(\d+)\s+.+$')
    adds = 0
    subs = 0
    git_log_command = ['git', 'log', '--author={}'.format(author),
                       '--pretty=tformat:', '--numstat']
    for git_log_line in _run_lines(git_log_command):
        # Binary files are reported as "-\t-\t<path>" and are skipped.
        line_match = numstat_regex.match(git_log_line)
        if line_match:
            adds += int(line_match.group(1))
            subs += int(line_match.group(2))
    total = adds - subs

    shortlog_regex = re.compile(r'^\s*(\d+)\s+.*$')
    num_commits = 0
    # An explicit revision keeps git shortlog from reading the log from
    # standard input when that is not a terminal.
    git_shortlog_command = ['git', 'shortlog',
                            '--author={}'.format(author), '-s', 'HEAD']
    for git_shortlog_line in _run_lines(git_shortlog_command):
        line_match = shortlog_regex.match(git_shortlog_line)
        if line_match:
            num_commits += int(line_match.group(1))

    print("Git Stats for {}: Commits: {}. Lines of code: {}. "
          "Average Lines Per Change: {}.".format(
              author, num_commits, total, _average(total, num_commits)))


def main():
    """Entry point: print the statistics requested on the command line."""
    log = logging.getLogger()
    log.setLevel(logging.DEBUG)

    args = parse_args()
    if args.git:
        _print_git_stats(args.user)
        sys.exit(0)

    user = args.user
    if args.change:
        user, change_header = get_user_and_change_header_for_change(
            args.change)
        if change_header is None:
            logging.error("change {0} not found".format(args.change))
            sys.exit(1)
        change_log = [change_header]
    else:
        change_log = _run_lines(
            ['p4', 'changes', '-u', user, '-s', 'submitted'])

    user_stats = {
        'num_changes': 0,
        'lines_added': 0,
        'lines_deleted': 0,
        'lines_changed_added': 0,
        'lines_changed_removed': 0,
        'total_lines': 0,
        'files_edited': 0,
        'files_added': 0,
    }

    output_headers = ['Current Change', 'Num Changes', 'Files Added',
                      'Files Edited', 'Lines Added', 'Lines Deleted',
                      'Lines Changed (Added/Removed)', 'Total Lines',
                      'Avg. Lines/Change']
    line = generate_line(output_headers)
    output_specifier = generate_output_specifier(output_headers)
    print(line)
    print(output_specifier.format(*output_headers))
    print(line)

    # The running totals are redrawn in place: every row ends with a
    # carriage return instead of a newline.
    progress_specifier = output_specifier + '\r'
    recorded_outliers = set(entry[0] for entry in outliers)
    for change in change_log:
        change_match = change_header_regex.search(change)
        if not change_match:
            continue
        user_stats['num_changes'] += 1
        changelist = change_match.group(1)
        if (not is_black_listed_change(user, changelist)
                and not contains_integrates(changelist)):
            (files_added, files_edited, lines_added, lines_deleted,
             lines_changed_added, lines_changed_removed) = parse_change(
                 changelist)
            if lines_added > OUTLIER_LINES_ADDED:
                # Outliers are reported separately, never counted.
                if changelist not in recorded_outliers:
                    recorded_outliers.add(changelist)
                    outliers.append([changelist, lines_added])
            else:
                user_stats['lines_added'] += lines_added
                user_stats['lines_deleted'] += lines_deleted
                user_stats['lines_changed_added'] += lines_changed_added
                user_stats['lines_changed_removed'] += lines_changed_removed
                user_stats['total_lines'] += (lines_changed_added
                                              - lines_changed_removed
                                              + lines_added)
                user_stats['files_edited'] += files_edited
                user_stats['files_added'] += files_added

        current_output = [
            changelist,
            user_stats['num_changes'],
            user_stats['files_added'],
            user_stats['files_edited'],
            user_stats['lines_added'],
            user_stats['lines_deleted'],
            '{}/{}'.format(user_stats['lines_changed_added'],
                           user_stats['lines_changed_removed']),
            user_stats['total_lines'],
            '{0:.2f}'.format(_average(user_stats['total_lines'],
                                      user_stats['num_changes'])),
        ]
        sys.stdout.write(progress_specifier.format(*current_output))
        sys.stdout.flush()

    print('')
    print(line)

    if outliers:
        print("Outliers (changes that merit inspection - and have not "
              "been included in the stats):")
        outlier_headers = ['Changelist', 'Lines of Code']
        outlier_specifier = generate_output_specifier(outlier_headers)
        outlier_line = generate_line(outlier_headers)
        print(outlier_line)
        print(outlier_specifier.format(*outlier_headers))
        print(outlier_line)
        for change in outliers:
            print(outlier_specifier.format(*change))
        print(outlier_line)


if __name__ == "__main__":
    main()