#!/usr/bin/env python # Filepaths # Phil Bordelon import os import sys DEBUG = os.getenv("DEBUG", False) # Use a large number that doesn't matter for matchLevels. BIG_NUMBER = 999 def matchLevel (filename, search): # This function determines just how close a filename is to a given # search request. # A large return value means "not close enough to # care." to_return = BIG_NUMBER # We only care about equal, one character off, or two # characters off. The easiest case is if the filename matches what # we're searching for; return early if that's the case. if search == filename: to_return = 0 # If the search string is longer than the name of the # file, there's no point in looking. In fact, it has # to be at least one character shorter than the length # of the filename (otherwise there's no room for a # single extra character). elif len(search) >= len(filename): to_return = BIG_NUMBER else: # Loop through the characters and count the extras. We do this # with a sentinel-value loop instead of a straight loop, because # there are a lot of exit conditions and breaks offend my delicate # sensibilities. extra_count = 0 search_pos = 0 file_pos = 0 done = False while not done: if filename[file_pos] == search[search_pos]: # A hit! Increment where we are in the filename and where we # are in the search string. file_pos += 1 search_pos += 1 else: # A miss. Increment where we are in the file, but not where # we are in the search string; also, count this as an 'extra' # character. file_pos += 1 extra_count += 1 # Handle the various bailout cases: too high of an extra count, or # at the end of either of the strings. if search_pos >= len(search): # We've gone through the entire search string. This is # potentially good! Any characters at the end of the filename # after having gone through the entire search string are extra. extra_count += len(filename) - file_pos done = True # Provisionally set the return value to the extra character # count; this may immediately be transformed into a BIG_NUMBER if # it's higher than 2. to_return = extra_count # We're done if there are too many extra characters. if extra_count > 2: done = True to_return = BIG_NUMBER # Okay, so, if we're at the end of the filename but not at the end of # the search string, we obviously can't match the rest of the # characters in the search string. Bail if this is the case. elif file_pos >= len(filename) and search_pos < len(search): done = True to_return = BIG_NUMBER return to_return def runSearches (search_list, file_dict, user_dict): # Here we actually run the searches against the list of files and the # paths that users request. for search_tuple in search_list: # Break the search request tuple out into its parts. username, search = search_tuple if username not in user_dict: sys.exit("ERROR: Invalid username %s in search request!" % username) print "%s REQUESTED %s" % (username, search) # Get this user's paths. user_paths = user_dict[username] # Loop through each path in the user's search list. Keep track of the # best matches so far. We do it as a 'done' loop, so that we can bail # early if we find an exact match, as there's no need to go further in # that case. best_match_num = BIG_NUMBER best_match_list = [] path_loc = 0 done = False while not done: curr_path = user_paths[path_loc] # We may have paths that aren't valid. Skip those. if curr_path in file_dict: # Loop through each file in the path, tracking the best matches. match_list = [] match_num = BIG_NUMBER for filename in file_dict[curr_path]: # See just how good a match this file is. filename_match = matchLevel(filename, search) # We only care about small values. if filename_match < BIG_NUMBER: # If this is a better match than any previous one, record # it; otherwise, if it's equal, append it to the list. if filename_match < match_num: match_num = filename_match match_list = [(filename, curr_path)] elif filename_match == match_num: match_list.append((filename, curr_path)) # Now, we see if the best matches in this directory beat the best # matches found so far. In addition, if the match_num is 0, we # bail. We don't have to worry about the case where it's equal, # as this is further down the path list of the previous equal # instance and therefore of lower priority. if match_num < best_match_num: best_match_num = match_num best_match_list = match_list if match_num == 0: done = True # Increment the position in the list of paths. Bail if we're at the # end. path_loc += 1 if path_loc >= len(user_paths): done = True # All right. If we found a decent result, sort and print. if best_match_num < BIG_NUMBER: best_match_list.sort() for filename, path in best_match_list: print "FOUND %s IN %s" % (filename, path) if "__main__" == __name__: dataset_count = int(sys.stdin.readline()) for dataset_loop in range(dataset_count): print "DATA SET #%d" % (dataset_loop + 1) # Read in the files and paths. Track them in a dictionary keyed off # of the path. file_dict = {} file_count = int(sys.stdin.readline()) for x in range(file_count): # Get the constituent bits of this line. path, filename = sys.stdin.readline().split() # If we've never seen this path before, add it; otherwise just toss # this filename onto the list of all files here. if path not in file_dict: file_dict[path] = [filename] else: file_dict[path].append(filename) # Read in the users and their preferred paths. Like the files, we store # these in a dictionary, this time keyed off of the username. user_dict = {} user_count = int(sys.stdin.readline()) for x in range(user_count): # Read the unique username. username = sys.stdin.readline().strip() if username in user_dict: sys.exit("ERROR: Multiple users with the same name (%s)!" % username) else: user_dict[username] = [] # Get the number of search paths they have set, and get said paths # as well. path_count = int(sys.stdin.readline()) for y in range(path_count): path = sys.stdin.readline().strip() user_dict[username].append(path) # Finally, actually do the various searches requested. Read in the # list and pass it off to a function. search_count = int(sys.stdin.readline()) search_list = [] for x in range(search_count): # Get the bits ... username, search = sys.stdin.readline().strip().split() # And put them on the list of searches as a tuple. search_list.append((username, search)) # Lastly, actually run the requested searches. runSearches(search_list, file_dict, user_dict)