#!/usr/bin/python3
SVER = '2.0.3'
##############################################################################
# chktid - Checks for Existing or Suggested TIDs
# Copyright (C) 2023 SUSE LLC
#
# Description:  Searches all current patterns for a given search string to see
#               if a pattern has already been written. Likewise, it can search
#               the SUSE support site for TIDs that currently don't have a 
#               pattern.
# Modified:     2023 Aug 30
#
##############################################################################
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; version 2 of the License.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, see <http://www.gnu.org/licenses/>.
#
#  Authors/Contributors:
#     Jason Record <jason.record@suse.com>
#
##############################################################################

import sys
import os
import re
import getopt
import signal
import configparser
import requests
import patdevel as pd

# Global Definitions
# Program title handed to pd.title() whenever the banner is printed
title_string = "Check for Existing or Suggested TIDs"
# Placeholders only: main() overwrites the next three values with entries read
# from the [Common] section of the configuration file
sca_repo_dir = ''
target_url = ''
target_tid_url = ''
# Inclusive bounds main() accepts for -p/--pages; a value of 0 is mapped to
# web_range_max
web_range_min = 0
web_range_max = 12

def usage():
	"Prints the command synopsis followed by one aligned row per option"
	row = "  {:33s} {}".format
	page_limit = str(web_range_max)
	# (option column, description column) in the order they are shown
	option_rows = (
		("-h, --help", "Display this help"),
		("-s, --suggestions", "Get suggested TIDs from SUSE support website"),
		("-p <0-" + page_limit + ">, --pages <0-" + page_limit + ">", "SUSE support web number of pages to evaluate, default: 1"),
		('-l <level>, --log_level <level>', "Set log level, default: Minimal"),
		('', "0 Quiet, 1 Minimal, 2 Normal, 3 Verbose, 4 Debug"),
	)
	print("Usage: chktid [options] [<search_string> | -s [-p]]")
	print()
	print("Options:")
	for option_text, description in option_rows:
		print(row(option_text, description))
	print()

def signal_handler(sig, frame):
	"Signal callback: announces the abort and ends the process with exit status 0"
	abort_notice = "\n\nAborting...\n"
	print(abort_notice)
	# Equivalent to sys.exit(0)
	raise SystemExit(0)

def search_file_content(filepath, _check_pattern):
	"""Reports whether any line of a file matches a compiled regular expression.

	filepath        Path of the text file to scan
	_check_pattern  Compiled pattern; its search() method is applied to each line

	Returns True as soon as one line matches, otherwise False (also for an
	empty file). Errors from open(), such as a missing file, are not caught.
	"""
	# The context manager closes the file even when reading raises, and
	# iterating the handle stops at the first match instead of loading the whole
	# file. errors="replace" keeps a stray undecodable byte from aborting the
	# entire search.
	with open(filepath, "r", errors="replace") as fd:
		return any(_check_pattern.search(line) for line in fd)

def retrieve_pattern_file_list():
	"Returns the entries from pd.get_pattern_list() for sca_repo_dir whose path contains /sca-patterns.../patterns"
	wanted_path = re.compile("/sca-patterns.*/patterns")
	return [
		candidate
		for candidate in pd.get_pattern_list(sca_repo_dir, _recurse=True)
		if wanted_path.search(candidate)
	]

def search_patterns(_search):
	"""Searches pattern filenames and pattern file content for a regular expression.

	_search  Search string; it is compiled as a regular expression

	Results are only reported through msg. At verbose level and above the
	total is the sum of filename and content matches; below that it is the
	number of distinct files that matched either way.
	"""
	msg.min("Searching Patterns for", "'" + _search + "'")
	msg.min("Repository Directory", sca_repo_dir)
	msg.normal("\nRetrieving pattern file list")
	candidates = retrieve_pattern_file_list()
	matcher = re.compile(_search)

	msg.verbose("\nSearching for Matching Filenames")
	name_hits = [path for path in candidates if matcher.search(path)]
	for path in name_hits:
		msg.verbose("Matched Filename", path)

	msg.verbose("\nSearching for Files with Matching Content")
	content_hits = [path for path in candidates if search_file_content(path, matcher)]
	for path in content_hits:
		msg.verbose("Matched File Content", path)

	# Distinct files in first-seen order: filename matches, then content matches
	distinct_hits = dict.fromkeys(name_hits + content_hits, True)

	if msg.get_level() >= msg.LOG_VERBOSE:
		total_hits = len(name_hits) + len(content_hits)
		if total_hits > 0:
			msg.verbose()
		msg.min("Total Matches Found", str(total_hits))
		msg.min("+ Filename matches", str(len(name_hits)))
		msg.min("+ File content matches", str(len(content_hits)))
		return

	total_hits = len(distinct_hits)
	if total_hits > 0:
		if msg.get_level() >= msg.LOG_NORMAL:
			for path in distinct_hits:
				msg.normal("Match found", path)
		msg.normal()
	msg.quiet("Total Matches Found", str(total_hits))

def get_web_data(_url, _timeout=30):
	"""Downloads a URL and returns its body as a list of lines.

	_url      Address to download
	_timeout  Seconds requests may wait on the server before giving up

	Returns the response text split on newlines when the HTTP status is 200.
	Any download exception or other status code is reported through
	msg.normal and an empty list is returned instead.
	"""
	try:
		# Without a timeout requests waits forever on a stalled server, which
		# would hang the whole run on a single bad page
		response = requests.get(_url, timeout=_timeout)
	except Exception as error:
		msg.normal('  - ERROR', "Cannot download " + str(_url) + ": " + str(error))
		return []

	if response.status_code != 200:
		msg.normal("  - ERROR " + str(response.status_code), "URL download failure - " + str(_url))
		return []

	return response.text.split('\n')

# Ordered (page text, product code, resulting status) table used to classify a
# TID page. Order matters: on any line the first entry whose text is found wins.
_TID_PRODUCTS = (
	("SUSE Linux Enterprise Server", "SLES", "Suggested"),
	("SUSE Linux Enterprise High Avail", "HAE", "Suggested"),
	("SUSE Linux Enterprise Desktop", "SLED", "Suggested"),
	("SUSE Manager", "SUMA", "Suggested"),
	("SUSE Enterprise Storage", "SES", "Suggested"),
	("SUSE Linux Enterprise Micro", "Micro", "Suggested"),
	("SUSE Rancher", "Rancher", "Skipped"),
	("SUSE NeuVector", "NeuVector", "Skipped"),
)
# Raw strings avoid the invalid escape sequence warnings of the plain literals
_TID_LINK = re.compile(r'class="tid".*\([0-9]*\)', re.IGNORECASE)
_TID_SECTION_START = re.compile(r">Modified Date:<")
_TID_SECTION_END = re.compile(r"</ul>")

def _classify_tid_product(_tid_data):
	"Returns (product, status) for the first known product named between the Modified Date marker and the next </ul>, or None"
	in_section = False
	for tid_line in _tid_data:
		tid_line = tid_line.strip()
		if not in_section:
			# The marker line itself is never checked for product names
			in_section = _TID_SECTION_START.search(tid_line) is not None
			continue
		if _TID_SECTION_END.search(tid_line):
			return None
		for marker, product, status in _TID_PRODUCTS:
			if marker in tid_line:
				return (product, status)
	return None

def get_suggestions_from_web(pages, _log_file):
	"""Collects TIDs from the SUSE support listing and reports those without a pattern.

	pages      Number of listing pages to download from target_url
	_log_file  Path of the INI style log file to write

	Every TID link found on the downloaded pages is evaluated. TIDs whose
	number appears in a pattern filename or in pattern file content are marked
	as pre-existing. For the rest the TID page is downloaded and classified by
	product. Suggested TIDs are printed, every evaluated TID is written to
	_log_file, and a summary is printed. The log file is only written when at
	least one listing page was downloaded.
	"""
	tid_dict = {}
	c_ = {'count': 0, 'total': 0, 'suggested': 0, 'web': 0, 'dup': 0, 'prod': 0 }
	page_entries = 25
	maxIndex = 300
	data = []
	# Progress bars are only drawn at the minimal log level
	show_progress = ( msg.get_level() == msg.LOG_MIN )

	log = configparser.ConfigParser(interpolation=None)
	log.optionxform = str # Ensures manifest keys are saved as case sensitive and not lowercase

	msg.min("Retrieving TIDs from SUSE Support")
	if show_progress:
		bar1 = pd.ProgressBar("Progress: ", pages)
		bar1.update()

	msg.normal("+ SUSE support pages to evaluate: {}".format(pages))
	for page in range(pages):
		counted_page = page + 1
		if show_progress:
			bar1.inc_count()
		startIndex = ( page * page_entries ) + 1
		page_url = target_url + '?maxIndex=' + str(maxIndex) + '&startIndex=' + str(startIndex)
		msg.verbose("+ Loading URL {}/{}".format(counted_page, pages), page_url)
		this_page = get_web_data(page_url)
		if this_page:
			data.extend(this_page)
		else:
			c_['web'] += 1
		if show_progress:
			bar1.update()
	if show_progress:
		bar1.finish()
		del bar1

	if data:
		for line in data:
			if _TID_LINK.search(line):
				# Example: <a href="/support/kb/doc?id=000021120">Bootstrap repo creation fails with error <span class="tid">(000021120)</span></a>
				# Extracts the title, TID number and builds the TID URL. The prod is set to Unknown by default until the TID page is evaluated below.
				tid_id = line.split('(')[-1].split(')')[0]
				tid_dict[tid_id] = {
					'url': target_tid_url + tid_id,
					'title': line.split('>')[1].split('<')[0].strip(),
					'prod': "Unknown",
					'status': "Ignored",
					'pattern': '',
				}
		c_['total'] = len(tid_dict)
		if show_progress:
			bar2 = pd.ProgressBar("Evaluating: ", c_['total'])
		msg.normal("+ Total TIDs to evaluate: {}".format(c_['total']))
		msg.normal("Retrieving Pattern List")
		pattern_list = retrieve_pattern_file_list()
		msg.normal("Searching for Suggestions")
		for tid_id, tid_info in tid_dict.items():
			# Count first so the label runs 1..total, like the page counter above
			c_['count'] += 1
			msg.normal("+ Evaluating TID [{}/{}]".format(c_['count'], c_['total']), tid_id)
			# The TID is searched as literal text, never as regex syntax
			check_pattern = re.compile(re.escape(tid_id))
			duplicate = False
			for filename in pattern_list:
				if check_pattern.search(filename):
					found_in = "Filename"
				elif search_file_content(filename, check_pattern):
					found_in = "Content"
				else:
					continue
				msg.normal("  - Pre-existing TID " + found_in, filename)
				tid_info['status'] = "Pre-existing " + found_in
				tid_info['pattern'] = filename
				c_['dup'] += 1
				duplicate = True
				break

			# Only TIDs without a pattern need their page downloaded
			if not duplicate:
				msg.verbose("  Loading URL", tid_info['url'])
				tid_data = get_web_data(tid_info['url'])
				if tid_data:
					classified = _classify_tid_product(tid_data)
					if classified is not None:
						tid_info['prod'], tid_info['status'] = classified
						if tid_info['status'] == "Skipped":
							msg.normal("  - Product Skipped", tid_info['prod'])
							c_['prod'] += 1
				else:
					msg.normal("  - No TID data")
					c_['web'] += 1

			# Runs for pre-existing TIDs as well, so the bar reaches the total
			if show_progress:
				bar2.inc_count()
				bar2.update()
			msg.verbose()

		if show_progress:
			bar2.finish()
			del bar2
		msg.normal()

		# Report and log the TIDs ordered by product, then by TID number
		msg.min("Suggested TIDs for New Patterns")
		for tid in sorted(tid_dict, key=lambda key: (str(tid_dict[key]['prod']), str(key))):
			tid_info = tid_dict[tid]
			if tid_info['status'] == "Suggested":
				c_['suggested'] += 1
				msg.min("TID{} - {}\n + Product: {}\n + {}\n".format(tid, tid_info['title'], tid_info['prod'], tid_info['url']))
			log.add_section(tid)
			log[tid]['Title'] = tid_info['title']
			log[tid]['Status'] = tid_info['status']
			log[tid]['Product'] = tid_info['prod']
			log[tid]['URL'] = tid_info['url']
			if tid_info['pattern']:
				log[tid]['Pattern'] = tid_info['pattern']
		with open(_log_file, 'w') as logfile:
			log.write(logfile)

	msg.min("TID Suggestion Summary")
	msg.min(" Evaluated", str(c_['total']))
	msg.min(" Suggested", str(c_['suggested']))
	msg.min(" Web Errors", str(c_['web']))
	msg.min(" Pre-existing TIDs", str(c_['dup']))
	msg.min(" Products Skipped", str(c_['prod']))
	msg.min()


##############################################################################
# Main
##############################################################################

def main(argv):
	"""main entry point

	argv  Full argument vector, including the program name at index 0

	Loads settings from the patdevel configuration file, parses the command
	line, and then either searches the local patterns for the given string or
	collects suggested TIDs from the SUSE support site.

	Exit codes: 0 help shown, 1 configuration file missing, 2 invalid option,
	3 no search string given, 5 invalid -p/--pages value.
	"""
	# Only the names assigned below need the global declaration
	global sca_repo_dir, target_url, target_tid_url
	get_suggestions = False
	search_string = None

	web_pages = 1
	if( os.path.exists(pd.config_file) ):
		config.read(pd.config_file)
		sca_repo_dir = pd.config_entry(config.get("Common", "sca_repo_dir"), '/')
		target_url = pd.config_entry(config.get("Common", "suse_support_url"))
		target_tid_url = pd.config_entry(config.get("Common", "tid_base_url"))
		log_dir = pd.config_entry(config.get("Security", "pat_logs"), '/')
		log_file = log_dir + "suggested_tids.log"
		config_log_level = pd.config_entry(config.get("Common", "log_level"))
		config_logging = msg.validate_level(config_log_level)
		if( config_logging >= msg.LOG_QUIET ):
			msg.set_level(config_logging)
		else:
			print("Warning: Invalid log level in config file, using instance default")
	else:
		pd.title(title_string, SVER)
		print("Error: File not found - " + pd.config_file + "\n")
		sys.exit(1)

	try:
		(optlist, args) = getopt.gnu_getopt(argv[1:], "hp:sl:", ["help", "pages=", "suggestions", "log_level="])
	except getopt.GetoptError as exc:
		pd.title(title_string, SVER)
		print("Error:", exc, file=sys.stderr)
		print("\n")
		usage()
		sys.exit(2)
	for opt, arg in optlist:
		if opt in {"-h", "--help"}:
			pd.title(title_string, SVER)
			usage()
			sys.exit(0)
		elif opt in {"-s", "--suggestions"}:
			get_suggestions = True
		elif opt in {"-p", "--pages"}:
			# str.isdigit() also accepts characters such as superscript digits
			# that int() rejects, so only plain ASCII digits are allowed here
			pages_valid = re.fullmatch("[0-9]+", arg) is not None
			if pages_valid:
				web_pages = int(arg)
				pages_valid = ( web_range_min <= web_pages <= web_range_max )
			if not pages_valid:
				pd.title(title_string, SVER)
				print("Invalid web page range value: {}".format(arg))
				print("Valid range is 1-" + str(web_range_max) + ", where 0 is max pages\n")
				usage()
				sys.exit(5)
			if( web_pages == 0 ):
				web_pages = web_range_max
		elif opt in {"-l", "--log_level"}:
			user_logging = msg.validate_level(arg)
			if( user_logging >= msg.LOG_QUIET ):
				msg.set_level(user_logging)
			else:
				print("Warning: Invalid log level, using instance default")

	if len(args) > 0:
		search_string = ' '.join(args)
	elif not get_suggestions:
		# A search string is only optional when suggestions were requested
		pd.title(title_string, SVER)
		print("Error: Search string not found\n")
		usage()
		sys.exit(3)

	if( msg.get_level() > msg.LOG_QUIET ):
		pd.title(title_string, SVER)

	msg.normal("Log Level", msg.get_level_str())

	if get_suggestions:
		get_suggestions_from_web(web_pages, log_file)
	else:
		search_patterns(search_string)

# Entry point
if __name__ == "__main__":
	# config and msg are created here as module-level names. main() and the
	# functions above use them as globals, so they exist only when this file
	# is run as a script.
	config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
	# Route Ctrl-C (SIGINT) to signal_handler, which prints a notice and exits
	signal.signal(signal.SIGINT, signal_handler)
	msg = pd.DisplayMessages()
	main(sys.argv)


