require 'msf/core'
require 'thread'

module Msf
	
class Auxiliary::Scanner::Misc::Web_search_scan < Msf::Auxiliary
	
	include Auxiliary::Scanner
	include Auxiliary::Report
	
	# Sets up the module metadata and registers its options.
	#
	# Basic options cover pacing (SLEEP, SLEEPRAND), retry count (RETRIES),
	# console verbosity (QUIET), reverse-DNS lookups (LOOKUP) and the proxy
	# chains to use (PROXYCHAINS). Advanced options describe the search
	# engine itself: host, port, URI template, User-Agent, timeout and the
	# two regexes used to classify its responses.
	def initialize
		super(
			'Name'        => 'Web Search Engine IP Address Scanner',
			'Description' => %q{
				This scanner will do a web search engine query for
				each IP address (optionally, rDNS names as well) and
				record the number of hits and a URL to the query 
				results.	This is useful for determining some active 
				hosts and information gathering about a network without 
				having to directly probe the network.	 Common results
				include publicly accessible web access logs, mailing
				list posts, abuse reports, and wikipedia edits.
				(WARNING: If you set LOOKUP to true, your target
				may notice the reverse DNS lookups.)
			},
			'Version'     => '$Revision: 5614 $',
			'Author'      => 'Wesley McGrew <wesley@mcgrewsecurity.com>',
			'License'     => MSF_LICENSE
		)

		register_options(
			[
				OptInt.new('SLEEP',
					[true, 'Minimum time to sleep between requests (seconds)', 3]),
				OptInt.new('SLEEPRAND',
					[true, 'Random additional time to sleep (seconds)', 3]),
				# Boolean options default to a real boolean, not the string 'false'.
				OptBool.new('LOOKUP',
					[true, 'Reverse lookup IPs and search hostnames too? (Not stealthy)', false]),
				OptString.new('PROXYCHAINS',
					[false, 'Pipe-delimited (|) list of proxy chains to use']),
				OptBool.new('QUIET',
					[true, 'Quiet output (still logs to db)', false]),
				OptInt.new('RETRIES',
					[true, 'Number of times to retry queries if they fail', 3])
			], self.class
		)

		register_advanced_options(
			[
				OptString.new('NoHitsRegex',
					[true, 'Regex to match a zero-hit search',
					 '(?:No results found)|(?:did not match any documents)']),
				# The first capture group must hold the hit count (commas allowed).
				OptString.new('NumHitsRegex',
					[true, 'Regex to match number of hits',
					 'of (?:about )?<b>((?:[,\\d])+)<\\/b> for <b>']),
				OptString.new('SearchHost',
					[true, 'Hostname of search engine', 'www.google.com']),
				OptInt.new('SearchPort',
					[true, 'Search Port', 80]),
				OptInt.new('Timeout',
					[true, 'Timeout for the search engine to respond', 10]),
				# '*' marks where the quoted query is spliced into the URI.
				OptString.new('SearchURI',
					[true, 'Search URI (* for query location)',
					 '/search?hl=en&q=*&btnG=Google+Search']),
				OptString.new('UserAgent',
					[true, 'The User-Agent header to use for all requests',
					 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; ' +
					 'rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1'])
			], self.class
		)
	end
	
	# Asks the search engine how many results exist for +query+.
	#
	# The query is spliced into the SearchURI template (@search_uri) and sent
	# over a plain TCP socket. The response is matched against
	# @num_hits_regex and @no_hits_regex; anything else (or a socket error)
	# counts as a failed attempt. Up to @max_retries attempts are made, after
	# which the query is recorded in @failed_queries. Every attempt is
	# followed by a sleep of SLEEP + rand(SLEEPRAND + 1) seconds.
	#
	# When @proxy_queue is set, one proxy chain is borrowed for the duration
	# of the call, used for every connection, and always handed back.
	#
	# @param query [String] search term, inserted verbatim into the URI
	# @return [Integer] number of hits reported; 0 if there were none or the
	#   query could not be completed
	def num_hits(query)
		# Take a penny: this blocks until a proxy chain is free, so that two
		# threads never use the same chain at the same time.
		proxy_chain = @proxy_queue ? @proxy_queue.deq : nil

		num_results = 0
		attempts = 0

		begin
			request = build_search_request(query)

			while attempts < @max_retries
				attempts += 1

				# A fresh response per attempt: nil means the socket failed, so
				# an earlier attempt's body can never be re-evaluated.
				response = fetch_search_response(request, proxy_chain)
				hits = response ? parse_hit_count(response) : nil

				if hits
					num_results = hits
					# Keep the search engine happy with us before returning.
					search_delay
					break
				end

				unless @quiet
					if response
						print_status "Did not recognize the response to searching #{query}."
					else
						print_status "Error talking to the search engine."
					end
				end

				# The search engine may have just flaked on us temporarily,
				# so retry until RETRIES attempts have been used up.
				if attempts < @max_retries
					print_status "Retrying." unless @quiet
				else
					print_status "Giving up on #{query}." unless @quiet
					@failed_queries.push query
				end

				# If we don't sleep for a while, the search engine may ban us
				search_delay
			end
		ensure
			# Leave a penny, even if something unexpected blew up above.
			@proxy_queue.enq proxy_chain if @proxy_queue
		end

		num_results
	end

	# Builds the raw HTTP/1.0 request for +query+. HTTP/1.0 with
	# "Connection: close" makes the server end the response by closing the
	# socket, which is what the read loop relies on.
	# NOTE(review): the query is not URL-encoded; callers pass quoted IPs
	# and hostnames only.
	def build_search_request(query)
		path = "#{@search_uri[0]}#{query}#{@search_uri[1]}"
		"GET #{path} HTTP/1.0\r\n" +
			"Host: #{@search_host}\r\n" +
			"User-Agent: #{@user_agent}\r\n" +
			"Connection: close\r\n\r\n"
	end

	# Sends +request+ and returns everything the server wrote back before
	# closing, or nil if connecting or sending failed. The socket is always
	# closed.
	#
	# Would have used Rex::Proto::Http::Client here, but it seemed to drop
	# connections before the full response arrived for some hosts; the needs
	# here are simple enough for a bare socket.
	def fetch_search_response(request, proxy_chain = nil)
		sock = nil
		params = {
			'PeerHost' => @search_host,
			'PeerPort' => @search_port,
			'Timeout'  => @timeout
		}
		params['Proxies'] = proxy_chain if proxy_chain

		sock = Rex::Socket::Tcp.create(params)
		sock.write(request)

		response = String.new
		begin
			loop do
				chunk = sock.read(1024)
				break if chunk.nil? || chunk.empty?
				response << chunk
			end
		rescue StandardError
			# EOF or a reset mid-read: keep whatever arrived so far.
		end
		response
	rescue StandardError
		nil
	ensure
		if sock
			begin
				sock.close
			rescue StandardError
				# Nothing useful to do if close itself fails.
			end
		end
	end

	# Extracts the hit count from a search engine response.
	# Returns the count, 0 for a recognized "no results" page, or nil when
	# the response matches neither regex.
	def parse_hit_count(response)
		match = @num_hits_regex.match(response)
		if match
			# Fall back to the whole match if the regex has no capture group.
			return (match[1] || match[0]).delete(',').to_i
		end
		return 0 if response =~ @no_hits_regex
		nil
	end

	# Sleeps SLEEP seconds plus a random 0..SLEEPRAND extra.
	def search_delay
		sleep(@sleep_time + rand(@sleep_rand + 1))
	end

	private :build_search_request, :fetch_search_response, :parse_hit_count, :search_delay
	
	# Returns the hostnames found by a reverse DNS (PTR) lookup of +ip+.
	#
	# Uses the resolver stored in @res. A failed lookup is reported (unless
	# QUIET is set) and yields an empty list, so the caller can carry on
	# instead of crashing on a missing packet.
	#
	# @param ip [String] address to look up
	# @return [Array<String>] PTR names without a trailing dot
	def reverse_dns(ip)
		begin
			pkt = @res.search(ip)
		rescue StandardError
			pkt = nil
		end

		unless pkt
			print_status "Could not rDNS #{ip}." unless @quiet
			return []
		end

		names = []
		pkt.each_ptr do |ptr|
			# PTR names presumably arrive fully qualified ("host.example.com.");
			# strip only that trailing dot rather than blindly dropping the
			# last character.
			names.push ptr.chomp('.')
		end
		names
	end
	
	# Entry point: caches the datastore options in instance variables, sets
	# up the optional resolver and proxy queue, then defers to the Scanner
	# mixin's run and finally lists any queries that could not be completed.
	#
	# @return whatever the inherited run returns
	def run
		@sleep_time     = datastore['SLEEP']
		@sleep_rand     = datastore['SLEEPRAND']
		@user_agent     = datastore['UserAgent']
		@search_host    = datastore['SearchHost']
		@search_port    = datastore['SearchPort']
		@timeout        = datastore['Timeout']
		@num_hits_regex = Regexp.new(datastore['NumHitsRegex'])
		@no_hits_regex  = Regexp.new(datastore['NoHitsRegex'])
		@max_retries    = datastore['RETRIES']
		@failed_queries = []

		# Booleans may arrive as strings ("false" is truthy in Ruby), so
		# normalize them explicitly.
		truthy  = /\A(?:t|y|1)/i
		@lookup = !!(datastore['LOOKUP'].to_s =~ truthy)
		@quiet  = !!(datastore['QUIET'].to_s =~ truthy)

		# Split the URI template at the first '*' only, and always keep two
		# parts so a template ending in '*' (or lacking one) yields an empty
		# suffix instead of nil.
		uri_parts = datastore['SearchURI'].to_s.split('*', 2)
		uri_parts << '' while uri_parts.length < 2
		@search_uri  = uri_parts
		@display_url = ["http://#{@search_host}#{uri_parts[0]}", uri_parts[1]]

		if @lookup
			begin
				@res = Net::DNS::Resolver.new
			rescue
				print_status "Can't resolve names.\tTurning off lookups."
				@lookup = false
			end
		end

		# Set up a queue of the proxies that we can use to help ensure
		# that two threads don't wind up using one proxy chain at the same
		# time. An empty or blank PROXYCHAINS leaves the queue unset; an
		# empty queue would make every query block forever waiting for a chain.
		@proxy_queue = nil
		chains = datastore['PROXYCHAINS'].to_s.split('|').map { |c| c.strip }.reject { |c| c.empty? }
		unless chains.empty?
			@proxy_queue = Queue.new
			chains.each { |c| @proxy_queue.enq c }
		end

		print_status "Beginning web search scan of #{datastore['RHOSTS']}."

		# Call whatever it is that Scanner's run does, which includes
		# nice things like splitting this up into threads and calling
		# run_host with each ip in RHOSTS
		ret = super

		print_status "Web search scan of #{datastore['RHOSTS']} complete."

		unless @failed_queries.empty?
			print_status "The following queries could not be completed:"
			@failed_queries.each { |q| print_status "\t#{q}" }
		end

		ret
	end
	
	# Scanner callback for a single address: searches for the quoted IP and,
	# when LOOKUP is enabled, for each of its reverse DNS names. Every search
	# with at least one hit is stored as a 'web_search' note on the host and
	# echoed to the console unless QUIET is set.
	#
	# @param ip [String] the address being scanned
	def run_host(ip)
		# Search for the IP address
		record_search_hits(ip, ip, ip)

		# Search for the names, if we're supposed to.
		return unless @lookup

		reverse_dns(ip).each do |name|
			record_search_hits(ip, name, "#{name} (#{ip})")
		end
	end

	# Searches for the quoted +term+ and, if there are hits, reports them
	# against host +ip+. +label+ is how the term is shown on the console.
	def record_search_hits(ip, term, label)
		# to_i guards against a nil count from a query that never ran.
		hits = num_hits('"' + term + '"').to_i
		return if hits <= 0

		url = "#{@display_url[0]}\"#{term}\"#{@display_url[1]}"
		report_note(
			:host => ip,
			:type => 'web_search',
			:data => "#{@search_host}: #{hits} hits for #{term} => #{url}"
		)
		print_status "#{label} : #{hits} hits -> #{url}" unless @quiet
	end

	private :record_search_hits

end

end
