Given an Apache2 access log file such as access.log, find the unique IP addresses and, for each one, count the number of requests that returned a 301 status code, then print the results in sorted order.
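For reference, here is what a typical Apache combined-log entry looks like (this sample line is made up for illustration, not taken from the actual log). Splitting it on spaces leaves the client IP at index 0 and the HTTP status code at index 8, which is the layout the parsing function below relies on:

# Illustrative log entry (fabricated for this example, not from the real file)
sample = ('203.0.113.5 - - [17/May/2015:10:05:03 +0000] '
          '"GET /index.html HTTP/1.1" 301 4263 "-" "Mozilla/5.0"')
fields = sample.split(' ')
print(fields[0])   # 203.0.113.5 -> client IP
print(fields[8])   # 301         -> HTTP status code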
To solve this challenge, we'll use the following Python script:
#!/usr/bin/env python3
# +------------------------------------+
# | Program Name: parse_apache2_log.py |
# | Author: Sudu |
# +------------------------------------+
import requests
import os
def parse_log(log_file):
    # Return a dictionary of {'ip_address': number of 301 requests}
    result = {}
    for line in log_file:
        # Use split() to pull the IP address and status code out of the log entry
        fields = line.split(' ')
        ip, status = fields[0], fields[8]
        if status == "301":
            # If we have already seen this IP, increment its 301 count; otherwise start at 1
            if ip in result:
                result[ip] += 1
            else:
                result[ip] = 1
    return result
def main():
    # Download the Apache log file
    log_url = 'https://raw.githubusercontent.com/elastic/examples/master/Common%20Data%20Formats/apache_logs/apache_logs'
    req = requests.get(log_url)
    # Save the log file to disk
    with open("access.log", 'wb') as access_log:
        access_log.write(req.content)
    # Read the log file back and pass it to the parsing function
    with open("access.log", "r") as access_log:
        result = parse_log(access_log)
    # Sort the result dictionary by number of 301 requests, highest first
    final_result = sorted(result.items(), key=lambda x: x[1], reverse=True)
    # Print the result
    print("IP Address # of requests with 301 response code")
    for k, v in final_result:
        print(f"{k:<15} {v}")
    # Cleanup
    os.remove("access.log")

if __name__ == "__main__":
    main()
$ ./parse_apache2_log.py | head -n 18
IP Address # of requests with 301 response code
144.76.194.187 25
199.168.96.66 25
65.55.213.73 14
106.78.19.160 11
65.55.213.74 8
66.249.73.135 5
68.180.224.225 4
216.152.249.242 3
2.241.35.167 3
108.32.74.68 2
65.55.213.79 2
178.255.215.83 2
88.120.89.50 2
208.43.255.28 2
208.43.251.181 2
150.101.192.144 2
178.255.215.71 2
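As a side note, the counting loop in parse_log could also be written with collections.Counter from the standard library. Here is a minimal sketch (the function name parse_log_counter is just for illustration) that assumes the same access.log format as above:

from collections import Counter

def parse_log_counter(log_file):
    # Count 301 responses per client IP with a Counter instead of a plain dict
    counts = Counter()
    for line in log_file:
        fields = line.split(' ')
        if len(fields) > 8 and fields[8] == "301":
            counts[fields[0]] += 1
    # most_common() returns (ip, count) pairs already sorted by count, descending
    return counts.most_common()

Since most_common() already returns the pairs sorted by count in descending order, the separate sorted() call in main() would no longer be needed.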
That’s all folks! I hope this post was informative. :)
~ Amit