In [1]:
# Load the CVE ids and risk scores so that a severity label can be attached to every row.
import pandas as pd

headers = ['CVE_ID', 'RISK_SCORE']
csv_path = 'Downloads/cve_ids_and_risk_scores.csv'
# names=headers supplies the column labels for the two columns that are read
df = pd.read_csv(csv_path, names=headers)
# preview the DataFrame before the extra column of data is added
df.head()
Out[1]:
CVE_ID RISK_SCORE
0 CVE-2007-6750 55.3898
1 CVE-2009-2197 24.0022
2 CVE-2009-3270 41.5423
3 CVE-2009-3560 27.6949
4 CVE-2009-3720 27.6949
In [2]:
# setup lists of the integer risk scores that belong to each severity bucket
# range() excludes its stop value, so every stop is one past the last score in the bucket;
# this keeps the buckets contiguous (32, 65 and 100 are no longer left out)
low = list(range(0, 33))       # 0-32
medium = list(range(33, 66))   # 33-65
high = list(range(66, 101))    # 66-100
In [3]:
# function to group our risk scores
def severity(risk_score):
    """Return the severity label for a numeric risk score.

    Buckets are contiguous so that every score from 0 to 100 gets a label:
    0 up to (not including) 33 is 'low', 33 up to (not including) 66 is
    'medium', and 66 through 100 is 'high'.

    Parameters
    ----------
    risk_score : int or float
        The score to classify. Non-integer values are compared directly,
        so no prior truncation is required.

    Returns
    -------
    str or None
        'low', 'medium' or 'high'; None when the score lies outside 0-100
        (or is NaN, since every comparison with NaN is false).
    """
    # threshold comparisons instead of list membership: no gaps at 32, 65 and 100,
    # and a float such as 55.3898 is classified instead of falling through
    if 0 <= risk_score < 33:
        return 'low'
    if 33 <= risk_score < 66:
        return 'medium'
    if 66 <= risk_score <= 100:
        return 'high'
    # out-of-range scores keep the previous "no label" result, now stated explicitly
    return None
In [4]:
# map every RISK_SCORE value to its severity label and store the result in a new SEVERITY column
# only one column is read, so Series.map on that column replaces the row-wise df.apply(..., axis=1)
# int() truncates the float score before the lookup, as the row-wise version did
df['SEVERITY'] = df['RISK_SCORE'].map(lambda score: severity(int(score)))
In [5]:
# preview the DataFrame again to confirm the SEVERITY column was added
df.head()
Out[5]:
CVE_ID RISK_SCORE SEVERITY
0 CVE-2007-6750 55.3898 medium
1 CVE-2009-2197 24.0022 low
2 CVE-2009-3270 41.5423 medium
3 CVE-2009-3560 27.6949 low
4 CVE-2009-3720 27.6949 low
In [6]:
# let's try to graph this by severity
# by='SEVERITY' draws one histogram panel per severity label
df.hist(by='SEVERITY', figsize=(6, 4))
Out[6]:
array([[<AxesSubplot:title={'center':'high'}>,
        <AxesSubplot:title={'center':'low'}>],
       [<AxesSubplot:title={'center':'medium'}>, <AxesSubplot:>]],
      dtype=object)