# Python code is given here.
# Pseudocode would look similar, but with the actual Python function names and loop syntax
# replaced by some roughly equivalent notation.

mystring = "this This THIS this enhances our commitment to open-source collaboration while providing additional protections for contributors and users alike. It provides a collection of working systems with different complexities."

# Naive tokenization: break on single spaces only. Tokens keep their original
# case and any attached punctuation, so "this"/"This" and "alike."/"alike"
# are treated as different words in this first pass.
words = mystring.split(' ')
word_freqs = {}

for word in words:
    # .get() yields 0 the first time a word is seen, so one statement covers
    # both "initialize counter" and "increment counter".
    word_freqs[word] = word_freqs.get(word, 0) + 1

# Few repeats for this sentence; note that case variants and trailing
# punctuation show up as separate keys.
print(word_freqs)

{'this': 2, 'This': 1, 'THIS': 1, 'enhances': 1, 'our': 1, 'commitment': 1, 'to': 1, 'open-source': 1, 'collaboration': 1, 'while': 1, 'providing': 1, 'additional': 1, 'protections': 1, 'for': 1, 'contributors': 1, 'and': 1, 'users': 1, 'alike.': 1, 'It': 1, 'provides': 1, 'a': 1, 'collection': 1, 'of': 1, 'working': 1, 'systems': 1, 'with': 1, 'different': 1, 'complexities.': 1}


# handle punctuation and case

# Count word frequencies case-insensitively, ignoring trailing punctuation.
# split() with no separator collapses runs of whitespace and never produces
# empty tokens, so repeated/leading/trailing spaces (or an empty string)
# cannot trigger an IndexError the way indexing word[-1] on '' would.
word_freqs = {}
words = mystring.split()

for word in words:
    # Normalize case, then drop every trailing '.', ',', '?' or '!'
    # (handles "alike." as well as "what?!").
    word = word.lower().rstrip('.,?!')
    if not word:
        continue  # token was punctuation only (e.g. "..."); nothing to count
    word_freqs[word] = word_freqs.get(word, 0) + 1

print(word_freqs) # better (though won't be perfect)

{'this': 4, 'enhances': 1, 'our': 1, 'commitment': 1, 'to': 1, 'open-source': 1, 'collaboration': 1, 'while': 1, 'providing': 1, 'additional': 1, 'protections': 1, 'for': 1, 'contributors': 1, 'and': 1, 'users': 1, 'alike': 1, 'it': 1, 'provides': 1, 'a': 1, 'collection': 1, 'of': 1, 'working': 1, 'systems': 1, 'with': 1, 'different': 1, 'complexities': 1}


# python example

import re

mystring = "This 123.456 ... 78 ... 9."

# Match decimal numbers or plain integers. The decimal alternative is listed
# first so that "123.456" is consumed whole instead of as "123" and "456".
# A raw string avoids doubling every backslash.
pattern = r"\d+\.\d+|\d+"
number_re = re.compile(pattern)
number_re.findall(mystring)

['123.456', '78', '9']

Quiz 1: Strings with Python

1. Zipf's law

2. Regex

Answer in plain language