-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultithreading-multiprocessing.html
461 lines (386 loc) · 29.2 KB
/
multithreading-multiprocessing.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<title>Multithreading and multiprocessing in python</title>
<meta content="" name="description">
<meta content="" name="keywords">
<!-- Favicons -->
<link href="assets/img/Favicon-1.png" rel="icon">
<link href="assets/img/Favicon-1.png" rel="apple-touch-icon">
<!-- Google Fonts -->
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i|Raleway:300,300i,400,400i,500,500i,600,600i,700,700i|Poppins:300,300i,400,400i,500,500i,600,600i,700,700i" rel="stylesheet">
<!-- Vendor CSS Files -->
<link href="assets/vendor/aos/aos.css" rel="stylesheet">
<link href="assets/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="assets/vendor/bootstrap-icons/bootstrap-icons.css" rel="stylesheet">
<link href="assets/vendor/boxicons/css/boxicons.min.css" rel="stylesheet">
<link href="assets/vendor/glightbox/css/glightbox.min.css" rel="stylesheet">
<link href="assets/vendor/swiper/swiper-bundle.min.css" rel="stylesheet">
<!-- Creating a python code section-->
<link rel="stylesheet" href="assets/css/prism.css">
<script src="assets/js/prism.js"></script>
<!-- Template Main CSS File -->
<link href="assets/css/style.css" rel="stylesheet">
<!-- To set the icon, visit https://fontawesome.com/account-->
<script src="https://kit.fontawesome.com/5d25c1efd3.js" crossorigin="anonymous"></script>
<!-- end of icon-->
<script type="text/javascript" async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
<!-- =======================================================
* Template Name: iPortfolio
* Updated: Sep 18 2023 with Bootstrap v5.3.2
* Template URL: https://bootstrapmade.com/iportfolio-bootstrap-portfolio-websites-template/
* Author: BootstrapMade.com
* License: https://bootstrapmade.com/license/
======================================================== -->
</head>
<body>
<!-- ======= Mobile nav toggle button ======= -->
<i class="bi bi-list mobile-nav-toggle d-xl-none"></i>
<!-- ======= Header ======= -->
<header id="header">
<div class="d-flex flex-column">
<div class="profile">
<img src="assets/img/myphoto.jpeg" alt="" class="img-fluid rounded-circle">
<h1 class="text-light"><a href="index.html">Arun</a></h1>
<div class="social-links mt-3 text-center">
<a href="https://www.linkedin.com/in/arunp77/" target="_blank" class="linkedin"><i class="bx bxl-linkedin"></i></a>
<a href="https://github.com/arunp77" target="_blank" class="github"><i class="bx bxl-github"></i></a>
<a href="https://twitter.com/arunp77_" target="_blank" class="twitter"><i class="bx bxl-twitter"></i></a>
<a href="https://www.instagram.com/arunp77/" target="_blank" class="instagram"><i class="bx bxl-instagram"></i></a>
<a href="https://arunp77.medium.com/" target="_blank" class="medium"><i class="bx bxl-medium"></i></a>
</div>
</div>
<nav id="navbar" class="nav-menu navbar">
<ul>
<li><a href="index.html#hero" class="nav-link scrollto active"><i class="bx bx-home"></i> <span>Home</span></a></li>
<li><a href="index.html#about" class="nav-link scrollto"><i class="bx bx-user"></i> <span>About</span></a></li>
<li><a href="index.html#resume" class="nav-link scrollto"><i class="bx bx-file-blank"></i> <span>Resume</span></a></li>
<li><a href="index.html#portfolio" class="nav-link scrollto"><i class="bx bx-book-content"></i> <span>Portfolio</span></a></li>
<li><a href="index.html#skills-and-tools" class="nav-link scrollto"><i class="bx bx-wrench"></i> <span>Skills and Tools</span></a></li>
<li><a href="index.html#language" class="nav-link scrollto"><i class="bi bi-menu-up"></i> <span>Languages</span></a></li>
<li><a href="index.html#awards" class="nav-link scrollto"><i class="bi bi-award-fill"></i> <span>Awards</span></a></li>
<li><a href="index.html#professionalcourses" class="nav-link scrollto"><i class="bx bx-book-alt"></i> <span>Professional Certification</span></a></li>
<li><a href="index.html#publications" class="nav-link scrollto"><i class="bx bx-news"></i> <span>Publications</span></a></li>
<li><a href="index.html#extra-curricular" class="nav-link scrollto"><i class="bx bx-rocket"></i> <span>Extra-Curricular Activities</span></a></li>
<!-- <li><a href="#contact" class="nav-link scrollto"><i class="bx bx-envelope"></i> <span>Contact</span></a></li> -->
</ul>
</nav><!-- .nav-menu -->
</div>
</header><!-- End Header -->
<main id="main">
<!-- ======= Breadcrumbs ======= -->
<section id="breadcrumbs" class="breadcrumbs">
<div class="container">
<div class="d-flex justify-content-between align-items-center">
<h2>Python</h2>
<ol>
<li><a href="portfolio-details.html" class="clickable-box">Content section</a></li>
<li><a href="index.html#portfolio" class="clickable-box">Portfolio section</a></li>
</ol>
</div>
</div>
</section><!-- End Breadcrumbs -->
<!------ right dropdown menue ------->
<div class="right-side-list">
<div class="dropdown">
<button class="dropbtn"><strong>Shortcuts:</strong></button>
<div class="dropdown-content">
<ul>
<li><a href="cloud-compute.html"><i class="fas fa-cloud"></i> Cloud</a></li>
<li><a href="AWS-GCP.html"><i class="fas fa-cloud"></i> AWS-GCP</a></li>
<li><a href="amazon-s3.html"><i class="fas fa-cloud"></i> AWS S3</a></li>
<li><a href="ec2-confi.html"><i class="fas fa-server"></i> EC2</a></li>
<li><a href="Docker-Container.html"><i class="fab fa-docker" style="color: rgb(29, 27, 27);"></i> Docker</a></li>
<li><a href="Jupyter-nifi.html"><i class="fab fa-python" style="color: rgb(34, 32, 32);"></i> Jupyter-nifi</a></li>
<li><a href="snowflake-task-stream.html"><i class="fas fa-snowflake"></i> Snowflake</a></li>
<li><a href="data-model.html"><i class="fas fa-database"></i> Data modeling</a></li>
<li><a href="sql-basics.html"><i class="fas fa-table"></i> SQL basics</a></li>
<li><a href="sql-basic-details.html"><i class="fas fa-database"></i> SQL</a></li>
<li><a href="Bigquerry-sql.html"><i class="fas fa-database"></i> Bigquery</a></li>
<li><a href="scd.html"><i class="fas fa-archive"></i> SCD</a></li>
<li><a href="sql-project.html"><i class="fas fa-database"></i> SQL project</a></li>
<!-- Add more subsections as needed -->
</ul>
</div>
</div>
</div>
<!-- ======= Portfolio Details Section ======= -->
<section id="portfolio-details" class="portfolio-details">
<div class="container">
<div class="row gy-4">
<h1>Multithreading and multiprocessing in python</h1>
<div class="col-lg-8">
<div class="portfolio-details-slider swiper">
<div class="swiper-wrapper align-items-center">
<figure>
<img src="assets/img/data-engineering/Multithreading-vs-Multiprocessing-1.jpg" alt="Diagram comparing multithreading and multiprocessing" style="max-width: 100%; height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure>
</div>
<div class="swiper-pagination"></div>
</div>
</div>
<div class="col-lg-4 grey-box">
<div class="section-title">
<h3>Table of Contents</h3>
<ol>
<li><a href="#introduction">Introduction</a></li>
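<li><a href="#multithreading">Multithreading</a></li>
<li><a href="#multiprocessing">Multiprocessing</a></li>
<li><a href="#choosing">Choosing Between Multithreading and Multiprocessing</a></li>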
<li><a href="#reference">Reference</a></li>
</ol>
</div>
</div>
</div>
<section>
<h2 id="introduction">Introduction</h2>
Multithreading and multiprocessing are two techniques used to achieve concurrent execution in Python. Although they share the common goal of improving performance by working on multiple tasks at the same time, they differ fundamentally in how they manage and use system resources.
<div class="box" style="background-color: rgb(144, 238, 222);">
🛈 A processor (or CPU, for Central Processing Unit) is one of the essential electronic components of a computer and is responsible for executing instructions. A processor is mainly defined by two characteristics: its frequency and its number of cores. The frequency is the number of cycles the processor can perform per second. A processor can have one or more cores, which are independent computing units. A multi-core processor can execute several tasks simultaneously, when the tasks allow it, by distributing them across the available cores.
</div>
<div class="box" style="background-color: rgba(255, 0, 55, 0.548);">
🛈 RAM (Random Access Memory) is the working memory of a computer. It is a temporary storage space that the system can access very quickly, which allows programs and the interface to run smoothly.
</div>
<div class="box" style="background-color: rgba(255, 238, 0, 0.534);">
<p><b>Global Interpreter Lock (GIL): </b> Python's standard implementation, CPython, has a Global Interpreter Lock (GIL) that prevents multiple native threads from executing Python bytecodes at once. This means that even though a program has multiple threads, only one thread can execute Python code at a time. This significantly limits the performance gains of multithreading for CPU-bound tasks in Python.</p>
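<p><strong>I/O-Bound Tasks: </strong> Tasks that involve a lot of waiting, such as reading or writing files, making network requests, or interacting with databases. </p>
<p><strong>CPU-Bound Tasks: </strong> Tasks that spend most of their time performing computations, such as numerical calculations or image processing. </p>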
</div>
<div class="box" style="background-color: rgba(136, 255, 0, 0.534);">
<p><strong>What Are Threads?</strong> </p>
A thread is a smaller unit of a process that can be scheduled to run by the operating system. When a program starts, it runs as a single process that contains at least one thread — the main thread. This main thread is where the program begins execution. However, a process can create additional threads, and each of these threads can run code independently and concurrently within the same process. The key characteristics of threads are:
<ul>
<li><b>Shared Memory Space: </b>All threads within a process share the same memory space. This means they can access and modify the same variables and data structures. While this can be beneficial for sharing information between threads, it also requires careful management to avoid issues like race conditions, where two or more threads attempt to modify the same data simultaneously.</li>
<li><b>Independent Execution: </b>Each thread runs independently of the others. This means threads can perform different tasks simultaneously, which can improve the efficiency and responsiveness of a program.</li>
<li><b>Lightweight: </b>Compared to processes, threads are relatively lightweight. Creating a thread consumes fewer resources than creating a new process because threads within the same process share many resources like memory and file handles.</li>
<li><b>Concurrency: </b>Threads allow a program to perform multiple operations concurrently. For example, in a web server, one thread might handle client requests, while another handles logging, and yet another thread manages database queries, all simultaneously.</li>
<li><b>Context Switching: </b>The operating system can switch between different threads, a process known as context switching. This allows for the concurrent execution of threads, even on a single-core processor, by quickly switching between threads.</li>
</ul>
</div>
<ol>
<li><strong>Multithreading:</strong>
<ul>
<li>Multithreading involves running multiple threads in a single process. Threads share the same memory space and resources within the process.</li>
<li>Python's threading module is used for creating and managing threads.</li>
<li>It's suitable for I/O-bound tasks where the threads spend most of their time waiting for I/O operations to complete (e.g., network requests, file I/O, etc.).</li>
<li>Due to Python's Global Interpreter Lock (GIL), multithreading might not be as effective for CPU-bound tasks that require intensive computation because only one thread can execute Python bytecode at a time.</li>
<li>Example: concurrent downloading of files from the internet.</li>
</ul>
</li>
<li><strong>Multiprocessing:</strong>
<ul>
<li>Multiprocessing involves running multiple processes, each with its own memory space and resources. Processes do not share memory by default and communicate via inter-process communication (IPC) mechanisms.</li>
<li>Python's multiprocessing module is used for creating and managing processes.</li>
<li>It's suitable for CPU-bound tasks where parallelism can be achieved by distributing the workload across multiple processes.</li>
<li>Since each process has its own GIL, multiprocessing can effectively utilize multiple CPU cores.</li>
<li>Example: parallelizing a CPU-intensive task such as image processing.</li>
</ul>
</li>
</ol>
<h4 id="comparision">Comparison Between Multithreading and Multiprocessing</h4>
<table>
<tr>
<th>Aspect</th>
<th>Multithreading</th>
<th>Multiprocessing</th>
</tr>
<tr>
<td>Memory Usage</td>
<td>Less memory usage/Shared memory space</td>
<td>More memory usage/Separate memory space for each process</td>
</tr>
<tr>
<td>Concurrency Type</td>
<td>Concurrent threads within a single process</td>
<td>Parallel processes with separate memory spaces</td>
</tr>
<tr>
<td>GIL Impact</td>
<td>Affected by GIL (only one thread executes Python code at a time)</td>
<td>Each process has its own interpreter and GIL, so multiple processes can run Python code simultaneously</td>
</tr>
<tr>
<td>Best For</td>
<td>I/O-bound tasks (e.g., web scraping, file I/O)</td>
<td>CPU-bound tasks (e.g., heavy computations)</td>
</tr>
<tr>
<td>Synchronization</td>
<td>More complex due to shared state</td>
<td>Less complex but requires IPC for communication</td>
</tr>
<tr>
<td>Overhead</td>
<td>Lower overhead, but limited by GIL</td>
<td>Higher overhead due to process creation and management, but no GIL limitation</td>
</tr>
<tr>
<td>Fault Isolation</td>
<td>A thread crash can affect the entire process</td>
<td>A process crash is isolated to that process</td>
</tr>
</table>
<br>
<!------------------------------------------------->
<h2 id="multithreading">What is Multithreading?</h2>
<p>Multithreading allows a program to run multiple threads concurrently, which is particularly useful in scenarios where the program needs to perform multiple tasks simultaneously without requiring significant CPU resources.</p>
<p>In multithreading, each thread operates independently but shares the same memory space with other threads within the same process. This is especially beneficial for I/O-bound tasks (e.g., file I/O, network operations), where the program often spends time waiting for operations to complete. While one thread is waiting for I/O, other threads can continue executing, leading to better overall performance and reduced idle time.</p>
<!------------------------------------------------>
<ul>
<li><b>When to use Multithreading? </b>Multithreading is most beneficial in the following scenarios:
<ul>
<li><b>Multiple Tasks Simultaneously: </b> When the program needs to handle several tasks at once without significant CPU load.</li>
<li><b>I/O-Bound Operations: </b>In tasks where the program spends a lot of time waiting for I/O operations to complete, such as reading/writing files, network communication, or database interactions.</li>
</ul>
</li>
<li><b>Why use Multithreading? </b>
<ul>
<li><b>Efficiency: </b> Multithreading improves efficiency by allowing other threads to execute while one thread is waiting, minimizing idle time.</li>
<li><b>Improved Responsiveness: </b> In applications like GUIs, multithreading can keep the interface responsive while performing background tasks.</li>
</ul>
</li>
<li><b>Example: </b>
<ul>
<li><b>Web Scraping:</b> Fetching data from multiple web pages simultaneously.</li>
<li><b>Network Operations:</b> Handling multiple client connections on a server, downloading files, or sending requests to APIs.</li>
</ul>
</li>
</ul>
<h5 id="creating-threads1">Creating and Using Threads in Python</h5>
Python provides the threading module to work with threads. Below is a simple example of how to create and start a thread:
<pre class="language-python"><code>
import threading


def print_numbers():
    """Print the integers 0 through 9, one per line."""
    for i in range(10):
        print(i)


# Create a thread object that will run print_numbers
thread = threading.Thread(target=print_numbers)
# Start the thread
thread.start()
# Wait for the thread to finish
thread.join()
</code></pre>
<h5 id="threads2">Thread Synchronization</h5>
Due to the shared memory space, threads may encounter issues like race conditions when they try to access shared resources simultaneously. To avoid these issues, Python provides several synchronization primitives, such as Locks, RLocks, Semaphores, Events, and Conditions.
<ul>
<li><b>Lock:</b> A Lock object is a basic synchronization primitive. It ensures that only one thread can access a particular section of code at a time.
<pre class="language-python"><code>
import threading

# One lock shared by every thread that updates the counter
lock = threading.Lock()


def safe_increment(counter):
    """Add 1 to counter.value while holding the shared lock."""
    # Only one thread at a time can run the body of this with-block,
    # so the read-modify-write of counter.value is never interleaved.
    with lock:
        counter.value += 1
</code></pre>
</li>
<li><b>RLock: </b>A reentrant lock (RLock) allows a thread to acquire the same lock multiple times without blocking itself.</li>
</ul>
<h5 id="threads3">Thread Pools</h5>
For managing a pool of threads, Python provides <code>concurrent.futures.ThreadPoolExecutor</code>, which makes it easier to work with multiple threads.
<pre class="language-python"><code>
from concurrent.futures import ThreadPoolExecutor


def square(n):
    """Return n multiplied by itself."""
    return n * n


# Leaving the with-block shuts the pool down once the work is finished
with ThreadPoolExecutor(max_workers=4) as executor:
    # map() yields the results in the same order as the inputs
    results = executor.map(square, [1, 2, 3, 4])
    print(list(results))  # [1, 4, 9, 16]
</code></pre>
<!---------------------------------------------->
<h2 id="multiprocessing">Multiprocessing</h2>
Multiprocessing involves running multiple processes simultaneously. Each process has its own memory space and its own Python interpreter (with its own GIL), so the GIL of one process does not block the others. This makes multiprocessing more suitable for CPU-bound tasks where multiple processes can run in parallel on different CPU cores.
<h5 id="processes1">Creating and Using Processes in Python</h5>
Python provides the multiprocessing module to create and manage processes. Each process runs independently, and processes do not share memory space, which avoids issues like race conditions but requires inter-process communication (IPC) to share data.
<pre class="language-python"><code>
import multiprocessing


def print_numbers():
    """Print the integers 0 through 9, one per line."""
    for i in range(10):
        print(i)


# The __main__ guard is required on platforms that start child processes
# with the "spawn" method (Windows, macOS): the child re-imports this module,
# and without the guard it would try to start a process of its own.
if __name__ == "__main__":
    # Create a process object
    process = multiprocessing.Process(target=print_numbers)
    # Start the process
    process.start()
    # Wait for the process to finish
    process.join()
</code></pre>
<h5 id="processes2">Inter-Process Communication (IPC)</h5>
Since processes do not share memory, Python provides several IPC mechanisms:
<ul>
<li><b>Queues: </b>Used to pass messages or data between processes.</li>
<li><b>Pipes: </b>A Pipe provides a two-way communication channel between two processes.</li>
<li><b>Shared Memory: </b>Allows sharing of variables between processes using <code>multiprocessing.Value</code> or <code>multiprocessing.Array</code>.</li>
</ul>
<h5 id="processes3">Process Pools</h5>
Similar to thread pools, Python provides <code>concurrent.futures.ProcessPoolExecutor</code> for managing a pool of worker processes.
<pre class="language-python"><code>
from concurrent.futures import ProcessPoolExecutor


def square(n):
    """Return n multiplied by itself."""
    return n * n


# The __main__ guard is required on platforms that start worker processes
# with the "spawn" method (Windows, macOS): each worker re-imports this
# module, and without the guard it would try to create a pool of its own.
if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=4) as executor:
        # map() yields the results in the same order as the inputs
        results = executor.map(square, [1, 2, 3, 4])
        print(list(results))  # [1, 4, 9, 16]
</code></pre>
<h5 id="process4">Synchronization in Multiprocessing</h5>
Even though processes do not share memory, they might need to coordinate actions. Python provides synchronization primitives similar to those in threading, such as Locks, Events, Semaphores, and Conditions, but adapted for inter-process use.
<h3 id="choosing">Choosing Between Multithreading and Multiprocessing</h3>
<ul>
<li>Use Multithreading when your application is I/O-bound, meaning that the task spends most of its time waiting for I/O operations like file handling, network communication, etc.</li>
<li>Use Multiprocessing when your application is CPU-bound, meaning that the task spends most of its time performing computations, and you want to leverage multiple CPU cores for parallel execution.</li>
</ul>
</section>
<!-------Reference ------->
<section id="reference">
<h3>References</h3>
<ol>
<li>Udemy playlist on <a href="https://www.udemy.com/course/complete-machine-learning-nlp-bootcamp-mlops-deployment/?utm_campaign=email&amp;utm_medium=email&amp;utm_source=sendgrid.com&amp;couponCode=SKILLS4SALEA" target="_blank" rel="noopener noreferrer">advanced python by Krish Naik</a>.</li>
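<li>For more details, please check out the official documentation for the <a href="https://docs.python.org/3/library/threading.html" target="_blank"><code>threading</code></a> and <a href="https://docs.python.org/3/library/multiprocessing.html" target="_blank"><code>multiprocessing</code></a> modules.</li>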
</ol>
</section>
<hr>
<div style="background-color: #f0f0f0; padding: 15px; border-radius: 5px;">
<h3>Some other interesting things to know:</h3>
<ul style="list-style-type: disc; margin-left: 30px;">
<li>Visit my website for <a href="sql-project.html">Data, Big Data, Data modeling, Data warehouse, SQL, and cloud computing</a>.</li>
<li>Visit my website on <a href="Data-engineering.html">Data engineering</a></li>
</ul>
</div>
<p></p>
<div class="navigation">
<a href="index.html#portfolio" class="clickable-box">
<span class="arrow-left">Portfolio section</span>
</a>
<a href="portfolio-details.html" class="clickable-box">
<span class="arrow-right">Content</span>
</a>
</div>
</div>
</section><!-- End Portfolio Details Section -->
</main><!-- End #main -->
<!-- ======= Footer ======= -->
<footer id="footer">
<div class="container">
<div class="copyright">
© Copyright <strong><span>Arun</span></strong>
</div>
</div>
</footer><!-- End Footer -->
<a href="#" class="back-to-top d-flex align-items-center justify-content-center"><i class="bi bi-arrow-up-short"></i></a>
<!-- Vendor JS Files -->
<script src="assets/vendor/purecounter/purecounter_vanilla.js"></script>
<script src="assets/vendor/aos/aos.js"></script>
<script src="assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<script src="assets/vendor/glightbox/js/glightbox.min.js"></script>
<script src="assets/vendor/isotope-layout/isotope.pkgd.min.js"></script>
<script src="assets/vendor/swiper/swiper-bundle.min.js"></script>
<script src="assets/vendor/typed.js/typed.umd.js"></script>
<script src="assets/vendor/waypoints/noframework.waypoints.js"></script>
<script src="assets/vendor/php-email-form/validate.js"></script>
<!-- Template Main JS File -->
<script src="assets/js/main.js"></script>
<script>
// Highlight the code samples once the DOM is ready.
// This page loads Prism (assets/js/prism.js), not highlight.js, so the
// previous call to hljs.initHighlightingOnLoad() threw a ReferenceError.
// The guard keeps the page error-free if Prism failed to load.
document.addEventListener("DOMContentLoaded", function () {
  if (window.Prism) {
    window.Prism.highlightAll();
  }
});
</script>
</body>
</html>